Notes for Machine Learning Specialization: More Numerically Accurate Code Examples

This is a note for the Machine Learning Specialization.

From:

MNIST

Original:

# model
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(units=25, activation='relu'),
    Dense(units=15, activation='relu'),
    Dense(units=10, activation='softmax')   # output layer produces the probabilities a_1 ... a_10 directly
])

# loss and cost
from tensorflow.keras.losses import SparseCategoricalCrossentropy
model.compile(loss=SparseCategoricalCrossentropy())

# training (X, Y are the training features and labels)
model.fit(X, Y, epochs=100)
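
What can go wrong here: with a softmax output layer, the probabilities a_1 ... a_10 are first materialized in float32 and the loss then takes their log, so very small probabilities can round off badly, even all the way to 0. A minimal NumPy sketch of the effect, with made-up logit values chosen only for illustration:

import numpy as np

z = np.array([80.0, 0.0, -80.0], dtype=np.float32)   # made-up logits

# route 1: form the probabilities first, then take the log (as in the code above)
a = np.exp(z) / np.sum(np.exp(z))
print(a[2])            # underflows to exactly 0.0 in float32
print(-np.log(a[2]))   # log(0) gives inf, so this loss term is ruined

# route 2: compute the log-softmax directly from z, the kind of rearrangement
# that from_logits=True lets TensorFlow do internally (log-sum-exp trick)
log_a2 = z[2] - (np.max(z) + np.log(np.sum(np.exp(z - np.max(z)))))
print(-log_a2)         # ~160.0, the correct finite loss term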

More numerically accurate:

# model
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(units=25, activation='relu'),
    Dense(units=15, activation='relu'),
    Dense(units=10, activation='linear')   # the final layer no longer outputs the
                                           # probabilities a_1 through a_10;
                                           # it instead outputs the raw scores z_1 through z_10
])

# loss
from tensorflow.keras.losses import SparseCategoricalCrossentropy
model.compile(..., loss=SparseCategoricalCrossentropy(from_logits=True))   # from_logits=True is the key change

# fit
model.fit(X, Y, epochs=100)

# predict
logits = model(X)             # z_1 ... z_10, not a_1 ... a_10
f_x = tf.nn.softmax(logits)   # apply the softmax afterwards to recover the probabilities
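
Because the model now returns logits, the softmax above is only needed when you actually want probabilities; for the predicted digit you can take the largest logit directly, since softmax does not change the ordering. A minimal sketch, assuming X is the same input array used in model.fit:

logits = model(X)                             # shape (num_examples, 10)
predicted_class = tf.argmax(logits, axis=1)   # same answer as tf.argmax(f_x, axis=1)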

Logistic regression

Original:

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(units=25, activation='sigmoid'),
    Dense(units=15, activation='sigmoid'),
    Dense(units=1, activation='sigmoid')   # output layer produces the probability directly
])

from tensorflow.keras.losses import BinaryCrossentropy
model.compile(loss=BinaryCrossentropy())
model.fit(X, Y, epochs=100)
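
The same precision issue shows up here: for a large raw score z, the sigmoid activation rounds to exactly 1.0 in float32, and the loss term for a y = 0 example, -log(1 - f(x)), can no longer be computed well. A rough NumPy illustration with a made-up score:

import numpy as np

z = np.array([20.0], dtype=np.float32)   # made-up raw score
a = 1.0 / (1.0 + np.exp(-z))             # sigmoid rounds to exactly 1.0 in float32
print(-np.log(1.0 - a))                  # loss term for a y = 0 example: log(0) gives inf
# working from z directly keeps it finite: -log(1 - sigmoid(z)) = log(1 + e^z) ~ 20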

More numerically accurate:

# model
model = Sequential([
    Dense(units=25, activation='sigmoid'),
    Dense(units=15, activation='sigmoid'),
    Dense(units=1, activation='linear')   # output the raw score z instead of the sigmoid probability
])

# loss
from tensorflow.keras.losses import BinaryCrossentropy
model.compile(..., loss=BinaryCrossentropy(from_logits=True))

# fit
model.fit(X, Y, epochs=100)

# predict
logit = model(X)
f_x = tf.nn.sigmoid(logit)   # apply the sigmoid afterwards to get the probability
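
Same idea as in the softmax case: the network returns the raw score z, and the sigmoid is applied afterwards. To turn that probability into a 0/1 prediction you would still threshold it; a minimal sketch, assuming the usual 0.5 decision threshold:

y_hat = tf.cast(f_x >= 0.5, tf.int32)   # predict 1 when the probability is at least 0.5, else 0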