Multi-layer Neural Network
import tensorflow as tf
import numpy as np
from tensorflow import keras
single layer network
# A network with a single Dense neuron: one learnable weight and one bias.
my_layer = keras.layers.Dense(units=1, input_shape=[1])
model = tf.keras.Sequential()
model.add(my_layer)
model.compile(optimizer='sgd', loss='mean_squared_error')

# Training data obeys y = 2x - 1.
xs = np.arange(-1.0, 5.0)
ys = 2.0 * xs - 1.0
model.fit(xs, ys, epochs=500)

# After training, the weight should be close to 2 and the bias close to -1.
print(my_layer.get_weights())
Output:
[array([[1.9965485]], dtype=float32), array([-0.9892993], dtype=float32)]
2-layer network
# Two stacked Dense layers fitting the same linear relation y = 2x - 1.
# The layers are kept in variables so their weights can be inspected below.
my_layer_1 = keras.layers.Dense(units=2, input_shape=[1])
my_layer_2 = keras.layers.Dense(units=1)
model = tf.keras.Sequential()
model.add(my_layer_1)
model.add(my_layer_2)
model.compile(optimizer='sgd', loss='mean_squared_error')

xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = 2.0 * xs - 1.0
model.fit(xs, ys, epochs=500)

print(my_layer_1.get_weights())
print(my_layer_2.get_weights())
Output:
[array([[-0.14746471, -1.2075169 ]], dtype=float32), array([0.13128424, 0.41595024], dtype=float32)]
[array([[-0.5002602], [-1.5951983]], dtype=float32), array([-0.27079943], dtype=float32)]
Finally, we can manually compute the output of our 2-layer network to better understand how it works.
print(model.predict([10.0]))
Output:
1/1 [==============================] - 0s 44ms/step [[18.999998]]
# Reproduce model.predict([10.0]) by hand from the learned parameters.
value_to_predict = 10.0

# Each get_weights() returns [kernel, bias].
weights_1, biases_1 = my_layer_1.get_weights()
weights_2, biases_2 = my_layer_2.get_weights()

# Hidden layer: each of the two neurons computes w * x + b.
hidden_out_1 = weights_1[0][0] * value_to_predict + biases_1[0]
hidden_out_2 = weights_1[0][1] * value_to_predict + biases_1[1]

# Output neuron: weighted sum of the two hidden activations plus its bias.
final_out = weights_2[0] * hidden_out_1 + weights_2[1] * hidden_out_2 + biases_2[0]
print(final_out)
Output:
[18.999998]
Classification
import tensorflow as tf
# Load MNIST and scale pixel intensities from [0, 255] down to [0, 1].
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()
training_images = training_images / 255.0
val_images = val_images / 255.0

# Flatten each 28x28 image, pass through 20 ReLU units, then a 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(training_images, training_labels, epochs=20,
          validation_data=(val_images, val_labels))

# Examine the test data: overall metrics, then the per-class probabilities
# for the first validation image alongside its true label.
model.evaluate(val_images, val_labels)
classifications = model.predict(val_images)
print(classifications[0])
print(val_labels[0])
Output:
313/313 [==============================] - 1s 2ms/step - loss: 0.1355 - accuracy: 0.9639
313/313 [==============================] - 0s 1ms/step
[2.6026129e-07 3.5424837e-13 3.9708298e-06 1.2176799e-03 1.6572245e-10
7.4538526e-08 8.1040211e-16 9.9869943e-01 7.6264507e-05 2.2553024e-06]
7
Modify to inspect learned values
This code is identical, except that the layers are assigned to named variables before being added to the Sequential model. This allows us to inspect their learned parameters later.
import tensorflow as tf
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()

# Normalize pixel intensities to the [0, 1] range.
training_images, val_images = training_images / 255.0, val_images / 255.0

# Keep references to the Dense layers so their learned weights can be
# inspected after training.
layer_1 = tf.keras.layers.Dense(20, activation=tf.nn.relu)
layer_2 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    layer_1,
    layer_2,
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(training_images, training_labels, epochs=20)

model.evaluate(val_images, val_labels)
classifications = model.predict(val_images)
print(classifications[0])
print(val_labels[0])

# Inspect weights: 784 inputs x 20 neurons = 15680 kernel weights, 20 biases.
print(layer_1.get_weights()[0].size)
print(layer_1.get_weights()[1].size)
Output:
15680
20
Add validation set
import tensorflow as tf
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()
training_images = training_images / 255.0
val_images = val_images / 255.0

# Same architecture as before: flatten -> 20 ReLU units -> 10-way softmax.
layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
]
model = tf.keras.models.Sequential(layers)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Supplying validation_data reports val_loss / val_accuracy after every epoch.
model.fit(training_images, training_labels, epochs=20,
          validation_data=(val_images, val_labels))
Callbacks
Sometimes, if you set the training to run for too many epochs, you may find that training stops improving and you wish you could quit early. Good news: you can! TensorFlow provides a mechanism called Callbacks that can check the results at the end of each epoch. Modify this callback function to make sure it exits training early — but not before reaching at least the second epoch!
A hint: logs.get(METRIC_NAME) will return the value of METRIC_NAME at the current step
# define and instantiate your custom Callback
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once accuracy exceeds 0.86, but never before epoch 2.

    Implements the exercise requirement above: the early exit must not fire
    until at least the second epoch has completed.
    """

    def on_epoch_end(self, epoch, logs=None):
        # `epoch` is 0-indexed, so `epoch >= 1` means two epochs have run.
        # `logs=None` avoids a mutable default; guard against a missing
        # 'accuracy' key so the comparison cannot raise TypeError.
        accuracy = (logs or {}).get('accuracy')
        if epoch >= 1 and accuracy is not None and accuracy > 0.86:
            self.model.stop_training = True

callbacks = myCallback()