Multi-layer Neural Network
import tensorflow as tf
import numpy as np
from tensorflow import keras
single layer network
# A network with a single Dense neuron: one learnable weight and one bias.
my_layer = keras.layers.Dense(units=1, input_shape=[1])
model = tf.keras.Sequential()
model.add(my_layer)
model.compile(optimizer='sgd', loss='mean_squared_error')

# Training data obeys y = 2x - 1.
xs = np.arange(-1.0, 5.0)
ys = 2.0 * xs - 1.0
model.fit(xs, ys, epochs=500)

# After training, the weight should be close to 2 and the bias close to -1.
print(my_layer.get_weights())
Output:
[array([[1.9965485]], dtype=float32), array([-0.9892993], dtype=float32)]
2-layer network
# Two stacked Dense layers fitting the same linear relation y = 2x - 1.
# The layers are kept in variables so their weights can be inspected below.
my_layer_1 = keras.layers.Dense(units=2, input_shape=[1])
my_layer_2 = keras.layers.Dense(units=1)
model = tf.keras.Sequential()
model.add(my_layer_1)
model.add(my_layer_2)
model.compile(optimizer='sgd', loss='mean_squared_error')

xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = 2.0 * xs - 1.0
model.fit(xs, ys, epochs=500)

print(my_layer_1.get_weights())
print(my_layer_2.get_weights())
Output:
[array([[-0.14746471, -1.2075169 ]], dtype=float32), array([0.13128424, 0.41595024], dtype=float32)]
[array([[-0.5002602], [-1.5951983]], dtype=float32), array([-0.27079943], dtype=float32)]
Finally, we can manually compute the output of our 2-layer network to better understand how it works.
print(model.predict([10.0]))
Output:
1/1 [==============================] - 0s 44ms/step [[18.999998]]
# Reproduce model.predict([10.0]) by hand from the learned parameters.
value_to_predict = 10.0

# Each get_weights() returns [kernel, bias].
weights_1, biases_1 = my_layer_1.get_weights()
weights_2, biases_2 = my_layer_2.get_weights()

# Hidden layer: each of the two neurons computes w * x + b.
hidden_out_1 = weights_1[0][0] * value_to_predict + biases_1[0]
hidden_out_2 = weights_1[0][1] * value_to_predict + biases_1[1]

# Output neuron: weighted sum of the two hidden activations plus its bias.
final_out = weights_2[0] * hidden_out_1 + weights_2[1] * hidden_out_2 + biases_2[0]
print(final_out)
Output:
[18.999998]
Classification
import tensorflow as tf
# Load MNIST and scale pixel intensities from [0, 255] down to [0, 1].
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()
training_images = training_images / 255.0
val_images = val_images / 255.0

# Flatten each 28x28 image, pass through 20 ReLU units, then a 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(20, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(training_images, training_labels, epochs=20,
          validation_data=(val_images, val_labels))

# Examine the test data: overall metrics, then the per-class probabilities
# for the first validation image alongside its true label.
model.evaluate(val_images, val_labels)
classifications = model.predict(val_images)
print(classifications[0])
print(val_labels[0])
Output:
313/313 [==============================] - 1s 2ms/step - loss: 0.1355 - accuracy: 0.9639
313/313 [==============================] - 0s 1ms/step
[2.6026129e-07 3.5424837e-13 3.9708298e-06 1.2176799e-03 1.6572245e-10
7.4538526e-08 8.1040211e-16 9.9869943e-01 7.6264507e-05 2.2553024e-06]
7
Modify to inspect learned values
This code is identical, except that the layers are assigned to named variables before being added to the Sequential model. This allows us to inspect their learned parameters later.
import tensorflow as tf
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()

# Normalize pixel intensities to the [0, 1] range.
training_images, val_images = training_images / 255.0, val_images / 255.0

# Keep references to the Dense layers so their learned weights can be
# inspected after training.
layer_1 = tf.keras.layers.Dense(20, activation=tf.nn.relu)
layer_2 = tf.keras.layers.Dense(10, activation=tf.nn.softmax)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    layer_1,
    layer_2,
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(training_images, training_labels, epochs=20)

model.evaluate(val_images, val_labels)
classifications = model.predict(val_images)
print(classifications[0])
print(val_labels[0])

# Inspect weights: 784 inputs x 20 neurons = 15680 kernel weights, 20 biases.
print(layer_1.get_weights()[0].size)
print(layer_1.get_weights()[1].size)
Output:
15680
20
Add validation set
import tensorflow as tf
data = tf.keras.datasets.mnist
(training_images, training_labels), (val_images, val_labels) = data.load_data()
training_images = training_images / 255.0
val_images = val_images / 255.0

# Same architecture as before: flatten -> 20 ReLU units -> 10-way softmax.
layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax),
]
model = tf.keras.models.Sequential(layers)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Supplying validation_data reports val_loss / val_accuracy after every epoch.
model.fit(training_images, training_labels, epochs=20,
          validation_data=(val_images, val_labels))
Callbacks
Sometimes, if you set the training to run for too many epochs, you may find that training stops improving and you wish you could quit early. Good news: you can! TensorFlow provides a mechanism called Callbacks that can check the results at the end of each epoch. Modify this callback function to make sure it exits training early — but not before reaching at least the second epoch!
A hint: logs.get(METRIC_NAME) will return the value of METRIC_NAME at the current step
# define and instantiate your custom Callback
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once accuracy exceeds 0.86, but never before epoch 2.

    Implements the exercise requirement above: the early exit must not fire
    until at least the second epoch has completed.
    """

    def on_epoch_end(self, epoch, logs=None):
        # `epoch` is 0-indexed, so `epoch >= 1` means two epochs have run.
        # `logs=None` avoids a mutable default; guard against a missing
        # 'accuracy' key so the comparison cannot raise TypeError.
        accuracy = (logs or {}).get('accuracy')
        if epoch >= 1 and accuracy is not None and accuracy > 0.86:
            self.model.stop_training = True

callbacks = myCallback()