Implementing a Multilayer Perceptron with NumPy


from sklearn import datasets
import numpy as np
import math

from mlfromscratch.utils import train_test_split, to_categorical, normalize, accuracy_score
from mlfromscratch.deep_learning.activation_functions import Sigmoid, Softmax
from mlfromscratch.deep_learning.loss_functions import CrossEntropy

class MultilayerPerceptron():
    """Multilayer Perceptron classifier. A fully-connected neural network with
    one hidden layer. Unrolled to display the whole forward and backward pass.

    Parameters:
    -----------
    n_hidden: int
        The number of processing nodes (neurons) in the hidden layer.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """

    def __init__(self, n_hidden, n_iterations=3000, learning_rate=0.01):
        self.n_hidden = n_hidden
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.hidden_activation = Sigmoid()
        self.output_activation = Softmax()
        self.loss = CrossEntropy()

    def _initialize_weights(self, X, y):
        n_samples, n_features = X.shape
        _, n_outputs = y.shape
        # Hidden layer
        limit = 1 / math.sqrt(n_features)
        self.W = np.random.uniform(-limit, limit, (n_features, self.n_hidden))
        self.w0 = np.zeros((1, self.n_hidden))
        # Output layer
        limit = 1 / math.sqrt(self.n_hidden)
        self.V = np.random.uniform(-limit, limit, (self.n_hidden, n_outputs))
        self.v0 = np.zeros((1, n_outputs))
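        # Note: this is the common 1/sqrt(fan_in) uniform initialization, i.e.
        # weights are drawn from U(-1/sqrt(n_in), +1/sqrt(n_in)) so the scale of
        # each layer's pre-activations stays roughly independent of how many
        # inputs the layer has. Resulting shapes:
        #   W: (n_features, n_hidden),  w0: (1, n_hidden)
        #   V: (n_hidden, n_outputs),   v0: (1, n_outputs)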

    def fit(self, X, y):
        self._initialize_weights(X, y)

        for i in range(self.n_iterations):

            # ..............
            #  Forward Pass
            # ..............

            # HIDDEN LAYER
            hidden_input = X.dot(self.W) + self.w0
            hidden_output = self.hidden_activation(hidden_input)
            # OUTPUT LAYER
            output_layer_input = hidden_output.dot(self.V) + self.v0
            y_pred = self.output_activation(output_layer_input)

            # ...............
            #  Backward Pass
            # ...............

            # OUTPUT LAYER
            # Grad. w.r.t. input of output layer
            grad_wrt_out_l_input = self.loss.gradient(y, y_pred) * self.output_activation.gradient(output_layer_input)
            grad_v = hidden_output.T.dot(grad_wrt_out_l_input)
            grad_v0 = np.sum(grad_wrt_out_l_input, axis=0, keepdims=True)
            # HIDDEN LAYER
            # Grad. w.r.t. input of hidden layer
            grad_wrt_hidden_l_input = grad_wrt_out_l_input.dot(self.V.T) * self.hidden_activation.gradient(hidden_input)
            grad_w = X.T.dot(grad_wrt_hidden_l_input)
            grad_w0 = np.sum(grad_wrt_hidden_l_input, axis=0, keepdims=True)

            # Update weights (by gradient descent)
            # Move against the gradient to minimize loss
            self.V -= self.learning_rate * grad_v
            self.v0 -= self.learning_rate * grad_v0
            self.W -= self.learning_rate * grad_w
            self.w0 -= self.learning_rate * grad_w0
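            # Recap of the backward pass above in equation form (z1, z2 are the
            # hidden/output layer inputs, h is hidden_output, L is the loss):
            #   delta2 = dL/dy_pred * softmax'(z2)     -> grad_wrt_out_l_input
            #   grad_v = h.T @ delta2,   grad_v0 = sum of delta2 over samples
            #   delta1 = (delta2 @ V.T) * sigmoid'(z1) -> grad_wrt_hidden_l_input
            #   grad_w = X.T @ delta1,   grad_w0 = sum of delta1 over samples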

    # Use the trained model to predict labels of X
    def predict(self, X):
        # Forward pass:
        hidden_input = X.dot(self.W) + self.w0
        hidden_output = self.hidden_activation(hidden_input)
        output_layer_input = hidden_output.dot(self.V) + self.v0
        y_pred = self.output_activation(output_layer_input)
        return y_pred
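Because the output activation is a Softmax, predict returns one row of class probabilities per sample rather than hard labels. A minimal sketch of recovering label indices (assuming clf is a fitted MultilayerPerceptron):

probs = clf.predict(X_test)        # shape (n_samples, n_outputs); each row sums to 1
labels = np.argmax(probs, axis=1)  # most probable class index per sample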

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)
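The listing breaks off here, partway through main(). A plausible completion, sketched with illustrative hyperparameter values (and using the accuracy_score helper imported above): instantiate the model, fit it on the training split, and score the arg-maxed predictions against the arg-maxed one-hot targets.

    # Sketch: assumed completion of main(); hyperparameter values are illustrative
    clf = MultilayerPerceptron(n_hidden=16, n_iterations=1000, learning_rate=0.01)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred)
    print("Accuracy:", accuracy)

if __name__ == "__main__":
    main()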