Chapter 5 (Backpropagation)
The error backpropagation algorithm
- Computational graph: treat each operation symbol (*, +, -, pow) as a node, and the data inputs as what flows along the edges. What backpropagation does is compute, at each node, the gradient required there from the node's operation, and pass it back to the previous layer via the chain rule of differentiation (a minimal sketch follows below).
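A minimal sketch of the idea (the function and numbers are illustrative, not from the book): the forward pass evaluates each node, and the backward pass multiplies the upstream gradient by each node's local derivative, which is exactly the chain rule.

```python
# computational graph for f = (x + y) * z, written out by hand
x, y, z = 2.0, 3.0, 4.0

# forward: evaluate each node
q = x + y   # add node: q = 5.0
f = q * z   # mul node: f = 20.0

# backward: start from df/df = 1 and apply the chain rule at each node
df = 1.0
dq = df * z     # the mul node's local derivative w.r.t. q is z
dz = df * q     # ... and w.r.t. z is q
dx = dq * 1.0   # the add node passes the gradient through unchanged
dy = dq * 1.0
print(dx, dy, dz)  # 4.0 4.0 5.0
```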
- Practice
- A key worked example. Pay attention to the order of the calls and read it alongside the original book, and you can see how backpropagation passes the derivatives back to the earlier layers.
```python
# A small example: apples cost 100 each (buy 2), oranges cost 150 each (buy 3),
# and the consumption tax rate is 1.1.
# Compute the derivative of the total price with respect to each parameter.
# MulLayer and AddLayer are implemented in the next section.
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# define the layers
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# forward pass
apple_price = mul_apple_layer.forward(apple, apple_num)      # cost = price * count
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)                # apply tax to the total
print(price)  # the total price

# backward pass, traversing the layers in reverse order
dprice = 1  # the output is used directly, so its derivative is 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # the orange branch takes dorange_price
dapple, dapple_num = mul_apple_layer.backward(dapple_price)
print("Derivatives of each parameter:")
print(dapple_num, dapple, dorange, dorange_num, dtax)
```
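For reference, the forward pass should print roughly 715.0 (650 × 1.1, up to floating-point rounding), and the derivatives come out to dapple_num = 110, dapple = 2.2, dorange = 3.3, dorange_num = 165, dtax = 650, which you can confirm by applying the chain rule by hand.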
- Implementations of the various operation layers (multiplication, addition, ReLU, and so on)
- Addition layer
```python
class AddLayer:
    def __init__(self):
        pass  # addition needs no stored state

    def forward(self, x, y):
        return x + y

    def backward(self, dout):
        # addition passes the upstream gradient through unchanged
        dx = dout * 1
        dy = dout * 1
        return dx, dy
```
- Multiplication layer (implements the forward and backward passes)
```python
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None

    def forward(self, x, y):
        self.x = x  # remember the inputs for the backward pass
        self.y = y
        out = x * y
        return out

    def backward(self, dout):
        dx = dout * self.y  # the two inputs are swapped
        dy = dout * self.x
        return dx, dy
```
- ReLU layer
```python
class ReLu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # boolean array: True where x <= 0, False where x > 0
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0  # zero out every position where the input is <= 0
        return out

    def backward(self, dout):
        # same mask: the gradient is blocked wherever the input was <= 0
        dout[self.mask] = 0
        dx = dout
        return dx

# mask has the same shape as the input x and stores True/False:
# True wherever the corresponding element of x is <= 0.
# The forward pass therefore zeroes the non-positive positions,
# and the backward pass does the same to the incoming gradient.
```
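A quick check of the mask behavior (input values are illustrative):

```python
import numpy as np

relu = ReLu()
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
print(relu.forward(x))                 # [[1. 0.] [0. 3.]] -- non-positive entries zeroed
print(relu.backward(np.ones_like(x)))  # [[1. 0.] [0. 1.]] -- gradient blocked at the same positions
```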
- Sigmoid layer
```python
import numpy as np

class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = 1 / (1 + np.exp(-x))
        self.out = out  # keep the output y for the backward pass
        return out

    def backward(self, dout):
        # dy/dx = y * (1 - y), per the formula derived in the book's figure
        return self.out * (1 - self.out) * dout
```
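The comment refers to the book's figure: since y = 1/(1 + exp(-x)), differentiating gives dy/dx = exp(-x)/(1 + exp(-x))^2 = y(1 - y), which is why backward only needs the stored output. A quick numerical spot check (values are illustrative):

```python
import numpy as np

sig = Sigmoid()
x = np.array([0.5])
sig.forward(x)
analytic = sig.backward(np.array([1.0]))

eps = 1e-6  # central-difference approximation of the derivative
numeric = (1 / (1 + np.exp(-(x + eps))) - 1 / (1 + np.exp(-(x - eps)))) / (2 * eps)
print(analytic, numeric)  # the two should agree to about 1e-9
```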
- Affine layer (matrix product + bias)
```python
import numpy as np

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)     # from the derivation of the matrix formula
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)  # the bias gradient sums over the batch
        return dx
```
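A shape sanity check for a batch (the sizes are illustrative):

```python
import numpy as np

W = np.random.randn(4, 3)   # input dimension 4, output dimension 3
b = np.zeros(3)
affine = Affine(W, b)

x = np.random.randn(10, 4)  # batch of 10 inputs
out = affine.forward(x)
dx = affine.backward(np.ones_like(out))
print(out.shape, dx.shape, affine.dW.shape, affine.db.shape)
# (10, 3) (10, 4) (4, 3) (3,)
```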
- Softmax-with-loss layer: the derivation is too involved to write out here, so go straight to the code (see the sketch below)
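The code the note points to is missing here; the sketch below follows the layer as the book presents it (it assumes the softmax and cross_entropy_error helpers from the book's common/functions.py, and one-hot labels t):

```python
from common.functions import softmax, cross_entropy_error

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # teacher labels (one-hot)

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        # combining softmax with cross-entropy yields the clean gradient
        # (y - t), averaged over the batch
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        return dx
```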
- Building a two-layer neural network with the error backpropagation algorithm
```python
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # build the layers; OrderedDict remembers insertion order,
        # so the backward pass can simply walk the layers in reverse
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect the gradients stored in each Affine layer
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
```
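A typical way to exercise this class is the book's gradient check: compare the backpropagation gradients against numerical ones. The snippet below uses random dummy data in place of MNIST so it stays self-contained (batch size and shapes are illustrative):

```python
import numpy as np

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
x = np.random.rand(3, 784)               # dummy batch of 3 "images"
t = np.eye(10)[np.random.choice(10, 3)]  # dummy one-hot labels

grad_numerical = net.numerical_gradient(x, t)  # slow but straightforward
grad_backprop = net.gradient(x, t)             # fast backpropagation

for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))  # each difference should be tiny (around 1e-10)
```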