本文是本人学习博文(zhuanlan.zhihu.com/p/40378224)…
代码
未带激活函数
import numpy as np
class SimpleNN:
    """Tiny 2-2-1 network without activation functions, trained by backprop.

    The model is purely linear::

        h1 = w1*x1 + w2*x2
        h2 = w3*x1 + w4*x2
        y  = w5*h1 + w6*h2

    It is trained one sample at a time with plain gradient descent on the
    squared-error loss ``E = 0.5 * (t - y)**2``.

    Args:
        lr: Learning rate applied by ``update_weights``.
    """

    def __init__(self, lr=0.1):
        # Fixed (not random) starting weights, so every run is reproducible.
        self.w1 = 0.5
        self.w2 = 1.5
        self.w3 = 2.3
        self.w4 = 3
        self.w5 = 1
        self.w6 = 1
        self.lr = lr
        # Filled in by backward(). Starting empty makes update_weights() a
        # harmless no-op (instead of an AttributeError) if it is called first.
        self.grads = {}

    def forward(self, x1, x2):
        """Run the forward pass and cache every intermediate value.

        The inputs, both hidden values and the output are stored on the
        instance because ``loss`` and ``backward`` read them afterwards.

        Args:
            x1: First input value.
            x2: Second input value.

        Returns:
            The network output ``y``.
        """
        self.x1 = x1
        self.x2 = x2
        self.h1 = self.w1 * x1 + self.w2 * x2
        self.h2 = self.w3 * x1 + self.w4 * x2
        self.y = self.w5 * self.h1 + self.w6 * self.h2
        return self.y

    def loss(self, t):
        """Return the squared-error loss of the last forward pass.

        Args:
            t: Target value.

        Returns:
            ``0.5 * (t - y)**2`` where ``y`` is the cached output; ``forward``
            must have been called beforehand.
        """
        return 0.5 * (t - self.y) ** 2

    def backward(self, t):
        """Compute the gradient of the loss w.r.t. all six weights.

        Applies the chain rule from the output back to the inputs, using the
        values cached by the last ``forward`` call, and stores the result in
        ``self.grads`` keyed by weight name (``'w1'`` ... ``'w6'``).

        Args:
            t: Target value.
        """
        # Output layer: dE/dy for E = 0.5 * (t - y)**2.
        dE_dy = self.y - t

        # Hidden -> output weights.
        dE_dw5 = dE_dy * self.h1
        dE_dw6 = dE_dy * self.h2

        # Propagate the error back to the hidden values.
        dE_dh1 = dE_dy * self.w5
        dE_dh2 = dE_dy * self.w6

        # Input -> hidden weights.
        dE_dw1 = dE_dh1 * self.x1
        dE_dw2 = dE_dh1 * self.x2
        dE_dw3 = dE_dh2 * self.x1
        dE_dw4 = dE_dh2 * self.x2

        self.grads = {
            'w1': dE_dw1, 'w2': dE_dw2,
            'w3': dE_dw3, 'w4': dE_dw4,
            'w5': dE_dw5, 'w6': dE_dw6,
        }

    def update_weights(self):
        """Apply one gradient-descent step: ``w <- w - lr * dE/dw``."""
        for name, grad in self.grads.items():
            setattr(self, name, getattr(self, name) - self.lr * grad)

    def train_step(self, x1, x2, t):
        """Run one full training step: forward, loss, backward, update.

        Args:
            x1: First input value.
            x2: Second input value.
            t: Target value.

        Returns:
            A ``(y_pred, E)`` tuple with the prediction and loss computed
            before the weights were updated.
        """
        y_pred = self.forward(x1, x2)
        E = self.loss(t)
        self.backward(t)
        self.update_weights()
        return y_pred, E
# ========================
# Example run: train the linear network on a single sample
# ========================
if __name__ == "__main__":
    nn = SimpleNN(lr=0.1)

    # Training data (a single sample).
    x1, x2 = 1, 0.5
    target = 2

    print("初始权重:")
    print(f"w1={nn.w1:.4f}, w2={nn.w2:.4f}, w3={nn.w3:.4f}, w4={nn.w4:.4f}")
    print(f"w5={nn.w5:.4f}, w6={nn.w6:.4f}")

    # Train for 100 steps, logging every second step.
    for epoch in range(100):
        y_pred, E = nn.train_step(x1, x2, target)
        if epoch % 2 == 0:
            print(f"Epoch {epoch}: y={y_pred:.4f}, Loss={E:.6f}")

    print("\n最终权重:")
    print(f"w1={nn.w1:.4f}, w2={nn.w2:.4f}, w3={nn.w3:.4f}, w4={nn.w4:.4f}")
    print(f"w5={nn.w5:.4f}, w6={nn.w6:.4f}")

    # Final prediction with the trained weights; forward() must run first
    # because loss() reads the cached output.
    final_y = nn.forward(x1, x2)
    final_loss = nn.loss(target)
    print(f"\n最终预测: y={final_y:.4f}, Loss={final_loss:.6f}")
带激活函数
import numpy as np
class SimpleNNWithActivation:
    """Tiny 2-2-1 network with sigmoid hidden units, trained by backprop.

    The model is::

        z1 = w1*x1 + w2*x2        h1 = sigmoid(z1)
        z2 = w3*x1 + w4*x2        h2 = sigmoid(z2)
        y  = w5*h1 + w6*h2        (linear output)

    It is trained one sample at a time with plain gradient descent on the
    squared-error loss ``E = 0.5 * (t - y)**2``.

    Args:
        lr: Learning rate applied by ``update_weights``.
        seed: Optional seed for the weight initialisation. When ``None``
            (the default) weights are drawn from NumPy's global random
            state, exactly as before; when given, a private
            ``np.random.RandomState(seed)`` is used so the initial weights
            are reproducible without touching global state.
    """

    def __init__(self, lr=0.1, seed=None):
        if seed is None:
            # Global generator: callers that use np.random.seed(...) keep
            # getting the same weights they always did.
            draw = np.random.randn
        else:
            draw = np.random.RandomState(seed).randn

        # Small random starting weights: standard normal scaled by 0.5.
        self.w1 = draw() * 0.5
        self.w2 = draw() * 0.5
        self.w3 = draw() * 0.5
        self.w4 = draw() * 0.5
        self.w5 = draw() * 0.5
        self.w6 = draw() * 0.5
        self.lr = lr
        # Filled in by backward(). Starting empty makes update_weights() a
        # harmless no-op (instead of an AttributeError) if it is called first.
        self.grads = {}

    def sigmoid(self, z):
        """Return the logistic sigmoid ``1 / (1 + exp(-z))``.

        ``z`` is clipped to ``[-500, 500]`` first so ``np.exp`` cannot
        overflow for extreme inputs.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_deriv(self, z):
        """Return the sigmoid derivative at pre-activation ``z``: ``s * (1 - s)``."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def forward(self, x1, x2):
        """Run the forward pass and cache every intermediate value.

        The inputs, pre-activations, hidden activations and output are stored
        on the instance because ``loss`` and ``backward`` read them afterwards.

        Args:
            x1: First input value.
            x2: Second input value.

        Returns:
            The network output ``y``.
        """
        self.x1 = x1
        self.x2 = x2
        self.z1 = self.w1 * x1 + self.w2 * x2
        self.z2 = self.w3 * x1 + self.w4 * x2
        self.h1 = self.sigmoid(self.z1)
        self.h2 = self.sigmoid(self.z2)
        self.y = self.w5 * self.h1 + self.w6 * self.h2
        return self.y

    def loss(self, t):
        """Return ``0.5 * (t - y)**2`` for the output cached by ``forward``."""
        return 0.5 * (t - self.y) ** 2

    def backward(self, t):
        """Compute the gradient of the loss w.r.t. all six weights.

        Applies the chain rule from the output back to the inputs, using the
        values cached by the last ``forward`` call, and stores the result in
        ``self.grads`` keyed by weight name (``'w1'`` ... ``'w6'``).

        Args:
            t: Target value.
        """
        # Output layer: dE/dy for E = 0.5 * (t - y)**2.
        dE_dy = self.y - t

        # Hidden -> output weights.
        dE_dw5 = dE_dy * self.h1
        dE_dw6 = dE_dy * self.h2

        # Propagate the error back to the hidden activations.
        dE_dh1 = dE_dy * self.w5
        dE_dh2 = dE_dy * self.w6

        # Key step: the activation derivative is evaluated at the raw
        # pre-activations z1, z2 (not at the activations h1, h2).
        dE_dz1 = dE_dh1 * self.sigmoid_deriv(self.z1)
        dE_dz2 = dE_dh2 * self.sigmoid_deriv(self.z2)

        # Input -> hidden weights.
        dE_dw1 = dE_dz1 * self.x1
        dE_dw2 = dE_dz1 * self.x2
        dE_dw3 = dE_dz2 * self.x1
        dE_dw4 = dE_dz2 * self.x2

        self.grads = {
            'w1': dE_dw1, 'w2': dE_dw2,
            'w3': dE_dw3, 'w4': dE_dw4,
            'w5': dE_dw5, 'w6': dE_dw6,
        }

    def update_weights(self):
        """Apply one gradient-descent step: ``w <- w - lr * dE/dw``."""
        for name, grad in self.grads.items():
            setattr(self, name, getattr(self, name) - self.lr * grad)

    def train_step(self, x1, x2, t):
        """Run one full training step: forward, loss, backward, update.

        Args:
            x1: First input value.
            x2: Second input value.
            t: Target value.

        Returns:
            A ``(y_pred, E)`` tuple with the prediction and loss computed
            before the weights were updated.
        """
        y_pred = self.forward(x1, x2)
        E = self.loss(t)
        self.backward(t)
        self.update_weights()
        return y_pred, E
# Example run: train the sigmoid network on a single sample.
if __name__ == "__main__":
    nn = SimpleNNWithActivation(lr=0.5)

    # Training data (a single sample).
    x1, x2 = 0.8, 0.3
    target = 1.0

    print("训练开始...")
    # Train for 100 steps, logging every 20th step.
    for epoch in range(100):
        y_pred, E = nn.train_step(x1, x2, target)
        if epoch % 20 == 0:
            print(f"Epoch {epoch}: y={y_pred:.4f}, Loss={E:.6f}")

    # forward() is evaluated first inside the f-string, so loss() reports the
    # loss of the final prediction rather than of the last training step.
    print(f"\n最终预测: y={nn.forward(x1,x2):.4f}, Loss={nn.loss(target):.6f}")