梯度反向传播

0 阅读2分钟

本文是本人学习博文(zhuanlan.zhihu.com/p/40378224)…

代码

未带激活函数
import numpy as np

class SimpleNN:
    """Tiny 2-2-1 network without activation functions, trained by hand.

    w1..w4 connect the two inputs to the two hidden units; w5 and w6 connect
    the hidden units to the single output. Every layer is purely linear.
    The weights start from fixed constants (they are not random), so each
    run follows the same trajectory.
    """

    def __init__(self, lr=0.1):
        """Set the fixed starting weights and remember the learning rate."""
        # Input -> hidden weights.
        self.w1, self.w2 = 0.5, 1.5
        self.w3, self.w4 = 2.3, 3
        # Hidden -> output weights.
        self.w5, self.w6 = 1, 1
        self.lr = lr

    def forward(self, x1, x2):
        """Run the forward pass and return the prediction ``y``.

            h1 = w1*x1 + w2*x2   (linear, no activation)
            h2 = w3*x1 + w4*x2
            y  = w5*h1 + w6*h2

        The inputs, both hidden values and the output are cached on the
        instance because ``loss`` and ``backward`` read them afterwards.
        """
        self.x1, self.x2 = x1, x2

        hidden_1 = self.w1 * x1 + self.w2 * x2
        hidden_2 = self.w3 * x1 + self.w4 * x2
        self.h1, self.h2 = hidden_1, hidden_2

        self.y = self.w5 * hidden_1 + self.w6 * hidden_2
        return self.y

    def loss(self, t):
        """Return the squared-error loss 0.5 * (t - y)^2 for the cached output."""
        residual = t - self.y
        return 0.5 * residual ** 2

    def backward(self, t):
        """Compute dE/dw for all six weights via the chain rule.

        Works from the output back to the inputs using the values cached by
        the last ``forward`` call, and stores the result in ``self.grads``
        keyed by weight name ('w1' .. 'w6').
        """
        # dE/dy for E = 0.5 * (t - y)^2.
        err = self.y - t

        # Error signal reaching each hidden unit through its output weight.
        grad_h1 = err * self.w5
        grad_h2 = err * self.w6

        self.grads = {
            # Input -> hidden weights: hidden error times the feeding input.
            'w1': grad_h1 * self.x1,
            'w2': grad_h1 * self.x2,
            'w3': grad_h2 * self.x1,
            'w4': grad_h2 * self.x2,
            # Hidden -> output weights: output error times the hidden value.
            'w5': err * self.h1,
            'w6': err * self.h2,
        }

    def update_weights(self):
        """Apply one gradient-descent step: w <- w - lr * dE/dw."""
        step = self.lr
        for name, grad in self.grads.items():
            setattr(self, name, getattr(self, name) - step * grad)

    def train_step(self, x1, x2, t):
        """Do forward, loss, backward and update for a single sample.

        Returns the prediction and loss measured *before* the weights are
        updated, as a ``(y_pred, E)`` tuple.
        """
        prediction = self.forward(x1, x2)
        error = self.loss(t)
        self.backward(t)
        self.update_weights()
        return prediction, error

# ========================
# 示例运行
# ========================

if __name__ == "__main__":
    # Demo: fit the linear network to one sample and report its progress.

    # The single training sample and the value the network should output.
    x1 = 1
    x2 = 0.5
    target = 2

    nn = SimpleNN(lr=0.1)

    print("初始权重:")
    print(f"w1={nn.w1:.4f}, w2={nn.w2:.4f}, w3={nn.w3:.4f}, w4={nn.w4:.4f}")
    print(f"w5={nn.w5:.4f}, w6={nn.w6:.4f}")

    # Train for 100 steps, logging on every even-numbered epoch.
    for epoch in range(100):
        y_pred, E = nn.train_step(x1, x2, target)
        if epoch % 2:
            continue
        print(f"Epoch {epoch}: y={y_pred:.4f}, Loss={E:.6f}")

    print("\n最终权重:")
    print(f"w1={nn.w1:.4f}, w2={nn.w2:.4f}, w3={nn.w3:.4f}, w4={nn.w4:.4f}")
    print(f"w5={nn.w5:.4f}, w6={nn.w6:.4f}")

    # Re-run the forward pass so the report reflects the last weight update.
    final_y = nn.forward(x1, x2)
    final_loss = nn.loss(target)
    print(f"\n最终预测: y={final_y:.4f}, Loss={final_loss:.6f}")
带激活函数
import numpy as np


class SimpleNNWithActivation:
    """2-2-1 network with sigmoid hidden units and a linear output.

    w1..w4 connect the two inputs to the two hidden units, whose
    pre-activations z1/z2 are passed through a sigmoid to give h1/h2;
    w5 and w6 combine h1/h2 linearly into the output y. Training is plain
    gradient descent on the squared error of a single sample.
    """

    _WEIGHT_NAMES = ('w1', 'w2', 'w3', 'w4', 'w5', 'w6')

    def __init__(self, lr=0.1, seed=None):
        """Draw the six initial weights from N(0, 1) scaled by 0.5.

        Args:
            lr: Learning rate used by ``update_weights``.
            seed: Optional seed for reproducible initial weights. When
                ``None`` (the default) the weights are drawn from NumPy's
                global ``np.random`` state. When given, a private
                ``np.random.RandomState(seed)`` is used instead, so the
                global state is left untouched; the weights match what
                ``np.random.seed(seed)`` followed by a default
                construction would give.
        """
        if seed is None:
            draw = np.random.randn
        else:
            draw = np.random.RandomState(seed).randn

        # Weights are drawn in the order w1..w6, one scalar per draw.
        for name in self._WEIGHT_NAMES:
            setattr(self, name, draw() * 0.5)
        self.lr = lr

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)).

        ``z`` is clipped to [-500, 500] first so ``np.exp`` cannot overflow.
        """
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def sigmoid_deriv(self, z):
        """Return the sigmoid derivative s * (1 - s) at pre-activation ``z``."""
        s = self.sigmoid(z)
        return s * (1 - s)

    def forward(self, x1, x2):
        """Run the forward pass and return the prediction ``y``.

        The inputs, pre-activations (z1, z2), hidden activations (h1, h2)
        and the output are cached on the instance because ``loss`` and
        ``backward`` read them afterwards.
        """
        self.x1 = x1
        self.x2 = x2

        self.z1 = self.w1 * x1 + self.w2 * x2
        self.z2 = self.w3 * x1 + self.w4 * x2

        self.h1 = self.sigmoid(self.z1)
        self.h2 = self.sigmoid(self.z2)

        # The output layer is linear: no activation is applied to y.
        self.y = self.w5 * self.h1 + self.w6 * self.h2

        return self.y

    def loss(self, t):
        """Return the squared-error loss 0.5 * (t - y)^2 for the cached output."""
        return 0.5 * (t - self.y) ** 2

    def backward(self, t):
        """Compute dE/dw for all six weights via the chain rule.

        Uses the values cached by the last ``forward`` call and stores the
        result in ``self.grads`` keyed by weight name ('w1' .. 'w6').
        """
        # dE/dy for E = 0.5 * (t - y)^2.
        dE_dy = self.y - t

        # Hidden -> output weights: output error times the hidden activation.
        dE_dw5 = dE_dy * self.h1
        dE_dw6 = dE_dy * self.h2

        # Error reaching each hidden activation through its output weight.
        dE_dh1 = dE_dy * self.w5
        dE_dh2 = dE_dy * self.w6

        # Key step: the activation derivative is evaluated at the raw
        # pre-activations z1/z2, not at the activations h1/h2.
        dE_dz1 = dE_dh1 * self.sigmoid_deriv(self.z1)
        dE_dz2 = dE_dh2 * self.sigmoid_deriv(self.z2)

        # Input -> hidden weights: pre-activation error times the input.
        dE_dw1 = dE_dz1 * self.x1
        dE_dw2 = dE_dz1 * self.x2
        dE_dw3 = dE_dz2 * self.x1
        dE_dw4 = dE_dz2 * self.x2

        self.grads = {
            'w1': dE_dw1, 'w2': dE_dw2,
            'w3': dE_dw3, 'w4': dE_dw4,
            'w5': dE_dw5, 'w6': dE_dw6
        }

    def update_weights(self):
        """Apply one gradient-descent step: w <- w - lr * dE/dw."""
        for key, grad in self.grads.items():
            setattr(self, key, getattr(self, key) - self.lr * grad)

    def train_step(self, x1, x2, t):
        """Do forward, loss, backward and update for a single sample.

        Returns the prediction and loss measured *before* the weights are
        updated, as a ``(y_pred, E)`` tuple.
        """
        y_pred = self.forward(x1, x2)
        E = self.loss(t)
        self.backward(t)
        self.update_weights()
        return y_pred, E


if __name__ == "__main__":
    # Demo: fit the sigmoid network to one sample and report its progress.

    # The single training sample and the value the network should output.
    x1 = 0.8
    x2 = 0.3
    target = 1.0

    nn = SimpleNNWithActivation(lr=0.5)

    print("训练开始...")
    # Train for 100 steps, logging on every 20th epoch.
    for epoch in range(100):
        y_pred, E = nn.train_step(x1, x2, target)
        if epoch % 20:
            continue
        print(f"Epoch {epoch}: y={y_pred:.4f}, Loss={E:.6f}")

    # forward() runs first inside the f-string, so loss() sees the fresh output.
    print(f"\n最终预测: y={nn.forward(x1,x2):.4f}, Loss={nn.loss(target):.6f}")