李沐的深度学习课 课程笔记2 线性回归：S01E15 S01E16 线性回归的从零实现和简洁实现（定义损失函数、计算均方误差）

S01E15 S01E16 线性回归

从零实现

import random
import torch
from d2l import torch as d2l  # pip install d2l
from torch.utils import data
from torch import nn


def synthetic_data(w, b, num_examples):  # @save
    """
    Generate a synthetic regression data set following y = Xw + b + noise.

    X is drawn from a standard normal distribution with one column per
    entry of ``w``; the noise is normal with standard deviation 0.01.

    Returns a pair ``(X, y)`` where ``y`` is reshaped into a single column.
    """
    feature_shape = (num_examples, len(w))
    X = torch.normal(0, 1, feature_shape)
    clean_targets = torch.matmul(X, w) + b
    # The noise is drawn after X, so the RNG consumption order is X, then noise.
    y = clean_targets + torch.normal(0, 0.01, clean_targets.shape)
    return X, y.reshape((-1, 1))


def data_iter(batch_size, features, labels):
    """
    Yield ``(features, labels)`` mini-batches in a freshly shuffled order.

    Every example is visited exactly once per pass; the final batch is
    smaller when the number of examples is not a multiple of ``batch_size``.
    """
    order = list(range(len(features)))
    # Examples are read in random order, not in storage order.
    random.shuffle(order)
    total = len(order)
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the last batch is just shorter.
        chosen = torch.tensor(order[start:start + batch_size])
        yield features[chosen], labels[chosen]


def linreg(X, w, b):  # @save
    """
    Linear regression model: return ``Xw + b``.
    """
    weighted = torch.matmul(X, w)
    return weighted + b


def squared_loss(y_hat, y):  # @save
    """
    Element-wise squared loss: ``(y_hat - y)^2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting; no
    reduction is applied, so the result has the shape of ``y_hat``.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2


def sgd(params, lr, batch_size):  # @save
    """
    Mini-batch stochastic gradient descent step.

    Each parameter is updated in place by ``lr * grad / batch_size`` and its
    gradient is then reset to zero. The update runs with autograd disabled.
    """
    for param in params:
        with torch.no_grad():
            step = lr * param.grad / batch_size
            param -= step
            # Clear the gradient so the next backward pass starts from zero.
            param.grad.zero_()


def demo0(true_w, true_b):
    """
    Train linear regression from scratch and return the learned ``(w, b)``.

    Generates 1000 synthetic examples from ``true_w`` / ``true_b``, then runs
    5 epochs of mini-batch SGD (batch size 10, learning rate 0.03) using the
    hand-written ``linreg``, ``squared_loss`` and ``sgd`` helpers. The loss
    over the full data set is printed after every epoch.
    """
    # Build the training set.
    features, labels = synthetic_data(true_w, true_b, 1000)

    # Initialise the model parameters. The weight column is sized from
    # true_w rather than hard-coded, so any number of features works.
    num_features = len(true_w)
    w = torch.normal(0, 0.01, size=(num_features, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    print("训练前数据", w.reshape(true_w.shape), b)
    batch_size = 10
    # Hyper-parameters.
    lr = 0.03  # learning rate
    num_epochs = 5  # number of passes over the data
    net = linreg
    loss = squared_loss
    # Training loop.
    for epoch in range(num_epochs):
        for X, y in data_iter(batch_size, features, labels):
            l = loss(net(X, w, b), y)  # per-example loss on the mini-batch
            # l has shape (batch_size, 1) rather than being a scalar, so its
            # elements are summed before computing gradients w.r.t. [w, b].
            l.sum().backward()
            sgd([w, b], lr, batch_size)  # update parameters from their gradients
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
    return w, b


def load_array(data_arrays, batch_size, is_train=True):  # @save
    """
    Wrap tensors in a PyTorch ``DataLoader``.

    ``data_arrays`` is unpacked into a ``TensorDataset``; batches are
    shuffled only when ``is_train`` is true.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader


def demo1(true_w, true_b):
    """
    Train linear regression with PyTorch building blocks and return ``(w, b)``.

    Uses ``nn.Linear``, ``nn.MSELoss`` and ``torch.optim.SGD`` on 1000
    synthetic examples (batch size 10, learning rate 0.03, 5 epochs). The
    loss over the full data set is printed after every epoch. The returned
    values are the ``.data`` tensors of the layer's weight and bias.
    """
    # Build the training set.
    features, labels = d2l.synthetic_data(true_w, true_b, 1000)
    batch_size = 10
    data_iter = load_array((features, labels), batch_size)
    # Define the model. The input width follows true_w instead of being
    # hard-coded, so any number of features works.
    net = nn.Sequential(nn.Linear(len(true_w), 1))
    # Initialise the model parameters.
    net[0].weight.data.normal_(0, 0.01)
    net[0].bias.data.fill_(0)
    # Loss function: mean squared error via the MSELoss class.
    loss = nn.MSELoss()
    # Optimisation algorithm.
    trainer = torch.optim.SGD(net.parameters(), lr=0.03)
    # Training loop.
    num_epochs = 5
    for epoch in range(num_epochs):
        for X, y in data_iter:
            l = loss(net(X), y)
            trainer.zero_grad()
            l.backward()
            trainer.step()
        # This loss is only reported, never back-propagated, so autograd
        # tracking is switched off for it.
        with torch.no_grad():
            l = loss(net(features), labels)
        print(f'epoch {epoch + 1}, loss {l:f}')
    w = net[0].weight.data
    b = net[0].bias.data
    return w, b


if __name__ == '__main__':
    # Ground-truth parameters used to generate the data.
    true_b = 4.2
    true_w = torch.tensor([2, -3.4])
    print("真实数据", true_w, true_b)

    # Train with the concise implementation and compare against the truth.
    w, b = demo1(true_w, true_b)
    w_flat = w.reshape(true_w.shape)
    print("训练后数据", w_flat, b)
    w_error = true_w - w_flat
    b_error = true_b - b
    print(f'w的估计误差: {w_error}')
    print(f'b的估计误差: {b_error}')

S01E20 S01E21 softmax回归

从零实现

import os

import torch
from d2l import torch as d2l  # pip install d2l

num_inputs = 784  # number of input features; row count of the weight matrix W
num_outputs = 10  # number of outputs; column count of W and length of the bias b
lr = 0.1  # learning rate that updater() passes to d2l.sgd


def softmax(X):
    """
    Row-wise softmax of a 2-D tensor.

    Each row is exponentiated and divided by its own sum, so every row of
    the result sums to 1. The row maximum is subtracted before
    exponentiating: this leaves the result mathematically unchanged but
    stops ``exp`` from overflowing to ``inf`` (and the output turning into
    NaN) when the inputs are large.
    """
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X - row_max)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # broadcasting divides each row by its own sum


def net(X):
    """
    Softmax regression model.

    Flattens ``X`` so that each row has ``W.shape[0]`` entries, applies the
    affine map with the module-level ``W`` and ``b``, and returns the
    softmax of the result.
    """
    flat = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flat, W) + b
    return softmax(logits)


def cross_entropy(y_hat, y):
    """
    Cross-entropy loss per example.

    For every row of ``y_hat`` the entry at the column given by ``y`` is
    selected, and its negative logarithm is returned.
    """
    row_ids = range(len(y_hat))
    picked = y_hat[row_ids, y]
    return -torch.log(picked)


def accuracy(y_hat, y):  # @save
    """
    Count the correct predictions and return the count as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to the index of the largest entry per row first.
    """
    has_multiple_columns = y_hat.ndim > 1 and y_hat.shape[1] > 1
    if has_multiple_columns:
        y_hat = y_hat.argmax(axis=1)
    # Compare in the dtype of y, then count the matches in that dtype too.
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)


class Accumulator:  # @save
    """Keep running sums over ``n`` variables in the ``data`` list."""

    def __init__(self, n):
        # One float slot per tracked variable, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        # Pair each running total with the matching argument and add it.
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        # Zero every slot while keeping the current number of slots.
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]


def evaluate_accuracy(net, data_iter):  # @save
    """
    Return the fraction of correctly predicted examples in ``data_iter``.

    An ``nn.Module`` is switched to evaluation mode first; predictions are
    made with autograd disabled.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # put the model into evaluation mode
    tally = Accumulator(2)  # slot 0: correct predictions, slot 1: total predictions
    with torch.no_grad():
        for inputs, targets in data_iter:
            predictions = net(inputs)
            tally.add(accuracy(predictions, targets), targets.numel())
    correct, total = tally[0], tally[1]
    return correct / total


def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """
    Train the model for one epoch.

    ``updater`` is either a ``torch.optim.Optimizer`` (the mean loss is
    back-propagated, then ``step()`` is called) or a callable taking the
    batch size (the summed loss is back-propagated, then the callable runs).

    Returns ``(average training loss, training accuracy)`` over all examples
    seen in the epoch.
    """
    # Put the model into training mode.
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of examples.
    totals = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom optimiser and loss function.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # PyTorch built-in optimiser and loss function.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        totals.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and accuracy over the epoch.
    num_examples = totals[2]
    return totals[0] / num_examples, totals[1] / num_examples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  # @save
    """
    Train for ``num_epochs`` epochs, printing metrics after each one.

    Each epoch prints the epoch number followed by the tuple
    ``(train loss, train accuracy, test accuracy)``. After the last epoch the
    final metrics are sanity-checked with assertions.
    """
    for epoch_idx in range(num_epochs):
        epoch_stats = train_epoch_ch3(net, train_iter, loss, updater)
        held_out_acc = evaluate_accuracy(net, test_iter)
        print(epoch_idx + 1, "：", epoch_stats + (held_out_acc,))
    final_loss, final_acc = epoch_stats
    # Sanity checks on the metrics of the last epoch.
    assert final_loss < 0.5, final_loss
    assert 0.7 < final_acc <= 1, final_acc
    assert 0.7 < held_out_acc <= 1, held_out_acc


def updater(batch_size):
    """Apply one ``d2l.sgd`` step to the module-level ``W`` and ``b`` using ``lr``."""
    trainable = [W, b]
    return d2l.sgd(trainable, lr, batch_size)


def predict_ch3(net, test_iter, n=6):  # @save
    """
    Print predicted and true labels for the first batch of ``test_iter``.

    One line is printed per example in that batch. The ``n`` parameter is
    accepted but not used by this implementation.
    """
    # Only the first batch is needed, so stop after one iteration.
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    for true, pred in zip(trues, preds):
        print('预测值：' + pred + "\t真实值：" + true)


if __name__ == '__main__':
    # Model parameters live at module level so net() and updater() can see them.
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    # Training and test data.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    softmax_file = "softmax.pth"
    if not os.path.exists(softmax_file):
        # No saved parameters yet: train, then store W and b for later runs.
        num_epochs = 10
        train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
        torch.save({'w': W, 'b': b}, softmax_file)
    else:
        # Reuse the parameters saved by an earlier run instead of training.
        checkpoint = torch.load(softmax_file)
        W, b = checkpoint['w'], checkpoint['b']
    # Predict on the first test batch.
    predict_ch3(net, test_iter)

简洁实现

import torch
from d2l import torch as d2l  # pip install d2l
from torch import nn

num_inputs = 784  # input width of the nn.Linear layer built in __main__
num_outputs = 10  # output width of the nn.Linear layer built in __main__
lr = 0.1  # learning rate passed to torch.optim.SGD in __main__


def init_weights(m):
    """
    Re-initialise the weight of an ``nn.Linear`` module.

    The weight is filled from a normal distribution with standard deviation
    0.01. Modules whose type is not exactly ``nn.Linear`` are left untouched,
    and the bias is never modified. Intended for use with ``net.apply``.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, std=0.01)


def accuracy(y_hat, y):  # @save
    """
    Count the correct predictions and return the count as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to the index of the largest entry per row first.
    """
    has_multiple_columns = y_hat.ndim > 1 and y_hat.shape[1] > 1
    if has_multiple_columns:
        y_hat = y_hat.argmax(axis=1)
    # Compare in the dtype of y, then count the matches in that dtype too.
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)


class Accumulator:  # @save
    """Keep running sums over ``n`` variables in the ``data`` list."""

    def __init__(self, n):
        # One float slot per tracked variable, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        # Pair each running total with the matching argument and add it.
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        # Zero every slot while keeping the current number of slots.
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]


def evaluate_accuracy(net, data_iter):  # @save
    """
    Return the fraction of correctly predicted examples in ``data_iter``.

    An ``nn.Module`` is switched to evaluation mode first; predictions are
    made with autograd disabled.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # put the model into evaluation mode
    tally = Accumulator(2)  # slot 0: correct predictions, slot 1: total predictions
    with torch.no_grad():
        for inputs, targets in data_iter:
            predictions = net(inputs)
            tally.add(accuracy(predictions, targets), targets.numel())
    correct, total = tally[0], tally[1]
    return correct / total


def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """
    Train the model for one epoch.

    ``updater`` is either a ``torch.optim.Optimizer`` (the mean loss is
    back-propagated, then ``step()`` is called) or a callable taking the
    batch size (the summed loss is back-propagated, then the callable runs).

    Returns ``(average training loss, training accuracy)`` over all examples
    seen in the epoch.
    """
    # Put the model into training mode.
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of examples.
    totals = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom optimiser and loss function.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # PyTorch built-in optimiser and loss function.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        totals.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and accuracy over the epoch.
    num_examples = totals[2]
    return totals[0] / num_examples, totals[1] / num_examples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  # @save
    """
    Train for ``num_epochs`` epochs, printing metrics after each one.

    Each epoch prints the epoch number followed by the tuple
    ``(train loss, train accuracy, test accuracy)``. After the last epoch the
    final metrics are sanity-checked with assertions.
    """
    for epoch_idx in range(num_epochs):
        epoch_stats = train_epoch_ch3(net, train_iter, loss, updater)
        held_out_acc = evaluate_accuracy(net, test_iter)
        print(epoch_idx + 1, "：", epoch_stats + (held_out_acc,))
    final_loss, final_acc = epoch_stats
    # Sanity checks on the metrics of the last epoch.
    assert final_loss < 0.5, final_loss
    assert 0.7 < final_acc <= 1, final_acc
    assert 0.7 < held_out_acc <= 1, held_out_acc


def predict_ch3(net, test_iter, n=6):  # @save
    """
    Print predicted and true labels for the first batch of ``test_iter``.

    One line is printed per example, followed by a summary line with the
    number of wrong predictions and the number of examples. The ``n``
    parameter is accepted but not used by this implementation.
    """
    # Only the first batch is needed, so stop after one iteration.
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    mistakes = 0
    total = 0
    for total, (true, pred) in enumerate(zip(trues, preds), start=1):
        print('预测值：' + pred + "\t真实值：" + true)
        if pred != true:
            mistakes += 1
    print("错误数量", mistakes, "总量", total)


if __name__ == '__main__':
    # This script's import section does not import os, yet os.path.exists is
    # needed below; import it here so the check does not raise NameError.
    import os

    # Training and test data.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "softmax_model.pth"
    if not os.path.exists(model_path):
        # No saved model yet: train one.
        # PyTorch does not reshape inputs implicitly, so a Flatten layer is
        # placed before the linear layer to adjust the input shape.
        net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs, num_outputs))
        net.apply(init_weights)
        loss = nn.CrossEntropyLoss(reduction='none')
        trainer = torch.optim.SGD(net.parameters(), lr=lr)
        num_epochs = 20
        train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
        # Save the model's state_dict.
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")
    else:
        # A saved model exists: rebuild the same architecture and load it.
        net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs, num_outputs))
        net.load_state_dict(torch.load(model_path))
        # Switch to evaluation mode.
        net.eval()
        print("测试数量:", test_iter.batch_size)
        predict_ch3(net, test_iter)

S01E24 多层感知机

从零实现

import os

import torch
from d2l import torch as d2l  # pip install d2l
from torch import nn

num_inputs = 784  # input width: net() flattens X to this many columns; row count of W1
num_outputs = 10  # output width: column count of W2 and length of b2
lr = 0.1  # learning rate passed to torch.optim.SGD in __main__


def accuracy(y_hat, y):  # @save
    """
    Count the correct predictions and return the count as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to the index of the largest entry per row first.
    """
    has_multiple_columns = y_hat.ndim > 1 and y_hat.shape[1] > 1
    if has_multiple_columns:
        y_hat = y_hat.argmax(axis=1)
    # Compare in the dtype of y, then count the matches in that dtype too.
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)


class Accumulator:  # @save
    """Keep running sums over ``n`` variables in the ``data`` list."""

    def __init__(self, n):
        # One float slot per tracked variable, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        # Pair each running total with the matching argument and add it.
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        # Zero every slot while keeping the current number of slots.
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]


def evaluate_accuracy(net, data_iter):  # @save
    """
    Return the fraction of correctly predicted examples in ``data_iter``.

    An ``nn.Module`` is switched to evaluation mode first; predictions are
    made with autograd disabled.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # put the model into evaluation mode
    tally = Accumulator(2)  # slot 0: correct predictions, slot 1: total predictions
    with torch.no_grad():
        for inputs, targets in data_iter:
            predictions = net(inputs)
            tally.add(accuracy(predictions, targets), targets.numel())
    correct, total = tally[0], tally[1]
    return correct / total


def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """
    Train the model for one epoch.

    ``updater`` is either a ``torch.optim.Optimizer`` (the mean loss is
    back-propagated, then ``step()`` is called) or a callable taking the
    batch size (the summed loss is back-propagated, then the callable runs).

    Returns ``(average training loss, training accuracy)`` over all examples
    seen in the epoch.
    """
    # Put the model into training mode.
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of examples.
    totals = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom optimiser and loss function.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # PyTorch built-in optimiser and loss function.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        totals.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and accuracy over the epoch.
    num_examples = totals[2]
    return totals[0] / num_examples, totals[1] / num_examples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  # @save
    """
    Train for ``num_epochs`` epochs, printing metrics after each one.

    Each epoch prints the epoch number followed by the tuple
    ``(train loss, train accuracy, test accuracy)``. After the last epoch the
    final metrics are sanity-checked with assertions.
    """
    for epoch_idx in range(num_epochs):
        epoch_stats = train_epoch_ch3(net, train_iter, loss, updater)
        held_out_acc = evaluate_accuracy(net, test_iter)
        print(epoch_idx + 1, "：", epoch_stats + (held_out_acc,))
    final_loss, final_acc = epoch_stats
    # Sanity checks on the metrics of the last epoch.
    assert final_loss < 0.5, final_loss
    assert 0.7 < final_acc <= 1, final_acc
    assert 0.7 < held_out_acc <= 1, held_out_acc


def predict_ch3(net, test_iter, n=6):  # @save
    """
    Print predicted and true labels for the first batch of ``test_iter``.

    One line is printed per example, followed by a summary line with the
    number of wrong predictions, the number of examples and the resulting
    accuracy. The ``n`` parameter is accepted but not used by this
    implementation.
    """
    # Only the first batch is needed, so stop after one iteration.
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    mistakes = 0
    total = 0
    for total, (true, pred) in enumerate(zip(trues, preds), start=1):
        print('预测值：' + pred + "\t真实值：" + true)
        if pred != true:
            mistakes += 1
    batch_accuracy = 1.0 - mistakes * 1.0 / total
    print("错误数量", mistakes, "总量", total, "准确率", batch_accuracy)


def relu(X):
    """Element-wise ReLU: the larger of each entry of ``X`` and zero."""
    return torch.maximum(X, torch.zeros_like(X))


def net(X):
    """
    Multilayer perceptron with one hidden layer.

    Flattens ``X`` to ``num_inputs`` columns, applies the hidden affine map
    (``W1``, ``b1``) followed by ``relu``, then the output affine map
    (``W2``, ``b2``). The parameters are module-level names.
    """
    flat = X.reshape((-1, num_inputs))
    hidden = relu(torch.matmul(flat, W1) + b1)
    return torch.matmul(hidden, W2) + b2


if __name__ == '__main__':
    # Training and test data.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "mlp.pth"
    if os.path.exists(model_path):
        # Saved parameters exist: load them and predict on the first test batch.
        params = torch.load(model_path)
        W1, b1, W2, b2 = params[0], params[1], params[2], params[3]
        predict_ch3(net, test_iter)
    else:
        # No saved parameters yet: create them, train, and store them.
        num_hiddens = 256
        W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
        b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
        W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
        b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
        params = [W1, b1, W2, b2]

        num_epochs = 10
        loss = nn.CrossEntropyLoss(reduction='none')
        updater = torch.optim.SGD(params, lr=lr)

        train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
        torch.save(params, model_path)

简洁实现

import os

import torch
from d2l import torch as d2l  # pip install d2l
from torch import nn

num_inputs = 784  # input width of the first nn.Linear layer built in __main__
num_outputs = 10  # output width of the last nn.Linear layer built in __main__
num_hiddens = 256  # width of the hidden layer between the two nn.Linear layers
lr = 0.1  # learning rate passed to torch.optim.SGD in __main__


def init_weights(m):
    """
    Re-initialise the weight of an ``nn.Linear`` module.

    The weight is filled from a normal distribution with standard deviation
    0.01. Modules whose type is not exactly ``nn.Linear`` are left untouched,
    and the bias is never modified. Intended for use with ``net.apply``.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, std=0.01)


def accuracy(y_hat, y):  # @save
    """
    Count the correct predictions and return the count as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to the index of the largest entry per row first.
    """
    has_multiple_columns = y_hat.ndim > 1 and y_hat.shape[1] > 1
    if has_multiple_columns:
        y_hat = y_hat.argmax(axis=1)
    # Compare in the dtype of y, then count the matches in that dtype too.
    matches = y_hat.type(y.dtype) == y
    num_correct = matches.type(y.dtype).sum()
    return float(num_correct)


class Accumulator:  # @save
    """Keep running sums over ``n`` variables in the ``data`` list."""

    def __init__(self, n):
        # One float slot per tracked variable, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        # Pair each running total with the matching argument and add it.
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        # Zero every slot while keeping the current number of slots.
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]


def evaluate_accuracy(net, data_iter):  # @save
    """
    Return the fraction of correctly predicted examples in ``data_iter``.

    An ``nn.Module`` is switched to evaluation mode first; predictions are
    made with autograd disabled.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # put the model into evaluation mode
    tally = Accumulator(2)  # slot 0: correct predictions, slot 1: total predictions
    with torch.no_grad():
        for inputs, targets in data_iter:
            predictions = net(inputs)
            tally.add(accuracy(predictions, targets), targets.numel())
    correct, total = tally[0], tally[1]
    return correct / total


def train_epoch_ch3(net, train_iter, loss, updater):  # @save
    """
    Train the model for one epoch.

    ``updater`` is either a ``torch.optim.Optimizer`` (the mean loss is
    back-propagated, then ``step()`` is called) or a callable taking the
    batch size (the summed loss is back-propagated, then the callable runs).

    Returns ``(average training loss, training accuracy)`` over all examples
    seen in the epoch.
    """
    # Put the model into training mode.
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of examples.
    totals = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters.
        y_hat = net(X)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom optimiser and loss function.
            l.sum().backward()
            updater(X.shape[0])
        else:
            # PyTorch built-in optimiser and loss function.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        totals.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Average loss and accuracy over the epoch.
    num_examples = totals[2]
    return totals[0] / num_examples, totals[1] / num_examples


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  # @save
    """
    Train for ``num_epochs`` epochs, printing metrics after each one.

    Each epoch prints the epoch number followed by the tuple
    ``(train loss, train accuracy, test accuracy)``. After the last epoch the
    final metrics are sanity-checked with assertions.
    """
    for epoch_idx in range(num_epochs):
        epoch_stats = train_epoch_ch3(net, train_iter, loss, updater)
        held_out_acc = evaluate_accuracy(net, test_iter)
        print(epoch_idx + 1, "：", epoch_stats + (held_out_acc,))
    final_loss, final_acc = epoch_stats
    # Sanity checks on the metrics of the last epoch.
    assert final_loss < 0.5, final_loss
    assert 0.7 < final_acc <= 1, final_acc
    assert 0.7 < held_out_acc <= 1, held_out_acc


def predict_ch3(net, test_iter, n=6):  # @save
    """
    Print predicted and true labels for the first batch of ``test_iter``.

    One line is printed per example, followed by a summary line with the
    number of wrong predictions, the number of examples and the resulting
    accuracy. The ``n`` parameter is accepted but not used by this
    implementation.
    """
    # Only the first batch is needed, so stop after one iteration.
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    mistakes = 0
    total = 0
    for total, (true, pred) in enumerate(zip(trues, preds), start=1):
        print('预测值：' + pred + "\t真实值：" + true)
        if pred != true:
            mistakes += 1
    batch_accuracy = 1.0 - mistakes * 1.0 / total
    print("错误数量", mistakes, "总量", total, "准确率", batch_accuracy)


def relu(X):
    """
    Element-wise ReLU: the larger of each entry of ``X`` and zero.

    The ``__main__`` block of this script builds its network with
    ``nn.ReLU`` and does not call this helper.
    """
    return torch.maximum(X, torch.zeros_like(X))


if __name__ == '__main__':
    # Training and test data.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "mlp_nn.pth"
    # The same architecture is needed for both training and loading.
    layers = [
        nn.Flatten(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    ]
    net = nn.Sequential(*layers)
    if os.path.exists(model_path):
        # A saved model exists: load it and predict on the first test batch.
        net.load_state_dict(torch.load(model_path))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No saved model yet: initialise, train, then save the state_dict.
        net.apply(init_weights)
        num_epochs = 10
        loss = nn.CrossEntropyLoss(reduction='none')
        trainer = torch.optim.SGD(net.parameters(), lr=lr)
        train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

李沐的深度学习课 课程笔记2 线性回归

S01E15 S01E16 线性回归

从零实现

S01E20 S01E21 softmax回归

从零实现

简洁实现

S01E24 多层感知机

从零实现

简洁实现

李沐的深度学习课课程笔记2 线性回归