李沐的深度学习课 课程笔记4 卷积网络

136 阅读10分钟

s2_train.py

import torch
from d2l import torch as d2l
from torch import nn


def train_ch6(net, train_iter, test_iter, num_epochs, lr):
    """Train ``net`` with plain SGD and print metrics after every epoch.

    Weights of ``nn.Linear`` and ``nn.Conv2d`` layers are re-initialized with
    Xavier-uniform before training starts. After each epoch the average
    training loss, training accuracy and test accuracy are printed; once
    training is done the throughput and the accumulated step time are printed.

    Args:
        net: model to train; it is modified in place.
        train_iter: iterable yielding ``(X, y)`` training batches.
        test_iter: iterable handed to ``d2l.evaluate_accuracy_gpu``.
        num_epochs: number of passes over ``train_iter``.
        lr: learning rate for ``torch.optim.SGD``.

    Raises:
        ValueError: if an epoch processes no training samples.
    """

    def init_weights(m):
        # Exact type match, so subclasses of these layers are left untouched.
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    # Stays 0 when num_epochs == 0, so the summary below never hits an
    # unbound name.
    samples_per_epoch = 0
    for epoch in range(num_epochs):
        # Sum of training loss, sum of correct predictions, number of samples
        metric = d2l.Accumulator(3)
        net.train()
        for X, y in train_iter:
            timer.start()
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
        samples_per_epoch = metric[2]
        if not samples_per_epoch:
            raise ValueError("train_iter yielded no samples")
        # Averages are only needed once per epoch, so compute them here
        # instead of after every batch.
        train_l = metric[0] / samples_per_epoch
        train_acc = metric[1] / samples_per_epoch
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        print(f'{epoch + 1} : loss {train_l:.3f}, train acc {train_acc:.3f},  test acc {test_acc:.3f}')
    elapsed = timer.sum()
    if elapsed > 0:
        # Skip the throughput line when nothing was timed (avoids dividing by 0).
        print(f'{samples_per_epoch * num_epochs / elapsed:.1f} examples/sec ')
    print(f'耗时 {elapsed:.1f}s')


def predict_ch3(net, test_iter):  # @save
    """Print the number of misclassified samples and the accuracy per batch.

    Predictions are the arg-max over dimension 1 of ``net(X)`` and are compared
    with the integer labels directly, so the function is not tied to the label
    names of one particular dataset.

    Args:
        net: callable mapping a batch ``X`` to per-class scores.
        test_iter: iterable yielding ``(X, y)`` batches.
    """
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for X, y in test_iter:
            count = len(y)
            if count == 0:
                # An empty batch has no accuracy; skip it rather than divide by 0.
                continue
            preds = net(X).argmax(dim=1)
            loss_count = int((preds != y.to(preds.device)).sum())
            print("错误数量", loss_count, "总量", count, "准确率", 1.0 - loss_count * 1.0 / count)

LeNet

import os

import torch
from d2l import torch as d2l
from torch import nn

from s2_train import train_ch6, predict_ch3

if __name__ == '__main__':
    # Script entry point: train LeNet on Fashion-MNIST, or evaluate a saved one.
    batch_size = 256
    # Batches have shape (batch_size, 1, 28, 28)
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

    model_path = "lenet.pth"
    lenet_layers = [
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    ]
    net = nn.Sequential(*lenet_layers)
    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        lr, num_epochs = 0.9, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

LeNet for MNIST

import os

import torch
import torchvision
from d2l import torch as d2l
from torch import nn
from torch.utils import data
from torchvision import transforms

from s2_train import train_ch6, predict_ch3


def load_data_mnist(batch_size, resize=None):
    """Download MNIST through torchvision and wrap it in two data loaders.

    The dataset is stored under ``../data``. Images are converted with
    ``transforms.ToTensor``; when ``resize`` is truthy a ``transforms.Resize``
    step is applied first.

    See also: https://www.paddlepaddle.org.cn/tutorials/projectdetail/5562875

    Args:
        batch_size: batch size used by both loaders.
        resize: optional size forwarded to ``transforms.Resize``.

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    datasets = [
        torchvision.datasets.MNIST(
            root="../data", train=is_train, transform=pipeline, download=True)
        for is_train in (True, False)
    ]
    train_set, test_set = datasets
    train_loader = data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=d2l.get_dataloader_workers())
    test_loader = data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=d2l.get_dataloader_workers())
    return train_loader, test_loader


def get_mnist_labels(labels):
    """Map numeric MNIST labels to their digit strings.

    Each element of ``labels`` is converted with ``int`` and used as an index
    into the ten digit names ``'0'`` .. ``'9'``.

    Args:
        labels: iterable of values accepted by ``int``.

    Returns:
        A list of one-character strings, one per input label.
    """
    digit_names = "0123456789"
    names = []
    for label in labels:
        names.append(digit_names[int(label)])
    return names


if __name__ == '__main__':
    # Script entry point: train LeNet on MNIST, or load a saved model,
    # export it to ONNX once and run the prediction report.
    batch_size = 256
    # Batches have shape (batch_size, 1, 28, 28)
    train_iter, test_iter = load_data_mnist(batch_size=batch_size)

    model_path = "models/lenet_mnist.pth"
    net = nn.Sequential(
        nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
        nn.Linear(120, 84), nn.Sigmoid(),
        nn.Linear(84, 10))
    if not os.path.exists(model_path):
        # Create the checkpoint directory up front; torch.save does not create
        # missing parent directories, and failing after training wastes the run.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        # Train
        lr, num_epochs = 0.9, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        # Save the model weights
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")
    else:
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()

        # Convert the PyTorch model to ONNX format (only once)
        onnx_model_path = model_path.replace(".pth", ".onnx")
        if not os.path.exists(onnx_model_path):
            ########################################
            # pip install onnx
            # pip install onnxruntime
            # The dummy input must have the same shape as the model input
            input_shape = (1, 1, 28, 28)
            x = torch.randn(input_shape)
            # Put the dummy input on the device the model actually lives on;
            # picking CUDA whenever it is available mismatches a CPU model.
            device = next(net.parameters()).device
            torch.onnx.export(net, (x.to(device),), onnx_model_path, export_params=True)
        # Run prediction
        predict_ch3(net, test_iter)

AlexNet

import os

import torch
from d2l import torch as d2l  # pip install d2l
from torch import nn

from s2_train import train_ch6, predict_ch3

if __name__ == '__main__':
    # Script entry point: train AlexNet on Fashion-MNIST, or evaluate a saved one.
    batch_size = 128
    # Batches have shape (batch_size, 1, 224, 224)
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

    model_path = "alexnet.pth"
    alexnet_layers = [
        # A larger 11x11 window captures objects; stride 4 shrinks the output
        # height and width. The channel count is far larger than in LeNet.
        nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Smaller window; padding 2 keeps height and width unchanged while the
        # number of output channels grows.
        nn.Conv2d(96, 256, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Three consecutive convolutions with small windows. The channel count
        # grows further except in the last one, and no pooling follows the
        # first two of them.
        nn.Conv2d(256, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Flatten(),
        # Fully connected layers several times wider than LeNet's; dropout
        # is used to reduce overfitting.
        nn.Linear(6400, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        # Output layer: 10 classes for Fashion-MNIST instead of the paper's 1000.
        nn.Linear(4096, 10),
    ]
    net = nn.Sequential(*alexnet_layers)
    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        lr, num_epochs = 0.01, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

使用重复块的网络(VGG)

import os

import torch
from d2l import torch as d2l
from torch import nn

from s2_train import train_ch6, predict_ch3


def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: 3x3 conv + ReLU pairs followed by a 2x2 max-pool.

    Args:
        num_convs: number of conv/ReLU pairs.
        in_channels: input channels of the first convolution.
        out_channels: output channels of every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the layers in order.
    """
    stack = []
    channels = in_channels
    for _ in range(num_convs):
        stack += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        # Every convolution after the first one consumes the block's own output
        channels = out_channels
    stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stack)


def vgg(conv_arch):
    """Assemble a VGG-style network for single-channel input and 10 classes.

    Args:
        conv_arch: iterable of ``(num_convs, out_channels)`` pairs, one per
            VGG block.

    Returns:
        An ``nn.Sequential`` of the conv blocks, a flatten layer and three
        fully connected layers.
    """
    # Convolutional part
    stages = []
    channels = 1
    for num_convs, out_channels in conv_arch:
        stages.append(vgg_block(num_convs, channels, out_channels))
        channels = out_channels

    # Fully connected part; its input size is the last block's channel count
    # times 7 * 7.
    flatten = nn.Flatten()
    classifier = [
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10),
    ]
    return nn.Sequential(*stages, flatten, *classifier)


if __name__ == '__main__':
    # Script entry point: train a reduced VGG on Fashion-MNIST, or evaluate a
    # saved one.
    lr, num_epochs, batch_size = 0.05, 10, 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

    model_path = "vgg_net.pth"
    ratio = 4
    conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
    # Divide every block's channel count by `ratio` to get a smaller network
    small_conv_arch = [(n_convs, n_channels // ratio)
                       for n_convs, n_channels in conv_arch]
    net = vgg(small_conv_arch)
    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

网络中的网络(NiN)

import os

import torch
from d2l import torch as d2l
from torch import nn

from s2_train import train_ch6, predict_ch3


def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    """Build a NiN block: one regular convolution followed by two 1x1 convolutions.

    Every convolution is followed by a ReLU.

    Args:
        in_channels: input channels of the first convolution.
        out_channels: output channels of all three convolutions.
        kernel_size: kernel size of the first convolution.
        strides: stride of the first convolution.
        padding: padding of the first convolution.

    Returns:
        An ``nn.Sequential`` with the six layers in order.
    """
    spatial_conv = nn.Conv2d(in_channels, out_channels,
                             kernel_size=kernel_size, stride=strides,
                             padding=padding)
    layers = [spatial_conv, nn.ReLU()]
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)


if __name__ == '__main__':
    # Script entry point: train NiN on Fashion-MNIST, or evaluate a saved one.
    lr, num_epochs, batch_size = 0.1, 10, 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

    model_path = "nin_net.pth"
    nin_layers = [
        nin_block(1, 96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2d(3, stride=2),
        nin_block(96, 256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2d(3, stride=2),
        nin_block(256, 384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2d(3, stride=2),
        nn.Dropout(0.5),
        # There are 10 label classes
        nin_block(384, 10, kernel_size=3, strides=1, padding=1),
        nn.AdaptiveAvgPool2d((1, 1)),
        # Turn the 4-D output into a 2-D output of shape (batch size, 10)
        nn.Flatten(),
    ]
    net = nn.Sequential(*nin_layers)
    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

含并行连结的网络(GoogLeNet)

import os

import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3


class Inception(nn.Module):
    """Inception block with four parallel paths concatenated along channels.

    Args:
        in_channels: number of input channels.
        c1: output channels of path 1 (a single 1x1 convolution).
        c2: pair ``(reduce, out)`` for path 2 (1x1 then 3x3 convolution).
        c3: pair ``(reduce, out)`` for path 3 (1x1 then 5x5 convolution).
        c4: output channels of path 4 (3x3 max-pool then 1x1 convolution).
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)
        c2_reduce, c2_out = c2[0], c2[1]
        c3_reduce, c3_out = c3[0], c3[1]
        # Path 1: single 1x1 convolution
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2: 1x1 convolution followed by a 3x3 convolution
        self.p2_1 = nn.Conv2d(in_channels, c2_reduce, kernel_size=1)
        self.p2_2 = nn.Conv2d(c2_reduce, c2_out, kernel_size=3, padding=1)
        # Path 3: 1x1 convolution followed by a 5x5 convolution
        self.p3_1 = nn.Conv2d(in_channels, c3_reduce, kernel_size=1)
        self.p3_2 = nn.Conv2d(c3_reduce, c3_out, kernel_size=5, padding=2)
        # Path 4: 3x3 max-pooling followed by a 1x1 convolution
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run all four paths on ``x`` and concatenate them on dimension 1."""
        branch1 = F.relu(self.p1_1(x))

        branch2 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(branch2))

        branch3 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(branch3))

        branch4 = self.p4_1(x)
        branch4 = F.relu(self.p4_2(branch4))

        # Concatenate the outputs along the channel dimension
        return torch.cat((branch1, branch2, branch3, branch4), dim=1)


if __name__ == '__main__':
    # Script entry point: train GoogLeNet on Fashion-MNIST, or evaluate a
    # saved one.
    lr, num_epochs, batch_size = 0.1, 10, 128
    # Batches have shape (batch_size, 1, 96, 96)
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

    model_path = "google_net.pth"
    # Stage 1: 7x7 convolution and max-pooling
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 2: 1x1 and 3x3 convolutions, then max-pooling
    b2 = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(64, 192, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 3: two Inception blocks
    b3 = nn.Sequential(
        Inception(192, 64, (96, 128), (16, 32), 32),
        Inception(256, 128, (128, 192), (32, 96), 64),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 4: five Inception blocks
    b4 = nn.Sequential(
        Inception(480, 192, (96, 208), (16, 48), 64),
        Inception(512, 160, (112, 224), (24, 64), 64),
        Inception(512, 128, (128, 256), (24, 64), 64),
        Inception(512, 112, (144, 288), (32, 64), 64),
        Inception(528, 256, (160, 320), (32, 128), 128),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 5: two Inception blocks, global average pooling and flatten
    b5 = nn.Sequential(
        Inception(832, 256, (160, 320), (32, 128), 128),
        Inception(832, 384, (192, 384), (48, 128), 128),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
    )

    stages = (b1, b2, b3, b4, b5)
    net = nn.Sequential(*stages, nn.Linear(1024, 10))

    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

残差网络(ResNet)

import os

import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3


class Residual(nn.Module):  # @save
    """Residual block: two 3x3 convolutions with batch norm plus a shortcut.

    Args:
        input_channels: channels of the block input.
        num_channels: channels produced by the block.
        use_1x1conv: when true, the shortcut goes through a 1x1 convolution
            (with the same stride) instead of being the identity.
        strides: stride of the first convolution and of the 1x1 shortcut.
    """

    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        # Optional projection applied to the shortcut path
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels,
                      kernel_size=1, stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        out = self.conv1(X)
        out = F.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))
        shortcut = X if self.conv3 is None else self.conv3(X)
        out += shortcut
        return F.relu(out)


def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    """Build the list of ``Residual`` units that make up one ResNet stage.

    Unless ``first_block`` is set, the first unit halves height and width
    (stride 2) and uses a 1x1 convolution on the shortcut. For the first
    stage the first unit keeps the resolution; it only gets a 1x1 shortcut
    convolution when ``input_channels`` differs from ``num_channels``, so that
    ``input_channels`` is honoured instead of being silently ignored.

    Args:
        input_channels: channels entering the stage.
        num_channels: channels produced by every unit of the stage.
        num_residuals: number of ``Residual`` units.
        first_block: whether this is the first stage of the network.

    Returns:
        A list of ``Residual`` modules.
    """
    blk = []
    for i in range(num_residuals):
        if i > 0:
            # Later units always map num_channels -> num_channels
            blk.append(Residual(num_channels, num_channels))
        elif first_block:
            # Identical to Residual(num_channels, num_channels) when the
            # channel counts match; otherwise project the shortcut.
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=input_channels != num_channels))
        else:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
    return blk


if __name__ == '__main__':
    # Script entry point: train ResNet on Fashion-MNIST, or evaluate a saved one.
    lr, num_epochs, batch_size = 0.05, 10, 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

    model_path = "res_net.pth"
    # Stem: 7x7 convolution, batch norm, ReLU and max-pooling
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Four residual stages with two units each
    b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 2))
    b4 = nn.Sequential(*resnet_block(128, 256, 2))
    b5 = nn.Sequential(*resnet_block(256, 512, 2))
    head = (nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(), nn.Linear(512, 10))
    net = nn.Sequential(b1, b2, b3, b4, b5, *head)

    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

稠密连接网络(DenseNet)

import os

import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3


def conv_block(input_channels, num_channels):
    """Return a BatchNorm -> ReLU -> 3x3 convolution (padding 1) stack.

    Args:
        input_channels: channels of the input.
        num_channels: channels produced by the convolution.
    """
    norm = nn.BatchNorm2d(input_channels)
    activation = nn.ReLU()
    conv = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1)
    return nn.Sequential(norm, activation, conv)


class DenseBlock(nn.Module):
    """Dense block: each conv block sees the concatenation of all earlier outputs.

    Args:
        num_convs: number of conv blocks.
        input_channels: channels of the block input.
        num_channels: channels added by every conv block.
    """

    def __init__(self, num_convs, input_channels, num_channels):
        super().__init__()
        # The i-th conv block receives the input plus i earlier outputs
        stages = [
            conv_block(input_channels + idx * num_channels, num_channels)
            for idx in range(num_convs)
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, X):
        """Return ``X`` concatenated with every conv block's output on dim 1."""
        features = X
        for stage in self.net:
            new_features = stage(features)
            # Concatenate each block's input and output on the channel dimension
            features = torch.cat((features, new_features), dim=1)
        return features


def transition_block(input_channels, num_channels):
    """Return a BatchNorm -> ReLU -> 1x1 convolution -> 2x2 average-pool stack.

    Args:
        input_channels: channels of the input.
        num_channels: channels produced by the 1x1 convolution.
    """
    layers = [
        nn.BatchNorm2d(input_channels),
        nn.ReLU(),
        nn.Conv2d(input_channels, num_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2),
    ]
    return nn.Sequential(*layers)


if __name__ == '__main__':
    # Script entry point: train DenseNet on Fashion-MNIST, or evaluate a
    # saved one.
    lr, num_epochs, batch_size = 0.1, 10, 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

    model_path = "dense_net.pth"
    # Stem: 7x7 convolution, batch norm, ReLU and max-pooling
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # num_channels tracks the current number of channels
    num_channels, growth_rate = 64, 32
    num_convs_in_dense_blocks = [4, 4, 4, 4]
    last_idx = len(num_convs_in_dense_blocks) - 1
    blks = []
    for idx, num_convs in enumerate(num_convs_in_dense_blocks):
        blks.append(DenseBlock(num_convs, num_channels, growth_rate))
        # Output channels of the dense block just added
        num_channels += num_convs * growth_rate
        if idx == last_idx:
            continue
        # Between dense blocks, a transition layer halves the channel count
        halved = num_channels // 2
        blks.append(transition_block(num_channels, halved))
        num_channels = halved
    head = [
        nn.BatchNorm2d(num_channels),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(num_channels, 10),
    ]
    net = nn.Sequential(b1, *blks, *head)

    if os.path.exists(model_path):
        # A checkpoint already exists: restore the weights and evaluate
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
        predict_ch3(net, test_iter)
    else:
        # No checkpoint yet: train first, then save the weights
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")