s2_train.py
import torch
from d2l import torch as d2l
from torch import nn
def train_ch6(net, train_iter, test_iter, num_epochs, lr):
    """Train a model with minibatch SGD (defined in Chapter 6).

    Re-initialises the weights of every ``nn.Linear`` / ``nn.Conv2d`` layer
    with Xavier-uniform values, then optimises ``net`` in place with plain
    SGD and cross-entropy loss. After each epoch it prints the mean training
    loss, the training accuracy and the test accuracy; at the end it prints
    the training throughput and the accumulated batch time.

    Args:
        net: The ``nn.Module`` to train; modified in place.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable of test batches, handed to
            ``d2l.evaluate_accuracy_gpu``.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for ``torch.optim.SGD``.

    Returns:
        None. Results are reported on stdout only.
    """
    def init_weights(m):
        # Exact type match on purpose: subclasses (e.g. lazy layers whose
        # weights are not materialised yet) must not be touched here.
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)

    net.apply(init_weights)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    # Feed batches on whatever device the model already lives on; this is a
    # no-op for a CPU model and avoids a device mismatch for a model that
    # the caller moved to an accelerator.
    device = next(iter(net.parameters())).device
    timer = d2l.Timer()
    # Running total of processed examples, kept outside the epoch loop so the
    # throughput report below is well defined even when num_epochs == 0.
    examples_seen = 0
    for epoch in range(num_epochs):
        # Accumulates: summed training loss, correct predictions, examples.
        metric = d2l.Accumulator(3)
        net.train()
        for X, y in train_iter:
            timer.start()
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                # The loss is a batch mean, so scale it back to a batch sum.
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
        train_l = metric[0] / metric[2]
        train_acc = metric[1] / metric[2]
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        examples_seen += metric[2]
        print(f'{epoch + 1} : loss {train_l:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}')
    elapsed = timer.sum()
    # Guard the division: nothing was timed when no batch was processed.
    throughput = examples_seen / elapsed if elapsed > 0 else 0.0
    print(f'{throughput:.1f} examples/sec ')
    print(f'耗时 {elapsed:.1f}s')
def predict_ch3(net, test_iter):
    """Predict labels for the whole test set and print the error summary.

    Runs ``net`` over every batch of ``test_iter``, takes the arg-max over
    axis 1 as the predicted class, and prints a single line with the number
    of wrong predictions, the number of examples and the resulting accuracy,
    aggregated over all batches.

    Class indices are compared directly instead of being mapped to
    Fashion-MNIST class names first, so the function is equally valid for
    other datasets (the file also calls it with MNIST digits).

    Args:
        net: Callable model mapping a batch ``X`` to per-class scores.
        test_iter: Iterable yielding ``(X, y)`` batches.

    Returns:
        None. The summary is printed to stdout.
    """
    # Run inference on the model's own device when it has parameters.
    first_param = (next(iter(net.parameters()), None)
                   if isinstance(net, nn.Module) else None)
    device = first_param.device if first_param is not None else None

    loss_count = 0  # wrong predictions over the whole iterator
    count = 0       # examples seen over the whole iterator
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for X, y in test_iter:
            if device is not None:
                X, y = X.to(device), y.to(device)
            preds = net(X).argmax(axis=1)
            # Flatten so a (N, 1) label tensor cannot broadcast against (N,).
            y = y.reshape(-1)
            loss_count += int((preds != y).sum().item())
            count += y.numel()

    # An empty iterator has no defined accuracy; report NaN instead of
    # raising ZeroDivisionError.
    accuracy = 1.0 - loss_count * 1.0 / count if count else float("nan")
    print("错误数量", loss_count, "总量", count, "准确率", accuracy)
LeNet
import os
import torch
from d2l import torch as d2l
from torch import nn
from s2_train import train_ch6, predict_ch3
if __name__ == '__main__':
    # Fashion-MNIST batches, a sigmoid/average-pooling LeNet, and a weight
    # cache on disk so training only happens on the first run.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
    model_path = "lenet.pth"

    # Two conv stages (each followed by 2x2 average pooling), then a
    # 16*5*5 -> 120 -> 84 -> 10 fully connected head.
    net = nn.Sequential(
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5),
        nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120),
        nn.Sigmoid(),
        nn.Linear(120, 84),
        nn.Sigmoid(),
        nn.Linear(84, 10),
    )

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        lr, num_epochs = 0.9, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
LeNet for MNIST
import os
import torch
import torchvision
from d2l import torch as d2l
from torch import nn
from torch.utils import data
from torchvision import transforms
from s2_train import train_ch6, predict_ch3
def load_data_mnist(batch_size, resize=None):
    """Download the MNIST dataset and wrap both splits in data loaders.

    Mirrors the layout of d2l's Fashion-MNIST loader, but reads
    ``torchvision.datasets.MNIST`` instead.
    Reference: https://www.paddlepaddle.org.cn/tutorials/projectdetail/5562875

    Args:
        batch_size: Number of examples per batch for both loaders.
        resize: Optional target size; when truthy, a ``transforms.Resize``
            step is applied before the tensor conversion.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its data.
    """
    # Resize (if requested) has to run before the image becomes a tensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    train_set = torchvision.datasets.MNIST(
        root="../data", train=True, transform=pipeline, download=True)
    test_set = torchvision.datasets.MNIST(
        root="../data", train=False, transform=pipeline, download=True)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=d2l.get_dataloader_workers())
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=d2l.get_dataloader_workers())
    return (train_loader, test_loader)
def get_mnist_labels(labels):
    """Return the text label ('0'..'9') for each MNIST class index.

    Args:
        labels: Iterable of values convertible with ``int()``; each one is
            used as an index into the ten digit names.

    Returns:
        A list of single-character digit strings, one per input label.
    """
    digit_names = [str(digit) for digit in range(10)]
    names = []
    for label in labels:
        names.append(digit_names[int(label)])
    return names
if __name__ == '__main__':
    # Train (or reload) LeNet on MNIST, export it to ONNX once, then report
    # the prediction accuracy on the test set.
    batch_size = 256
    train_iter, test_iter = load_data_mnist(batch_size=batch_size)
    model_path = "models/lenet_mnist.pth"
    net = nn.Sequential(
        nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
        nn.AvgPool2d(kernel_size=2, stride=2),
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
        nn.Linear(120, 84), nn.Sigmoid(),
        nn.Linear(84, 10))
    if not os.path.exists(model_path):
        # torch.save does not create missing parent directories. Create
        # "models/" before training so the epochs are not lost at save time.
        os.makedirs(os.path.dirname(model_path) or ".", exist_ok=True)
        lr, num_epochs = 0.9, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")
    else:
        net.load_state_dict(torch.load(model_path, weights_only=True))
    # Inference mode for both the ONNX trace and the prediction pass.
    net.eval()

    onnx_model_path = model_path.replace(".pth", ".onnx")
    if not os.path.exists(onnx_model_path):
        input_shape = (1, 1, 28, 28)
        # The dummy input must sit on the same device as the weights. The
        # net is never moved here, so picking CUDA whenever it is available
        # would make the export fail with a device mismatch.
        device = next(net.parameters()).device
        x = torch.randn(input_shape, device=device)
        torch.onnx.export(net, x, onnx_model_path, export_params=True)
    predict_ch3(net, test_iter)
AlexNet
import os
import torch
from d2l import torch as d2l
from torch import nn
from s2_train import train_ch6, predict_ch3
if __name__ == '__main__':
    # AlexNet on Fashion-MNIST images resized to 224, with a weight cache
    # on disk so training only happens on the first run.
    batch_size = 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
    model_path = "alexnet.pth"

    net = nn.Sequential(
        # Large-kernel, strided first convolution.
        nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(96, 256, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Three back-to-back 3x3 convolutions before the last pooling.
        nn.Conv2d(256, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Flatten(),
        # Classifier head with dropout after each hidden layer.
        nn.Linear(6400, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 10),
    )

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        lr, num_epochs = 0.01, 10
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
使用重复块的网络(VGG)
import os
import torch
from d2l import torch as d2l
from torch import nn
from s2_train import train_ch6, predict_ch3
def vgg_block(num_convs, in_channels, out_channels):
    """Build one VGG block: repeated 3x3 conv + ReLU, then 2x2 max pooling.

    Args:
        num_convs: Number of conv/ReLU pairs in the block.
        in_channels: Channel count of the block's input.
        out_channels: Channel count produced by every convolution.

    Returns:
        An ``nn.Sequential`` holding the conv/ReLU pairs followed by one
        ``nn.MaxPool2d(kernel_size=2, stride=2)``.
    """
    stages = []
    channels = in_channels
    for _ in range(num_convs):
        stages += [
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        ]
        # Every convolution after the first consumes the block's own output.
        channels = out_channels
    stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stages)
def vgg(conv_arch):
    """Assemble a VGG network for single-channel input and 10 classes.

    Args:
        conv_arch: Iterable of ``(num_convs, out_channels)`` pairs, one per
            VGG block, applied in order.

    Returns:
        An ``nn.Sequential`` made of the conv blocks, a flatten layer and a
        three-layer classifier whose first layer expects
        ``out_channels * 7 * 7`` features from the last block.
    """
    feature_blocks = []
    in_channels = 1
    for (num_convs, out_channels) in conv_arch:
        block = vgg_block(num_convs, in_channels, out_channels)
        feature_blocks.append(block)
        # The next block reads what this one produced.
        in_channels = out_channels

    classifier = [
        nn.Linear(out_channels * 7 * 7, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 10),
    ]
    return nn.Sequential(*feature_blocks, nn.Flatten(), *classifier)
if __name__ == '__main__':
    # A VGG variant with every channel count divided by `ratio`, trained on
    # Fashion-MNIST resized to 224, with a weight cache on disk.
    lr, num_epochs, batch_size = 0.05, 10, 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
    model_path = "vgg_net.pth"

    ratio = 4
    conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
    # Same number of convolutions per block, but a quarter of the channels.
    small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
    net = vgg(small_conv_arch)

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
网络中的网络(NiN)
import os
import torch
from d2l import torch as d2l
from torch import nn
from s2_train import train_ch6, predict_ch3
def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    """Build one NiN block: a spatial conv followed by two 1x1 convs.

    Args:
        in_channels: Channel count of the block's input.
        out_channels: Channel count produced by all three convolutions.
        kernel_size: Kernel size of the first convolution.
        strides: Stride of the first convolution.
        padding: Padding of the first convolution.

    Returns:
        An ``nn.Sequential`` of three convolutions, each followed by ReLU.
    """
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
        nn.ReLU(),
    ]
    # Two 1x1 convolutions that mix channels at every pixel location.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channels, out_channels, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)
if __name__ == '__main__':
    # Network-in-Network on Fashion-MNIST resized to 224, with a weight
    # cache on disk so training only happens on the first run.
    lr, num_epochs, batch_size = 0.1, 10, 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
    model_path = "nin_net.pth"

    net = nn.Sequential(
        nin_block(1, 96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2d(3, stride=2),
        nin_block(96, 256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2d(3, stride=2),
        nin_block(256, 384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2d(3, stride=2),
        nn.Dropout(0.5),
        # The last block emits one channel per class; global average
        # pooling plus flatten turns that into a (batch, 10) output.
        nin_block(384, 10, kernel_size=3, strides=1, padding=1),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
    )

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
含并行连结的网络(GoogLeNet)
import os
import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3
class Inception(nn.Module):
    """Inception block: four parallel paths concatenated along channels.

    Args:
        in_channels: Channel count of the input.
        c1: Output channels of path 1 (single 1x1 conv).
        c2: ``(reduce, out)`` channels of path 2 (1x1 conv, then 3x3 conv).
        c3: ``(reduce, out)`` channels of path 3 (1x1 conv, then 5x5 conv).
        c4: Output channels of path 4 (3x3 max pool, then 1x1 conv).
        **kwargs: Forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)
        c2_reduce, c2_out = c2[0], c2[1]
        c3_reduce, c3_out = c3[0], c3[1]
        # Path 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2: 1x1 convolution followed by a padded 3x3 convolution.
        self.p2_1 = nn.Conv2d(in_channels, c2_reduce, kernel_size=1)
        self.p2_2 = nn.Conv2d(c2_reduce, c2_out, kernel_size=3, padding=1)
        # Path 3: 1x1 convolution followed by a padded 5x5 convolution.
        self.p3_1 = nn.Conv2d(in_channels, c3_reduce, kernel_size=1)
        self.p3_2 = nn.Conv2d(c3_reduce, c3_out, kernel_size=5, padding=2)
        # Path 4: stride-1 3x3 max pooling followed by a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run all four paths on ``x`` and concatenate them on dim 1."""
        branch1 = F.relu(self.p1_1(x))

        reduced2 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(reduced2))

        reduced3 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(reduced3))

        pooled = self.p4_1(x)
        branch4 = F.relu(self.p4_2(pooled))

        return torch.cat((branch1, branch2, branch3, branch4), dim=1)
if __name__ == '__main__':
    # GoogLeNet on Fashion-MNIST resized to 96, with a weight cache on disk
    # so training only happens on the first run.
    lr, num_epochs, batch_size = 0.1, 10, 128
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    model_path = "google_net.pth"

    # Stage 1: 7x7 strided convolution plus max pooling.
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 2: 1x1 then 3x3 convolution, followed by max pooling.
    b2 = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(64, 192, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 3: two Inception blocks, then max pooling.
    b3 = nn.Sequential(
        Inception(192, 64, (96, 128), (16, 32), 32),
        Inception(256, 128, (128, 192), (32, 96), 64),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 4: five Inception blocks, then max pooling.
    b4 = nn.Sequential(
        Inception(480, 192, (96, 208), (16, 48), 64),
        Inception(512, 160, (112, 224), (24, 64), 64),
        Inception(512, 128, (128, 256), (24, 64), 64),
        Inception(512, 112, (144, 288), (32, 64), 64),
        Inception(528, 256, (160, 320), (32, 128), 128),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Stage 5: two Inception blocks, global average pooling and flatten.
    b5 = nn.Sequential(
        Inception(832, 256, (160, 320), (32, 128), 128),
        Inception(832, 384, (192, 384), (48, 128), 128),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
    )
    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
残差网络(ResNet)
import os
import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3
class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        input_channels: Channel count of the input.
        num_channels: Channel count produced by the block.
        use_1x1conv: When true, the shortcut goes through a 1x1 convolution
            (with the same stride as the first convolution) so that its
            shape matches the main path.
        strides: Stride of the first convolution (and of the 1x1 shortcut).
    """

    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            input_channels, num_channels,
            kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(
            num_channels, num_channels, kernel_size=3, padding=1)
        # Optional projection for the shortcut; None means identity.
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels,
                      kernel_size=1, stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        out = F.relu(self.bn1(self.conv1(X)))
        out = self.bn2(self.conv2(out))
        shortcut = X if self.conv3 is None else self.conv3(X)
        out += shortcut
        return F.relu(out)
def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    """Create the list of ``Residual`` units that forms one ResNet stage.

    Args:
        input_channels: Channel count entering the stage.
        num_channels: Channel count produced by every unit in the stage.
        num_residuals: Number of ``Residual`` units to create.
        first_block: When true, the leading unit does not downsample.

    Returns:
        A plain Python list of ``Residual`` modules (not an
        ``nn.Sequential``).
    """
    units = []
    for idx in range(num_residuals):
        # Only the leading unit of a non-first stage changes the channel
        # count and uses stride 2, via a 1x1 convolution on its shortcut.
        downsample = idx == 0 and not first_block
        if downsample:
            unit = Residual(input_channels, num_channels,
                            use_1x1conv=True, strides=2)
        else:
            unit = Residual(num_channels, num_channels)
        units.append(unit)
    return units
if __name__ == '__main__':
    # ResNet on Fashion-MNIST resized to 96, with a weight cache on disk
    # so training only happens on the first run.
    lr, num_epochs, batch_size = 0.05, 10, 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    model_path = "res_net.pth"

    # Stem: 7x7 strided convolution, batch norm, ReLU and max pooling.
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    # Four residual stages of two units each; only the first stage is
    # built with first_block=True.
    b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 2))
    b4 = nn.Sequential(*resnet_block(128, 256, 2))
    b5 = nn.Sequential(*resnet_block(256, 512, 2))
    net = nn.Sequential(
        b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(512, 10),
    )

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)
稠密连接网络(DenseNet)
import os
import torch
from d2l import torch as d2l
from torch import nn
from torch.nn import functional as F
from s2_train import train_ch6, predict_ch3
def conv_block(input_channels, num_channels):
    """Build a batch-norm -> ReLU -> 3x3 conv unit.

    Args:
        input_channels: Channel count of the input.
        num_channels: Channel count produced by the convolution.

    Returns:
        An ``nn.Sequential`` with the three layers in that order.
    """
    norm = nn.BatchNorm2d(input_channels)
    activation = nn.ReLU()
    conv = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1)
    return nn.Sequential(norm, activation, conv)
class DenseBlock(nn.Module):
    """Dense block: every conv unit sees all earlier outputs concatenated.

    Args:
        num_convs: Number of conv units in the block.
        input_channels: Channel count of the block's input.
        num_channels: Channels added by each conv unit.
    """

    def __init__(self, num_convs, input_channels, num_channels):
        super().__init__()
        # Unit k receives the original input plus the outputs of the k
        # units before it, hence the growing input width.
        units = [
            conv_block(num_channels * k + input_channels, num_channels)
            for k in range(num_convs)
        ]
        self.net = nn.Sequential(*units)

    def forward(self, X):
        """Return ``X`` concatenated on dim 1 with every unit's output."""
        features = X
        for unit in self.net:
            new_maps = unit(features)
            # Append the fresh feature maps along the channel dimension.
            features = torch.cat((features, new_maps), dim=1)
        return features
def transition_block(input_channels, num_channels):
    """Build a transition layer: BN -> ReLU -> 1x1 conv -> 2x2 avg pool.

    Args:
        input_channels: Channel count of the input.
        num_channels: Channel count produced by the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` with the four layers in that order.
    """
    stages = [
        nn.BatchNorm2d(input_channels),
        nn.ReLU(),
        # The 1x1 convolution changes the channel count.
        nn.Conv2d(input_channels, num_channels, kernel_size=1),
        # Stride-2 average pooling reduces height and width.
        nn.AvgPool2d(kernel_size=2, stride=2),
    ]
    return nn.Sequential(*stages)
if __name__ == '__main__':
    # DenseNet on Fashion-MNIST resized to 96, with a weight cache on disk
    # so training only happens on the first run.
    lr, num_epochs, batch_size = 0.1, 10, 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    model_path = "dense_net.pth"

    # Stem: 7x7 strided convolution, batch norm, ReLU and max pooling.
    b1 = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )

    # num_channels tracks the channel count entering the next layer.
    num_channels, growth_rate = 64, 32
    num_convs_in_dense_blocks = [4, 4, 4, 4]
    blks = []
    last_index = len(num_convs_in_dense_blocks) - 1
    for i, num_convs in enumerate(num_convs_in_dense_blocks):
        blks.append(DenseBlock(num_convs, num_channels, growth_rate))
        # Each conv unit in the dense block adds growth_rate channels.
        num_channels += num_convs * growth_rate
        if i != last_index:
            # Between dense blocks, a transition layer halves the channels.
            halved = num_channels // 2
            blks.append(transition_block(num_channels, halved))
            num_channels = halved

    net = nn.Sequential(
        b1,
        *blks,
        nn.BatchNorm2d(num_channels),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(num_channels, 10),
    )

    if os.path.exists(model_path):
        # Cached weights found: restore them and switch to inference mode.
        net.load_state_dict(torch.load(model_path, weights_only=True))
        net.eval()
    else:
        # No checkpoint yet: train from scratch and persist the weights.
        train_ch6(net, train_iter, test_iter, num_epochs, lr)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")

    predict_ch3(net, test_iter)