S01E15 S01E16 Linear Regression
From-scratch and concise implementations (one script: demo0 is from scratch, demo1 uses nn)
import random
import torch
from d2l import torch as d2l
from torch.utils import data
from torch import nn
def synthetic_data(w, b, num_examples):
    """
    Generate training samples:
    y = Xw + b + noise
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    noise = torch.normal(0, 0.01, y.shape)
    y = y + noise
    return X, y.reshape((-1, 1))
def data_iter(batch_size, features, labels):
    """
    Read the dataset in minibatches
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(
            indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
def linreg(X, w, b):
    """
    Define the model:
    the linear regression model
    """
    return torch.matmul(X, w) + b
def squared_loss(y_hat, y):
    """
    Define the loss function:
    squared loss
    """
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
def sgd(params, lr, batch_size):
    """
    Define the optimization algorithm:
    minibatch stochastic gradient descent
    """
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
def demo0(true_w, true_b):
    """
    Implementation from scratch
    """
    features, labels = synthetic_data(true_w, true_b, 1000)
    w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    print("Parameters before training", w.reshape(true_w.shape), b)
    batch_size = 10
    lr = 0.03
    num_epochs = 5
    net = linreg
    loss = squared_loss
    for epoch in range(num_epochs):
        for X, y in data_iter(batch_size, features, labels):
            l = loss(net(X, w, b), y)
            l.sum().backward()
            sgd([w, b], lr, batch_size)
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
    return w, b
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
def demo1(true_w, true_b):
    """
    Concise implementation
    """
    features, labels = d2l.synthetic_data(true_w, true_b, 1000)
    batch_size = 10
    data_iter = load_array((features, labels), batch_size)
    net = nn.Sequential(nn.Linear(2, 1))
    net[0].weight.data.normal_(0, 0.01)
    net[0].bias.data.fill_(0)
    loss = nn.MSELoss()
    trainer = torch.optim.SGD(net.parameters(), lr=0.03)
    num_epochs = 5
    for epoch in range(num_epochs):
        for X, y in data_iter:
            l = loss(net(X), y)
            trainer.zero_grad()
            l.backward()
            trainer.step()
        l = loss(net(features), labels)
        print(f'epoch {epoch + 1}, loss {l:f}')
    w = net[0].weight.data
    b = net[0].bias.data
    return w, b
if __name__ == '__main__':
    true_w = torch.tensor([2, -3.4])
    true_b = 4.2
    print("True parameters", true_w, true_b)
    w, b = demo1(true_w, true_b)
    print("Parameters after training", w.reshape(true_w.shape), b)
    print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
    print(f'estimation error of b: {true_b - b}')
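Note that the main block only runs demo1; demo0 (the from-scratch version) is defined but never called. A minimal sketch of an alternative main block that runs both and compares their estimation errors, reusing the functions above unchanged:
if __name__ == '__main__':
    true_w = torch.tensor([2, -3.4])
    true_b = 4.2
    # from-scratch version
    w0, b0 = demo0(true_w, true_b)
    # concise version
    w1, b1 = demo1(true_w, true_b)
    print('from-scratch error in w:', true_w - w0.reshape(true_w.shape))
    print('from-scratch error in b:', true_b - b0)
    print('concise error in w:', true_w - w1.reshape(true_w.shape))
    print('concise error in b:', true_b - b1)
Both versions should recover parameters close to true_w and true_b; only the amount of code differs.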
S01E20 S01E21 Softmax Regression
Implementation from scratch
import os
import torch
from d2l import torch as d2l
num_inputs = 784
num_outputs = 10
lr = 0.1
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition
def net(X):
    """
    The softmax regression model
    """
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
def cross_entropy(y_hat, y):
    """
    Cross-entropy loss
    """
    return - torch.log(y_hat[range(len(y_hat)), y])
def accuracy(y_hat, y):
    """Count the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(epoch + 1, ":", train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = ['pred: ' + pred + "\ttrue: " + true for true, pred in zip(trues, preds)]
    for t in titles:
        print(t)
if __name__ == '__main__':
    # W and b are module-level globals referenced by net() and updater()
    W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
    b = torch.zeros(num_outputs, requires_grad=True)
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    softmax_file = "softmax.pth"
    if os.path.exists(softmax_file):
        loaded_tensors = torch.load(softmax_file)
        W = loaded_tensors['w']
        b = loaded_tensors['b']
    else:
        num_epochs = 10
        train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
        tensors_dict = {'w': W, 'b': b}
        torch.save(tensors_dict, softmax_file)
    predict_ch3(net, test_iter)
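The softmax above exponentiates the raw logits directly, which can overflow for large values. A hedged sketch of the usual fix, subtracting the per-row maximum before exp (mathematically equivalent; not part of the original script):
def stable_softmax(X):
    # subtracting the row-wise max does not change the result,
    # but keeps exp() from overflowing for large logits
    X = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(dim=1, keepdim=True)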
Concise implementation
import os
import torch
from d2l import torch as d2l
from torch import nn
num_inputs = 784
num_outputs = 10
lr = 0.1
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
def accuracy(y_hat, y):
    """Count the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(epoch + 1, ":", train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    loss_count = 0
    count = 0
    for true, pred in zip(trues, preds):
        count += 1
        print('pred: ' + pred + "\ttrue: " + true)
        if pred != true:
            loss_count += 1
    print("wrong", loss_count, "total", count)
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "softmax_model.pth"
    if not os.path.exists(model_path):
        net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs, num_outputs))
        net.apply(init_weights)
        loss = nn.CrossEntropyLoss(reduction='none')
        trainer = torch.optim.SGD(net.parameters(), lr=lr)
        num_epochs = 20
        train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")
    else:
        net = nn.Sequential(nn.Flatten(), nn.Linear(num_inputs, num_outputs))
        net.load_state_dict(torch.load(model_path))
        net.eval()
    print("test batch size:", test_iter.batch_size)
    predict_ch3(net, test_iter)
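predict_ch3 only inspects the first batch from test_iter. A small sketch, reusing the evaluate_accuracy helper defined above, that could be appended at the end of the main block to report accuracy over the entire test set:
    # accuracy over the whole test set, not just the first batch
    test_acc = evaluate_accuracy(net, test_iter)
    print(f"accuracy on the full test set: {test_acc:.4f}")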
S01E24 Multilayer Perceptron
Implementation from scratch
import os
import torch
from d2l import torch as d2l
from torch import nn
num_inputs = 784
num_outputs = 10
lr = 0.1
def accuracy(y_hat, y):
    """Count the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(epoch + 1, ":", train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    loss_count = 0
    count = 0
    for true, pred in zip(trues, preds):
        count += 1
        print('pred: ' + pred + "\ttrue: " + true)
        if pred != true:
            loss_count += 1
    print("wrong", loss_count, "total", count, "accuracy", 1.0 - loss_count * 1.0 / count)
def relu(X):
    a = torch.zeros_like(X)
    return torch.max(X, a)


def net(X):
    X = X.reshape((-1, num_inputs))
    H = relu(X @ W1 + b1)
    return H @ W2 + b2
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "mlp.pth"
    if not os.path.exists(model_path):
        num_hiddens = 256
        W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
        b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
        W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
        b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
        params = [W1, b1, W2, b2]
        updater = torch.optim.SGD(params, lr=lr)
        loss = nn.CrossEntropyLoss(reduction='none')
        num_epochs = 10
        train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
        torch.save(params, model_path)
    else:
        params = torch.load(model_path)
        W1 = params[0]
        b1 = params[1]
        W2 = params[2]
        b2 = params[3]
    predict_ch3(net, test_iter)
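A quick sanity-check sketch (not in the original) that could be appended inside the main block once W1, b1, W2 and b2 exist: a fake batch of 28x28 images should yield one logit per class.
    # hypothetical sanity check of the from-scratch forward pass
    X_fake = torch.randn(4, 1, 28, 28)          # 4 fake Fashion-MNIST-sized images
    logits = net(X_fake)                        # net() reshapes to (-1, 784) internally
    assert logits.shape == (4, num_outputs)     # one logit per class for each image
    print("forward pass OK, logits shape:", tuple(logits.shape))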
Concise implementation
import os
import torch
from d2l import torch as d2l
from torch import nn
num_inputs = 784
num_outputs = 10
num_hiddens = 256
lr = 0.1
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
def accuracy(y_hat, y):
    """Count the number of correct predictions"""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
class Accumulator:
    """Accumulate sums over n variables"""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
def evaluate_accuracy(net, data_iter):
    """Compute the model's accuracy on the given dataset"""
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch (defined in Chapter 3)"""
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train the model (defined in Chapter 3)"""
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print(epoch + 1, ":", train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
def predict_ch3(net, test_iter, n=6):
    """Predict labels (defined in Chapter 3)"""
    for X, y in test_iter:
        break
    trues = d2l.get_fashion_mnist_labels(y)
    preds = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1))
    loss_count = 0
    count = 0
    for true, pred in zip(trues, preds):
        count += 1
        print('pred: ' + pred + "\ttrue: " + true)
        if pred != true:
            loss_count += 1
    print("wrong", loss_count, "total", count, "accuracy", 1.0 - loss_count * 1.0 / count)
def relu(X):
    # hand-written ReLU carried over from the from-scratch version;
    # the model below uses nn.ReLU() instead, so this helper is unused here
    a = torch.zeros_like(X)
    return torch.max(X, a)
if __name__ == '__main__':
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    model_path = "mlp_nn.pth"
    net = nn.Sequential(nn.Flatten(),
                        nn.Linear(num_inputs, num_hiddens),
                        nn.ReLU(),
                        nn.Linear(num_hiddens, num_outputs))
    if not os.path.exists(model_path):
        net.apply(init_weights)
        num_epochs = 10
        loss = nn.CrossEntropyLoss(reduction='none')
        trainer = torch.optim.SGD(net.parameters(), lr=lr)
        train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
        torch.save(net.state_dict(), model_path)
        print(f"Model state_dict saved to {model_path}")
    else:
        net.load_state_dict(torch.load(model_path))
        net.eval()
    predict_ch3(net, test_iter)
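A small usage sketch (assumed, not in the original) that could be appended inside the main block: classify a single test image with the loaded model and map the predicted index back to a label with d2l.get_fashion_mnist_labels, which is already used in predict_ch3 above.
    # classify one test image and print its predicted / true label
    X, y = next(iter(test_iter))
    with torch.no_grad():
        pred_idx = net(X[:1]).argmax(axis=1)
    print("pred:", d2l.get_fashion_mnist_labels(pred_idx)[0],
          "true:", d2l.get_fashion_mnist_labels(y[:1])[0])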