Fine-Tuning
Transfer learning transfers knowledge learned from a source dataset to a target dataset.
A common technique in transfer learning is fine-tuning, which consists of the following four steps.
- Pretrain a neural network model, the source model, on a source dataset (e.g., the ImageNet dataset).
- Create a new neural network model, the target model. This copies all model designs and their parameters from the source model, except the output layer. We assume that these model parameters contain the knowledge learned from the source dataset and that this knowledge will also apply to the target dataset. We also assume that the output layer of the source model is closely tied to the labels of the source dataset, so that layer is not used in the target model.
- Add an output layer to the target model whose number of outputs equals the number of classes in the target dataset, and randomly initialize the parameters of this layer.
- Train the target model on the target dataset (such as a chair dataset). The output layer is trained from scratch, while the parameters of all other layers are fine-tuned, starting from the source model's parameters.
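The code below implements these steps in PyTorch: a ResNet-18 pretrained on ImageNet serves as the source model and is fine-tuned to recognize hot dogs in images.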
import os
import torch
import torchvision
from torch import nn
from d2l import torch as d2l
d2l.DATA_HUB['hotdog'] = (d2l.DATA_URL + 'hotdog.zip', 'fba480ffa8aa7e0febbb511d181409f899b9baa5')
data_dir = d2l.download_extract('hotdog')
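The hot dog dataset consists of 1400 positive-class images containing hot dogs and as many negative-class images containing other foods; 1000 images of each class are used for training and the rest for testing.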
def load_image():
    """Load the raw (untransformed) training and test images."""
    train_imgs = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'train'))
    test_imgs = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'test'))
    return train_imgs, test_imgs
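To get a feel for the data, the first eight positive and last eight negative examples can be displayed; a minimal sketch, assuming d2l's show_images helper:

train_imgs, test_imgs = load_image()
hotdogs = [train_imgs[i][0] for i in range(8)]           # first 8 positive examples
not_hotdogs = [train_imgs[-i - 1][0] for i in range(8)]  # last 8 negative examples
d2l.show_images(hotdogs + not_hotdogs, 2, 8, scale=1.4)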
def load_data(batch_size):
    # Normalize each channel with the means and standard deviations of the
    # RGB channels computed on ImageNet (the source dataset)
    normalize = torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    # Training: random resized crop and horizontal flip for augmentation
    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        normalize])
    # Testing: deterministic resize and center crop
    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize([256, 256]),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        normalize])
    train_dataloader = torch.utils.data.DataLoader(
        torchvision.datasets.ImageFolder(
            os.path.join(data_dir, 'train'), transform=train_transform),
        batch_size=batch_size, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(
        torchvision.datasets.ImageFolder(
            os.path.join(data_dir, 'test'), transform=test_transform),
        batch_size=batch_size)
    return train_dataloader, test_dataloader
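A quick sanity check of the loader output shapes (a hypothetical snippet; the batch size of 128 is arbitrary here):

train_dl, test_dl = load_data(128)
X, y = next(iter(train_dl))
print(X.shape, y.shape)  # expected: torch.Size([128, 3, 224, 224]) torch.Size([128])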
def train_batch_ch13(net, X, y, loss, trainer):
    """Run one training step on a single minibatch."""
    net.train()
    trainer.zero_grad()
    pred = net(X)
    # loss uses reduction="none", so l holds one value per example
    l = loss(pred, y)
    l.sum().backward()
    trainer.step()
    train_loss_sum = l.sum()
    train_acc_sum = d2l.accuracy(pred, y)
    return train_loss_sum, train_acc_sum
def train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs):
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # Accumulate: sum of losses, number of correct predictions,
        # number of examples, number of labels
        metric = d2l.Accumulator(4)
        for i, (features, labels) in enumerate(train_iter):
            timer.start()
            l, acc = train_batch_ch13(net, features, labels, loss, trainer)
            metric.add(l, acc, labels.shape[0], labels.numel())
            timer.stop()
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        print(f'loss {metric[0] / metric[2]:.3f}, train acc '
              f'{metric[1] / metric[3]:.3f}, test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec')
def train_fine_tuning(net, learning_rate, batch_size=128, num_epochs=5, param_group=True):
    """
    :param net: the network model
    :param learning_rate: the base learning rate
    :param batch_size: the minibatch size
    :param num_epochs: the number of training epochs
    :param param_group: if True, the parameters of the output layer are
        trained with ten times the base learning rate
    """
    train_dl, test_dl = load_data(batch_size)
    loss = nn.CrossEntropyLoss(reduction="none")
    if param_group:
        # All parameters except those of the output layer use the base learning rate
        params_1x = [param for name, param in net.named_parameters()
                     if name not in ["fc.weight", "fc.bias"]]
        params = [{'params': params_1x},
                  {'params': net.fc.parameters(), 'lr': learning_rate * 10}]
        trainer = torch.optim.SGD(params, lr=learning_rate, weight_decay=0.001)
    else:
        trainer = torch.optim.SGD(net.parameters(), lr=learning_rate,
                                  weight_decay=0.001)
    train_ch13(net, train_dl, test_dl, loss, trainer, num_epochs)
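To verify the effect of param_group, the optimizer's parameter groups can be inspected directly; a minimal sketch (the base learning rate 5e-5 mirrors the call below):

net = torchvision.models.resnet18()  # no pretrained weights needed for this check
net.fc = nn.Linear(net.fc.in_features, 2)
params_1x = [p for n, p in net.named_parameters()
             if n not in ["fc.weight", "fc.bias"]]
trainer = torch.optim.SGD([{'params': params_1x},
                           {'params': net.fc.parameters(), 'lr': 5e-5 * 10}],
                          lr=5e-5)
print([g['lr'] for g in trainer.param_groups])  # [5e-05, 0.0005]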
if __name__ == '__main__':
    # Use a ResNet-18 pretrained on the ImageNet dataset as the source model;
    # pretrained=True downloads the pretrained model parameters automatically
    finetune_net = torchvision.models.resnet18(pretrained=True)
    # The pretrained source model instance contains many feature layers and one
    # output layer, fc. After ResNet's global average pooling layer, this fully
    # connected layer maps to the 1000 classes of the ImageNet dataset. Here we
    # replace it with a layer whose out_features is 2, for binary classification.
    finetune_net.fc = nn.Linear(finetune_net.fc.in_features, 2)
    nn.init.xavier_uniform_(finetune_net.fc.weight)
    train_fine_tuning(finetune_net, 5e-5, num_epochs=3)
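For comparison, the same architecture can be trained from scratch with all parameters randomly initialized; this typically requires a larger base learning rate and reaches lower test accuracy than fine-tuning. A sketch following the pattern above:

scratch_net = torchvision.models.resnet18()
scratch_net.fc = nn.Linear(scratch_net.fc.in_features, 2)
train_fine_tuning(scratch_net, 5e-4, param_group=False)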