1.2 数据预处理
- 清洗:删除质量差或无关的图片。
- 增强:使用数据增强技术增加数据多样性。
- 标准化:对图像进行归一化处理。
import torchvision.transforms as transforms

# Preprocessing pipeline applied to every image:
# resize -> light augmentation -> tensor conversion -> ImageNet normalisation.
_transform_steps = [
    transforms.Resize((224, 224)),        # fixed input size expected by ResNet
    transforms.RandomHorizontalFlip(),    # augmentation: mirror half the images
    transforms.RandomRotation(10),        # augmentation: rotate within +/-10 degrees
    transforms.ToTensor(),                # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # ImageNet channel statistics
]
transform = transforms.Compose(_transform_steps)
1.3 数据加载
- 数据集划分:将数据集划分为训练集、验证集和测试集。
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# One ImageFolder per split; expected layout is <root>/<class_name>/<image>.
# All three splits share the same preprocessing transform.
train_dataset = ImageFolder('path/to/train', transform=transform)
val_dataset = ImageFolder('path/to/val', transform=transform)
test_dataset = ImageFolder('path/to/test', transform=transform)

# Only the training loader shuffles; evaluation order stays deterministic.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
2. 模型准备
2.1 加载预训练模型
- 选择模型:选择一个预训练的ResNet模型,如ResNet50、ResNet101等。
- 加载权重:使用预训练模型的权重作为初始权重。
import torch
import torch.nn as nn
import torchvision.models as models

# Load ResNet-50 with ImageNet-pretrained weights.
# The boolean `pretrained=True` flag is deprecated since torchvision 0.13;
# the weights enum below is the supported, equivalent API.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
2.2 修改模型结构
- 冻结部分层:为了保留预训练模型的通用特征提取能力,可以冻结模型的前几层。
- 添加自定义层:在预训练模型的顶部添加自定义的全连接层或分类层。
# Freeze the entire pretrained backbone so only the new head will train.
# (The loop body below was re-indented; as pasted it was an IndentationError.)
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with a small task-specific head.
# Newly constructed modules default to requires_grad=True, so the head
# remains trainable even though the backbone is frozen.
num_features = model.fc.in_features   # input width of the original fc layer
num_classes = 10                      # assumed number of target classes
model.fc = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes)
)

# Move the model to the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
3. 训练模型
3.1 定义损失函数和优化器
- 损失函数:使用交叉熵损失函数(CrossEntropyLoss)。
- 优化器:使用Adam或SGD优化器。
import torch.optim as optim

# Cross-entropy is the standard loss for single-label multi-class tasks.
criterion = nn.CrossEntropyLoss()

# Optimise only the classifier head's parameters; the frozen backbone
# contributes nothing trainable, so passing model.fc keeps Adam's state small.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
3.2 训练循环
- 数据加载器:使用PyTorch的DataLoader来加载数据。
- 训练循环:编写训练循环,包括前向传播、计算损失、反向传播和优化步骤。
# Training loop: one training pass plus one validation pass per epoch.
# (Suite bodies below were re-indented; as pasted the loop was invalid Python.)
num_epochs = 10
best_val_accuracy = 0.0
for epoch in range(num_epochs):
    # --- training pass ---
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    # --- validation pass (no gradients) ---
    model.eval()
    val_loss = 0.0
    correct = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
    val_accuracy = correct / len(val_dataset)
    print(f'Validation Loss: {val_loss/len(val_loader)}, Accuracy: {val_accuracy}')

    # Checkpoint whenever validation accuracy improves.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'resnet50_best.pth')
4. 评估模型
4.1 测试集评估
- 测试集:使用独立的测试集评估模型的性能。
- 指标:计算准确率、召回率、F1分数等指标。
# Evaluate the best checkpoint on the held-out test set.
from sklearn.metrics import classification_report

# map_location lets the checkpoint load even when it was saved on a different
# device (e.g. trained on GPU, evaluated on CPU); without it torch.load fails.
model.load_state_dict(torch.load('resnet50_best.pth', map_location=device))
model.eval()
test_loss = 0.0
correct = 0
all_labels = []
all_predictions = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        # Collect per-sample results for the sklearn report below.
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())
test_accuracy = correct / len(test_dataset)
print(f'Test Loss: {test_loss/len(test_loader)}, Accuracy: {test_accuracy}')
# Per-class precision / recall / F1.
print(classification_report(all_labels, all_predictions))
5. 模型优化
5.1 正则化
- L1/L2正则化:在损失函数中添加正则化项。
- Dropout:在模型中添加Dropout层。
# Classifier head with Dropout between the linear layers for regularisation.
head = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes),
)
model.fc = head
5.2 数据增强
- 进一步增强:增加更多的数据增强技术,如随机裁剪、颜色抖动等。
# Stronger augmentation pipeline: resize larger, then random-crop to the
# network input size, add colour jitter on top of flip/rotation.
_augmented_steps = [
    transforms.Resize((256, 256)),         # upscale so the crop has room to move
    transforms.RandomCrop(224),            # random 224x224 crop
    transforms.RandomHorizontalFlip(),     # mirror augmentation
    transforms.RandomRotation(10),         # rotate within +/-10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                           saturation=0.2, hue=0.1),  # colour jitter
    transforms.ToTensor(),                 # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # ImageNet statistics
]
transform = transforms.Compose(_augmented_steps)
5.3 超参数调优
- 网格搜索:使用网格搜索或随机搜索调整学习率、批量大小等超参数。
# ParameterGrid (not GridSearchCV) is what the manual search loop below uses:
# GridSearchCV expects an sklearn estimator, which a PyTorch training loop
# is not. The original import left ParameterGrid undefined (NameError).
from sklearn.model_selection import ParameterGrid
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Hyperparameter grid to search over (2 x 2 = 4 configurations).
param_grid = {
    'lr': [0.001, 0.0001],
    'batch_size': [32, 64]
}
def train_model(lr, batch_size):
    """Train a fresh ResNet-50 classifier head with the given hyperparameters.

    Rebuilds the data loaders and the model from scratch so each grid-search
    configuration starts from the same pretrained state.
    (The function body was re-indented; as pasted it was invalid Python.)

    Args:
        lr: learning rate for the Adam optimizer.
        batch_size: mini-batch size for both train and validation loaders.

    Returns:
        Best validation accuracy observed over ``num_epochs`` epochs.
        Side effect: best weights are saved to 'resnet50_best.pth'.
    """
    # Rebuild the loaders so the requested batch size takes effect.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Fresh pretrained backbone; freeze everything except the new head.
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    for param in model.parameters():
        param.requires_grad = False
    in_features = model.fc.in_features  # derive locally rather than trusting the global
    model.fc = nn.Sequential(
        nn.Linear(in_features, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, num_classes)
    )
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=lr)
    # Decay the LR when the validation loss stops improving.
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2)

    best_val_accuracy = 0.0
    for epoch in range(num_epochs):
        # --- training pass ---
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

        # --- validation pass (no gradients) ---
        model.eval()
        val_loss = 0.0
        correct = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
        val_accuracy = correct / len(val_dataset)
        print(f'Validation Loss: {val_loss/len(val_loader)}, Accuracy: {val_accuracy}')

        # Step the scheduler on the validation loss.
        scheduler.step(val_loss)

        # Checkpoint whenever validation accuracy improves.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), 'resnet50_best.pth')
    return best_val_accuracy
# Exhaustive search over the grid, keeping the best-performing configuration.
# (The loop body was re-indented; as pasted it was invalid Python.)
best_params = None
best_accuracy = 0.0
for params in ParameterGrid(param_grid):
    accuracy = train_model(params['lr'], params['batch_size'])
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params
print(f'Best parameters: {best_params}, Best accuracy: {best_accuracy}')
6. 模型保存与部署
6.1 保存模型
- 保存模型:将训练好的模型保存为文件,方便后续使用。
# Persist only the learned parameters (state_dict), not the pickled module.
torch.save(model.state_dict(), 'resnet50_final.pth')
6.2 部署
- 模型导出:将模型导出为适合部署的格式,如ONNX、TensorFlow SavedModel等。
- 推理优化:针对具体硬件平台进行优化,如使用TensorRT、OpenVINO等工具。
# Export to ONNX for deployment. Switch to eval() first so the Dropout layer
# in the classifier head is traced as a no-op — exporting in train mode would
# bake stochastic dropout behaviour into the graph.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "resnet50_final.onnx", export_params=True, opset_version=10)
希望这些详细的步骤能帮助你在已有的ResNet模型基础上成功训练出适用于垂直领域图像识别的模型。如果有任何具体问题或需要进一步的帮助,请随时提出!