如何在ResNet模型上补充垂直领域图像分类?

278 阅读 · 约 5 分钟

image.png

1.2 数据预处理

  • 清洗:删除质量差或无关的图片。
  • 增强:使用数据增强技术增加数据多样性。
  • 标准化:对图像进行归一化处理。
import torchvision.transforms as transforms

# Preprocessing pipeline: fixed resize, light augmentation, tensor
# conversion, and normalization with ImageNet statistics (matching the
# pretrained ResNet backbone used below).
_preprocessing_steps = [
    transforms.Resize((224, 224)),      # ResNet expects 224x224 inputs
    transforms.RandomHorizontalFlip(),  # augmentation: random mirror
    transforms.RandomRotation(10),      # augmentation: rotate up to +/-10 deg
    transforms.ToTensor(),              # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # ImageNet stats
]
transform = transforms.Compose(_preprocessing_steps)

1.3 数据加载

  • 数据集划分:将数据集划分为训练集、验证集和测试集。
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# One folder-per-class dataset per split; all three share the same transform.
train_dataset = ImageFolder('path/to/train', transform=transform)
val_dataset = ImageFolder('path/to/val', transform=transform)
test_dataset = ImageFolder('path/to/test', transform=transform)

# Shared loader settings; only the training split is shuffled.
_loader_opts = dict(batch_size=32, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_opts)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_opts)

2. 模型准备

2.1 加载预训练模型

  • 选择模型:选择一个预训练的ResNet模型,如ResNet50、ResNet101等。
  • 加载权重:使用预训练模型的权重作为初始权重。
import torch
import torch.nn as nn
import torchvision.models as models

# Load a ResNet-50 backbone with ImageNet-pretrained weights.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13;
# newer code would pass `weights=models.ResNet50_Weights.IMAGENET1K_V1`
# (the equivalent of pretrained=True) — confirm the installed version.
model = models.resnet50(pretrained=True)

2.2 修改模型结构

  • 冻结部分层:为了保留预训练模型的通用特征提取能力,可以冻结模型的前几层。
  • 添加自定义层:在预训练模型的顶部添加自定义的全连接层或分类层。
# Freeze every pretrained parameter so only the new head will be trained.
for frozen_param in model.parameters():
    frozen_param.requires_grad = False

# Replace the original 1000-way ImageNet classifier with a small custom
# head: 256-unit hidden layer + dropout, then the task's class logits.
num_features = model.fc.in_features
num_classes = 10  # assumed number of target classes
_head_layers = [
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes),
]
model.fc = nn.Sequential(*_head_layers)

# Prefer GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

3. 训练模型

3.1 定义损失函数和优化器

  • 损失函数:使用交叉熵损失函数(CrossEntropyLoss)。
  • 优化器:使用Adam或SGD优化器。
import torch.optim as optim

# Multi-class classification objective.
criterion = nn.CrossEntropyLoss()

# Adam over the new head only — the frozen backbone has no trainable params.
optimizer = optim.Adam(params=model.fc.parameters(), lr=1e-3)

3.2 训练循环

  • 数据加载器:使用PyTorch的DataLoader来加载数据。
  • 训练循环:编写训练循环,包括前向传播、计算损失、反向传播和优化步骤。
# --- Training loop with per-epoch validation and best-model checkpointing ---
num_epochs = 10
best_val_accuracy = 0.0

for epoch in range(num_epochs):
    # Training pass.
    model.train()
    epoch_loss = 0.0
    for batch_images, batch_targets in train_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        epoch_loss += batch_loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader)}')

    # Validation pass: no gradients; accumulate loss and correct predictions.
    model.eval()
    val_loss = 0.0
    num_correct = 0
    with torch.no_grad():
        for batch_images, batch_targets in val_loader:
            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)
            logits = model(batch_images)
            val_loss += criterion(logits, batch_targets).item()
            predictions = logits.argmax(dim=1)
            num_correct += (predictions == batch_targets).sum().item()

    val_accuracy = num_correct / len(val_dataset)
    print(f'Validation Loss: {val_loss/len(val_loader)}, Accuracy: {val_accuracy}')

    # Checkpoint whenever validation accuracy sets a new best.
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), 'resnet50_best.pth')

4. 评估模型

4.1 测试集评估

  • 测试集:使用独立的测试集评估模型的性能。
  • 指标:计算准确率、召回率、F1分数等指标。
# --- Final evaluation on the held-out test split ---
model.load_state_dict(torch.load('resnet50_best.pth'))  # restore best checkpoint
model.eval()

test_loss = 0.0
correct = 0
all_labels = []
all_predictions = []

with torch.no_grad():
    for batch_images, batch_targets in test_loader:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)
        logits = model(batch_images)
        test_loss += criterion(logits, batch_targets).item()
        predictions = logits.argmax(dim=1)
        correct += (predictions == batch_targets).sum().item()

        # Collect labels/predictions on CPU for the sklearn report below.
        all_labels.extend(batch_targets.cpu().numpy())
        all_predictions.extend(predictions.cpu().numpy())

test_accuracy = correct / len(test_dataset)
print(f'Test Loss: {test_loss/len(test_loader)}, Accuracy: {test_accuracy}')

# Per-class precision/recall/F1 summary.
from sklearn.metrics import classification_report

print(classification_report(all_labels, all_predictions))

5. 模型优化

5.1 正则化

  • L1/L2正则化:在损失函数中添加正则化项。
  • Dropout:在模型中添加Dropout层。
# Rebuild the classification head with Dropout(0.5) regularizing the
# 256-unit hidden layer.
# NOTE(review): this is identical to the head already installed earlier in
# the article — it re-initializes the trained weights if run afterwards.
custom_head = nn.Sequential(
    nn.Linear(num_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes),
)
model.fc = custom_head

5.2 数据增强

  • 进一步增强:增加更多的数据增强技术,如随机裁剪、颜色抖动等。
# Stronger augmentation pipeline: random 224 crop out of a 256 resize plus
# color jitter, on top of the flip/rotation augmentations used before.
_augmentation_steps = [
    transforms.Resize((256, 256)),    # upscale so the crop has room to move
    transforms.RandomCrop(224),       # random spatial crop
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                           saturation=0.2, hue=0.1),  # color perturbation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),  # ImageNet stats
]
transform = transforms.Compose(_augmentation_steps)

5.3 超参数调优

  • 网格搜索:使用网格搜索或随机搜索调整学习率、批量大小等超参数。
# Fix: the search loop below iterates `ParameterGrid(param_grid)`, but only
# GridSearchCV was imported, which raises NameError at runtime. Import
# ParameterGrid as well (GridSearchCV is kept in case other code uses it).
from sklearn.model_selection import GridSearchCV, ParameterGrid
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Hyperparameter grid to sweep: learning rate x batch size.
param_grid = {
    'lr': [0.001, 0.0001],
    'batch_size': [32, 64]
}

# Train one grid configuration from scratch and report its best val accuracy.
def train_model(lr, batch_size):
    """Fine-tune a fresh frozen ResNet-50 + custom head with the given
    learning rate and batch size.

    Returns the best validation accuracy seen over `num_epochs` epochs and
    checkpoints the corresponding weights to 'resnet50_best.pth'.
    NOTE(review): every configuration writes to the same checkpoint file,
    so the file on disk belongs to the last config that improved — confirm
    this is intended before reusing it after the sweep.
    """
    # Fresh loaders so the requested batch size takes effect.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Fresh frozen backbone and trainable head, as in the initial setup.
    model = models.resnet50(pretrained=True)
    for backbone_param in model.parameters():
        backbone_param.requires_grad = False
    model.fc = nn.Sequential(
        nn.Linear(num_features, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, num_classes),
    )
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=lr)
    # Reduce the learning rate when validation loss plateaus for 2 epochs.
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=2)

    best_val_accuracy = 0.0

    for epoch in range(num_epochs):
        # Training pass.
        model.train()
        epoch_loss = 0.0
        for images, targets in train_loader:
            images, targets = images.to(device), targets.to(device)

            optimizer.zero_grad()
            logits = model(images)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            epoch_loss += batch_loss.item()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader)}')

        # Validation pass.
        model.eval()
        val_loss = 0.0
        hits = 0
        with torch.no_grad():
            for images, targets in val_loader:
                images, targets = images.to(device), targets.to(device)
                logits = model(images)
                val_loss += criterion(logits, targets).item()
                hits += (logits.argmax(dim=1) == targets).sum().item()

        val_accuracy = hits / len(val_dataset)
        print(f'Validation Loss: {val_loss/len(val_loader)}, Accuracy: {val_accuracy}')

        # The scheduler reacts to the summed validation loss.
        scheduler.step(val_loss)

        # Checkpoint on every new best validation accuracy.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), 'resnet50_best.pth')

    return best_val_accuracy

# Sweep every (lr, batch_size) combination and remember the best one.
best_params = None
best_accuracy = 0.0

for candidate in ParameterGrid(param_grid):
    candidate_accuracy = train_model(candidate['lr'], candidate['batch_size'])
    if candidate_accuracy > best_accuracy:
        best_accuracy, best_params = candidate_accuracy, candidate

print(f'Best parameters: {best_params}, Best accuracy: {best_accuracy}')

6. 模型保存与部署

6.1 保存模型

  • 保存模型:将训练好的模型保存为文件,方便后续使用。
# Persist the final weights (state_dict only, not the whole module object).
torch.save(model.state_dict(), 'resnet50_final.pth')

6.2 部署

  • 模型导出:将模型导出为适合部署的格式,如ONNX、TensorFlow SavedModel等。
  • 推理优化:针对具体硬件平台进行优化,如使用TensorRT、OpenVINO等工具。
# Trace the model with a dummy batch (one 3x224x224 image) and export it
# to ONNX for deployment.
example_batch = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(
    model,
    example_batch,
    "resnet50_final.onnx",
    export_params=True,  # embed the trained weights in the ONNX file
    opset_version=10,
)

希望这些详细的步骤能帮助你在已有的ResNet模型基础上成功训练出适用于垂直领域图像识别的模型。如果有任何具体问题或需要进一步的帮助,请随时联系我!