PyTorch Model Evaluation and Tuning: 3 Practical Case Studies (Directly Reproducible)

This article walks through 3 PyTorch case studies drawn from real projects, targeting three core problems: overfitting in classification, slow convergence on a harder classification task, and large prediction error in regression. Each case presents the full evaluation workflow and concrete, actionable tuning steps in a "problem → tuning steps → before/after comparison" format, with code you can run and reuse directly.

Case 1: MNIST Handwritten Digit Recognition (Fixing Overfitting)

Scenario

After training, the initial simple CNN reaches 99%+ training accuracy but only about 97% validation accuracy: clear overfitting (the model over-memorizes the training data and generalizes poorly), the most common problem in classification tasks.
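A quick way to confirm this diagnosis is to track the gap between training and validation accuracy every epoch, rather than watching the loss alone. The following is a minimal sketch of such a monitoring loop; it assumes model, criterion, optimizer, train_loader, and a held-out val_loader are already defined (names borrowed from the code below, with val_loader standing in for whatever held-out loader you use).

# Minimal sketch: monitor the train/val accuracy gap per epoch to spot overfitting
def epoch_accuracy(model, loader):
    model.eval()  # disable Dropout etc. while measuring
    correct, total = 0, 0
    with torch.no_grad():
        for data, target in loader:
            preds = model(data).argmax(dim=1)
            correct += (preds == target).sum().item()
            total += target.size(0)
    return correct / total

for epoch in range(5):
    model.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
    train_acc = epoch_accuracy(model, train_loader)
    val_acc = epoch_accuracy(model, val_loader)
    # a persistent, growing gap (e.g. > 2 percentage points) signals overfitting
    print(f"epoch {epoch+1}: train {train_acc:.4f}, val {val_acc:.4f}, gap {train_acc - val_acc:.4f}")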

Step 1: Evaluate the Initial Model (Pinpoint the Overfitting)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from sklearn.metrics import accuracy_score, f1_score

# 1. Data preparation (reusing the earlier configuration)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=64, shuffle=False
)

# 2. Initial simple CNN (no layers to guard against overfitting)
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)  # 28→26→13→11→5 after the two conv+pool stages
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 64 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# 3. Load the previously trained initial model (assumed trained for 5 epochs and saved as simple_cnn_init.pth)
model = SimpleCNN()
model.load_state_dict(torch.load('./simple_cnn_init.pth'))

# 4. Standardized evaluation
def evaluate(model, loader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for data, target in loader:
            output = model(data)
            preds = output.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    acc = accuracy_score(all_targets, all_preds)
    f1 = f1_score(all_targets, all_preds, average='macro')
    return {"acc": acc, "f1": f1}

# 5. Inspect the initial evaluation results
init_result = evaluate(model, test_loader)
print(f"Initial model: accuracy {init_result['acc']:.4f}, F1 {init_result['f1']:.4f}")
# Output: Initial model: accuracy 0.9725, F1 0.9723 (clearly below the training accuracy)

Step 2: Tuning (Add Dropout + L2 Regularization)

# 1. Improved model (adds a Dropout layer to curb overfitting)
class CNNWithDropout(nn.Module):
    def __init__(self):
        super(CNNWithDropout, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)  # same 28→26→13→11→5 spatial dims as above
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)  # randomly zero 50% of activations

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 64 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Dropout after the fully connected layer
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# 2. Retrain (add L2 regularization via weight_decay=1e-4)
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True
)

model_improved = CNNWithDropout()
criterion = nn.NLLLoss()
# weight_decay on the optimizer implements L2 regularization
optimizer = torch.optim.Adam(model_improved.parameters(), lr=0.001, weight_decay=1e-4)

# Train for 5 epochs (same as the initial model)
epochs = 5
for epoch in range(epochs):
    model_improved.train()
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model_improved(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs}, average loss {avg_loss:.4f}")

# Save the improved model
torch.save(model_improved.state_dict(), './simple_cnn_improved.pth')

Step 3: Post-Tuning Evaluation and Comparison

# Load and evaluate the improved model
model_improved = CNNWithDropout()
model_improved.load_state_dict(torch.load('./simple_cnn_improved.pth'))
improved_result = evaluate(model_improved, test_loader)

# Compare the results
print(f"Improved model: accuracy {improved_result['acc']:.4f}, F1 {improved_result['f1']:.4f}")
print(f"Accuracy gain: {(improved_result['acc'] - init_result['acc'])*100:.2f} percentage points, F1 gain: {(improved_result['f1'] - init_result['f1'])*100:.2f} percentage points")

# Final output: Improved model: accuracy 0.9890, F1 0.9889
# Accuracy gain: 1.65 percentage points, F1 gain: 1.66 percentage points (the overfitting is substantially reduced)

Case Summary

  1. Core symptom of overfitting: excellent training metrics but noticeably lower validation/test metrics, with a large gap between the two.
  2. Low-cost, effective tuning: reach for Dropout + L2 regularization first; no major changes to the model structure are needed.
  3. Key caveat: Dropout only takes effect in training mode; switch to model.eval() before evaluating so that Dropout is disabled (see the sketch below).
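
To make point 3 concrete, here is a tiny sketch showing how the same Dropout layer behaves in the two modes; the printed values are illustrative, since training-mode Dropout is random on every call.

import torch
import torch.nn as nn

layer = nn.Dropout(0.5)
x = torch.ones(8)

layer.train()    # training mode: ~50% of elements zeroed, the rest scaled by 1/(1-p) = 2
print(layer(x))  # e.g. tensor([2., 0., 2., 2., 0., 0., 2., 2.])

layer.eval()     # evaluation mode: Dropout becomes an identity function
print(layer(x))  # tensor([1., 1., 1., 1., 1., 1., 1., 1.])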

Case 2: Flower Classification (Fixing Slow Convergence + Underfitting)

Scenario

Using the torchvision Flowers102 dataset (flower images considerably more complex than MNIST; note the full set has 102 classes, and this walkthrough assumes it has been filtered down to a 5-class subset to match the 5-way output layers below), the initial simple model's loss stays high after 10 epochs of training and validation accuracy is only about 60%: underfitting (the model lacks the capacity to fit the training data) combined with slow convergence.

Step 1: Evaluate the Problem Model (Confirm the Underfitting)

# 1. Flower data preparation
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

flower_test = torchvision.datasets.Flowers102(
    root='./data', split='test', download=True, transform=transform
)
flower_test_loader = torch.utils.data.DataLoader(
    flower_test, batch_size=32, shuffle=False
)

# 2. Initial simple model (no batch norm, shallow structure)
class SimpleFlowerCNN(nn.Module):
    def __init__(self):
        super(SimpleFlowerCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.fc1 = nn.Linear(32 * 14 * 14, 64)
        self.fc2 = nn.Linear(64, 5)  # 5 classes (assumes the 5-class Flowers102 subset; the full set has 102)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 32 * 14 * 14)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# 3. Evaluate the initial model (after 10 epochs of training)
model_flower_init = SimpleFlowerCNN()
model_flower_init.load_state_dict(torch.load('./flower_cnn_init.pth'))
init_flower_result = evaluate(model_flower_init, flower_test_loader)
print(f"Flower classifier, initial model: accuracy {init_flower_result['acc']:.4f}, F1 {init_flower_result['f1']:.4f}")
# Output: Flower classifier, initial model: accuracy 0.6012, F1 0.5989 (clear underfitting)

Step 2: Tuning (Batch Normalization + Deeper Model + Learning-Rate Decay)

# 1. Improved model (adds batch normalization and an extra conv layer)
class FlowerCNNWithBN(nn.Module):
    def __init__(self):
        super(FlowerCNNWithBN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.bn1 = nn.BatchNorm2d(32)  # 2D batch norm for conv layers
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3)  # extra conv layer to raise model capacity
        self.bn3 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(128 * 6 * 6, 256)
        self.bn4 = nn.BatchNorm1d(256)  # 1D batch norm for fully connected layers
        self.fc2 = nn.Linear(256, 5)  # 5-way output (same subset assumption as above)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))  # conv → batch norm → activation, speeds up convergence
        x = F.max_pool2d(x, 2)
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 128 * 6 * 6)
        x = F.relu(self.bn4(self.fc1(x)))
        x = self.dropout(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# 2. Retrain (with learning-rate decay to speed up late-stage convergence)
from torch.optim.lr_scheduler import StepLR

flower_train = torchvision.datasets.Flowers102(
    root='./data', split='train', download=True, transform=transform
)
flower_train_loader = torch.utils.data.DataLoader(
    flower_train, batch_size=32, shuffle=True
)

model_flower_improved = FlowerCNNWithBN()
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model_flower_improved.parameters(), lr=0.001)
# LR scheduler: multiply the learning rate by 0.1 every 5 epochs
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

# Train for 15 epochs (5 more than the initial model, paired with LR decay)
epochs = 15
for epoch in range(epochs):
    model_flower_improved.train()
    running_loss = 0.0
    for data, target in flower_train_loader:
        optimizer.zero_grad()
        output = model_flower_improved(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()  # update the learning rate at the end of each epoch
    avg_loss = running_loss / len(flower_train_loader)
    print(f"Epoch {epoch+1}/{epochs}, average loss {avg_loss:.4f}, current LR {scheduler.get_last_lr()[0]:.6f}")

# Save the improved model
torch.save(model_flower_improved.state_dict(), './flower_cnn_improved.pth')

Step 3: Post-Tuning Comparison

# Evaluate the improved model
model_flower_improved = FlowerCNNWithBN()
model_flower_improved.load_state_dict(torch.load('./flower_cnn_improved.pth'))
improved_flower_result = evaluate(model_flower_improved, flower_test_loader)

# Comparison output
print(f"Flower classifier, improved model: accuracy {improved_flower_result['acc']:.4f}, F1 {improved_flower_result['f1']:.4f}")
print(f"Accuracy gain: {(improved_flower_result['acc'] - init_flower_result['acc'])*100:.2f} percentage points")
# Final output: Flower classifier, improved model: accuracy 0.8765, F1 0.8758
# Accuracy gain: 27.53 percentage points (underfitting resolved, convergence markedly faster)

Case Summary

  1. Core symptom of underfitting: training and validation metrics are both low, and the training loss falls slowly or stalls.
  2. Core tuning levers: deepen/widen the model to raise its fitting capacity + batch normalization to speed up convergence + learning-rate decay to refine late-stage accuracy (the sketch below shows the resulting schedule).
  3. Keep the balance: when adding capacity, include a light Dropout so the model does not swing over into overfitting.
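
Learning-rate decay is easy to misconfigure, so it pays to print the schedule before committing to a long run. This minimal sketch drives a StepLR scheduler with a throwaway optimizer to show how step_size=5 and gamma=0.1 play out over the 15 epochs used above.

import torch
from torch.optim.lr_scheduler import StepLR

# throwaway parameter/optimizer, used only to step the scheduler
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

for epoch in range(15):
    # ... one epoch of training would go here ...
    scheduler.step()
    print(f"after epoch {epoch+1}: lr = {scheduler.get_last_lr()[0]:.0e}")
# prints 1e-03 after epochs 1-4, 1e-04 after 5-9, 1e-05 after 10-14, 1e-06 after 15;
# with a schedule this aggressive, longer runs may want a larger step_size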

Case 3: House-Price Prediction (Regression: Fixing Large Prediction Error)

Scenario

Using a simplified house-price dataset (10-dimensional feature input, house price as the output), the initial fully connected model shows a high prediction MAE (mean absolute error) and cannot fit the price trend stably, which is the classic failure mode in regression tasks.

Step 1: Evaluate the Problem Model

import torch
import torch.nn as nn
import torch.nn.functional as F  # needed for F.relu in the models below
import numpy as np
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 1. Generate simulated house-price data (regression task)
X, y = make_regression(n_samples=1000, n_features=10, n_targets=1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data (critical for regression: keeps feature scales from distorting training)
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

# Convert to tensors
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32)
test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# 2. Regression evaluation metrics (MAE, MSE)
def evaluate_regression(model, loader):
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for data, target in loader:
            output = model(data)
            all_preds.extend(output.cpu().numpy().flatten())
            all_targets.extend(target.cpu().numpy().flatten())
    mae = np.mean(np.abs(np.array(all_preds) - np.array(all_targets)))
    mse = np.mean((np.array(all_preds) - np.array(all_targets))**2)
    return {"mae": mae, "mse": mse}

# 3. Initial regression model
class SimpleRegressor(nn.Module):
    def __init__(self):
        super(SimpleRegressor, self).__init__()
        self.fc1 = nn.Linear(10, 32)
        self.fc2 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 4. Evaluate the initial model (after 20 epochs of training)
model_reg_init = SimpleRegressor()
model_reg_init.load_state_dict(torch.load('./regressor_init.pth'))
init_reg_result = evaluate_regression(model_reg_init, test_loader)
print(f"Initial regression model: MAE {init_reg_result['mae']:.4f}, MSE {init_reg_result['mse']:.4f}")
# Output: Initial regression model: MAE 0.3567, MSE 0.2015 (error on the high side)

Step 2: Tuning (Data Standardization + Model Upgrades + Early Stopping)

# 1. Improved regression model (extra hidden layer plus batch normalization)
class ImprovedRegressor(nn.Module):
    def __init__(self):
        super(ImprovedRegressor, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 1)  # output layer; the extra hidden layer above raises fitting capacity

    def forward(self, x):
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

# 2. Training with early stopping (avoids overtraining and stabilizes the error)
# Split off a validation set first (used for the early-stopping check),
# so validation samples never leak into the training loader
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train_scaled, y_train_scaled, test_size=0.1, random_state=42
)

X_train_tensor = torch.tensor(X_train_sub, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_sub, dtype=torch.float32)
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

# Early-stopping training loop
model_reg_improved = ImprovedRegressor()
criterion = nn.MSELoss()  # MSE loss, the usual choice for regression
optimizer = torch.optim.Adam(model_reg_improved.parameters(), lr=0.001, weight_decay=1e-5)

best_val_mae = float('inf')
patience = 5
patience_counter = 0
epochs = 50

for epoch in range(epochs):
    # Training phase
    model_reg_improved.train()
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model_reg_improved(data)
        loss = criterion(output.flatten(), target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    # Validation phase (early-stopping check)
    val_result = evaluate_regression(model_reg_improved, val_loader)
    current_val_mae = val_result['mae']
    
    # Save the best model so far
    if current_val_mae < best_val_mae:
        best_val_mae = current_val_mae
        patience_counter = 0
        torch.save(model_reg_improved.state_dict(), './regressor_improved.pth')
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Validation MAE failed to improve for {patience} consecutive epochs; early stopping triggered")
            break

    print(f"Epoch {epoch+1}/{epochs}, validation MAE {current_val_mae:.4f}")

Step 3: Post-Tuning Comparison

# Evaluate the improved model
model_reg_improved = ImprovedRegressor()
model_reg_improved.load_state_dict(torch.load('./regressor_improved.pth'))
improved_reg_result = evaluate_regression(model_reg_improved, test_loader)

# Comparison output (reduction reported relative to the initial MAE)
print(f"Improved regression model: MAE {improved_reg_result['mae']:.4f}, MSE {improved_reg_result['mse']:.4f}")
print(f"MAE reduction: {(init_reg_result['mae'] - improved_reg_result['mae']) / init_reg_result['mae'] * 100:.2f}%")
# Final output: Improved regression model: MAE 0.0892, MSE 0.0198
# MAE reduction: 75.00% (error markedly lower, predictions more stable)

Case Summary

  1. Core prerequisite for regression: standardize the data so that features on different scales do not distort training.
  2. Keys to reducing error: a moderately deeper model + batch normalization to stabilize training + early stopping to avoid overtraining.
  3. Metric choice: prioritize MAE (easier to interpret, more robust to outliers) and use MSE as a secondary reference (see the sketch below).
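
To see why MAE is the more robust headline metric, here is a tiny numeric sketch: one bad outlier prediction inflates MSE far more than MAE, because MSE squares each error.

import numpy as np

targets = np.array([1.0, 1.0, 1.0, 1.0, 1.0])
good    = np.array([1.1, 0.9, 1.1, 0.9, 1.0])  # small errors everywhere
bad_one = np.array([1.1, 0.9, 1.1, 0.9, 4.0])  # one prediction badly off

for name, preds in [("good", good), ("one outlier", bad_one)]:
    mae = np.mean(np.abs(preds - targets))
    mse = np.mean((preds - targets) ** 2)
    print(f"{name}: MAE {mae:.3f}, MSE {mse:.3f}")
# good:        MAE 0.080, MSE 0.008
# one outlier: MAE 0.680, MSE 1.808  (MSE grows ~226x, MAE only ~8.5x)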

General Tuning Summary and Core Principles

  1. Evaluate before tuning: pin down the core problem (overfitting / underfitting / high error) rather than modifying the model blindly.
  2. Change one variable at a time: adjust a single hyperparameter or structural element per experiment, so you can tell which change actually helped.
  3. Prefer low-cost tuning first: hyperparameters (learning rate, batch size) → data handling (standardization, augmentation) → model structure changes.
  4. Keep the best model: save the model and record its evaluation metrics after every tuning round, so you can roll back and compare (a minimal bookkeeping sketch follows).
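
To close, here is a minimal, hypothetical bookkeeping sketch for principle 4: each tuning round saves its checkpoint and appends its metrics to a JSON log, so any round can be reloaded and compared later. The log_round helper and the file paths are illustrative, not part of any library.

import json
import os
import torch

LOG_PATH = './tuning_log.json'  # illustrative path

def log_round(name, model, metrics):
    # save this round's checkpoint and append its metrics to the log
    ckpt_path = f'./{name}.pth'
    torch.save(model.state_dict(), ckpt_path)
    history = []
    if os.path.exists(LOG_PATH):
        with open(LOG_PATH) as f:
            history = json.load(f)
    history.append({'round': name, 'checkpoint': ckpt_path, **metrics})
    with open(LOG_PATH, 'w') as f:
        json.dump(history, f, indent=2)

# usage after each tuning round, e.g. for Case 1:
# log_round('cnn_dropout_l2', model_improved, evaluate(model_improved, test_loader))
# rolling back then means loading the checkpoint of the best-logged round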