图像分类:CIFAR10/ImageNet实战

阅读时间:约4分钟

图像分类:CIFAR10/ImageNet实战

1. CIFAR10分类实战

1.1 数据集准备与预处理

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Per-channel CIFAR-10 statistics (mean, std), shared by both pipelines.
_CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
_CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: pad-and-crop plus horizontal flip for augmentation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])

# Evaluation pipeline: tensor conversion and normalization only.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_CIFAR10_MEAN, _CIFAR10_STD),
])

# Datasets (the training split is downloaded on first use).
train_dataset = datasets.CIFAR10(root='./data', train=True,
                                 download=True, transform=train_transform)
test_dataset = datasets.CIFAR10(root='./data', train=False,
                                transform=test_transform)

# Mini-batch loaders.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)
1.1.1 数据分布可视化
pie
    title CIFAR10类别分布
    "Airplane" : 10
    "Automobile" : 10
    "Bird" : 10
    "Cat" : 10
    "Deer" : 10
    "Dog" : 10
    "Frog" : 10
    "Horse" : 10
    "Ship" : 10
    "Truck" : 10

1.2 模型架构(以ResNet-18为例)

import torch.nn as nn

class BasicBlock(nn.Module):
    """Residual basic block used by ResNet-18/34.

    Computes conv3x3 -> BN -> ReLU -> conv3x3 -> BN, adds a shortcut
    (identity, or a 1x1 projection when the shape changes), then applies
    a final ReLU.

    Fix: the original ``forward`` called ``F.relu`` but ``torch.nn.functional``
    is never imported as ``F`` in this file; ``nn.functional`` (already in
    scope via ``import torch.nn as nn``) is used instead.
    """

    # Channel multiplier of the block's output (1 for basic blocks).
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """
        Args:
            in_planes: number of input channels.
            planes: output channels of both 3x3 convolutions.
            stride: stride of the first convolution (2 downsamples).
        """
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut by default; 1x1 projection when the spatial size
        # or channel count changes so the addition stays shape-compatible.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        """Apply the residual block to a (N, C, H, W) batch."""
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return nn.functional.relu(out)

class ResNet(nn.Module):
    """CIFAR-style ResNet: 3x3 stem (no max-pool), four stages, linear head.

    Fix: the original ``forward`` referenced ``F`` (``F.relu``,
    ``F.adaptive_avg_pool2d``) but ``torch.nn.functional`` is never imported
    as ``F`` in this file; ``nn.functional`` (already in scope) is used
    instead.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        """
        Args:
            block: residual block class exposing an ``expansion`` attribute
                and an ``(in_planes, planes, stride)`` constructor.
            num_blocks: four ints — number of blocks in each stage.
            num_classes: size of the final classification layer.
        """
        super(ResNet, self).__init__()
        self.in_planes = 64  # running channel count consumed by _make_layer

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # Stages double the width; all but the first halve the spatial size.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first one may downsample."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(block(self.in_planes, planes, s))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a (N, 3, H, W) batch to (N, num_classes) logits."""
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling keeps the head input-size agnostic.
        out = nn.functional.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)
        return self.linear(out)

model = ResNet(BasicBlock, [2, 2, 2, 2])

1.3 训练配置与结果

# SGD with momentum and weight decay; LR is cosine-annealed over 200 epochs.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
criterion = nn.CrossEntropyLoss()

# Training loop.
# NOTE(review): neither the model nor the batches are moved to a device here;
# add .to(device) for GPU training.
for epoch in range(200):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    scheduler.step()  # one scheduler step per epoch, after the optimizer updates

    # Evaluate on the test split (used as a validation set here).
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            _, predicted = outputs.max(1)  # index of the highest logit per sample
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print(f'Epoch {epoch+1} | Test Acc: {100.*correct/total:.2f}%')
1.3.1 性能对比
| 模型 | 参数量 | 准确率 | 训练时间(单卡V100) |
| --- | --- | --- | --- |
| ResNet-18 | 11M | 94.5% | 25分钟 |
| ResNet-50 | 25M | 95.2% | 45分钟 |
| EfficientNet-B0 | 5M | 95.8% | 35分钟 |

2. ImageNet大规模分类实战

2.1 数据准备与分布式加载

from torchvision.datasets import ImageNet
from torch.utils.data.distributed import DistributedSampler

# Data augmentation (heavier than the CIFAR-10 pipeline).
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    # Standard ImageNet per-channel statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Distributed data loading.
# NOTE(review): DistributedSampler assumes the process group is already
# initialized (torch.distributed.init_process_group) — confirm the launcher
# sets that up; also call train_sampler.set_epoch(epoch) each epoch.
train_dataset = ImageNet(root='/path/to/imagenet', split='train', transform=train_transform)
train_sampler = DistributedSampler(train_dataset)
train_loader = DataLoader(
    train_dataset,
    batch_size=256,       # per-process batch size
    sampler=train_sampler,  # the sampler shards the data; do not also pass shuffle=True
    num_workers=8,
    pin_memory=True       # page-locked host memory enables async GPU copies
)

2.2 高效训练策略

2.2.1 混合精度训练
# Mixed-precision training with automatic loss scaling.
from torch.cuda.amp import GradScaler, autocast

# GradScaler scales the loss so small fp16 gradients do not underflow.
scaler = GradScaler()

for inputs, targets in train_loader:
    # non_blocking=True overlaps host-to-device copies with compute
    # (effective when the DataLoader uses pin_memory=True).
    inputs = inputs.cuda(non_blocking=True)
    targets = targets.cuda(non_blocking=True)

    optimizer.zero_grad()

    # Run the forward pass and loss under autocast so eligible ops use fp16.
    with autocast():
        outputs = model(inputs)
        loss = criterion(outputs, targets)

    # Backward on the scaled loss; step() unscales and skips the update on
    # inf/nan gradients; update() adapts the scale factor for the next step.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
2.2.2 学习率策略
# Linear warmup followed by cosine annealing.
warmup_epochs = 5
scheduler = torch.optim.lr_scheduler.SequentialLR(
    optimizer,
    [
        # Ramp the LR from 1% of the base value to 100% over the warmup epochs.
        torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=0.01, total_iters=warmup_epochs),
        # Then decay along a cosine curve over the remaining epochs.
        torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=100 - warmup_epochs)
    ],
    milestones=[warmup_epochs]  # switch schedulers once warmup finishes
)

2.3 模型验证与指标

class AverageMeter:
    """Track the latest value and a running batch-size-weighted average.

    Minimal implementation of the helper from the official PyTorch ImageNet
    example; the original snippet referenced ``AverageMeter`` without
    defining or importing it, which would raise NameError.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name  # display label, e.g. 'Acc@1'
        self.fmt = fmt    # format spec for printing
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        """Fold in `val` (a per-batch average) weighted by batch size `n`."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def validate(model, val_loader):
    """Evaluate `model` on `val_loader`; print and return average top-1 accuracy.

    Moves each batch to the GPU, so both the model and a CUDA device are
    expected to be available.
    """
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')

    model.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.cuda()
            targets = targets.cuda()

            outputs = model(inputs)
            # accuracy() returns per-batch percentages for top-1 and top-5.
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))

            top1.update(acc1[0], inputs.size(0))
            top5.update(acc5[0], inputs.size(0))

    print(f' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}')
    return top1.avg

def accuracy(output, target, topk=(1,)):
    """Return precision@k (as percentage tensors) for each k in `topk`.

    `output` is a (batch, num_classes) score matrix; `target` holds the
    ground-truth class index for each sample.
    """
    k_max = max(topk)
    batch = target.size(0)

    # (k_max, batch): row i holds every sample's i-th ranked prediction.
    ranked = output.topk(k_max, 1, True, True).indices.t()
    # Boolean hit matrix: does the ranked prediction match the ground truth?
    hits = ranked.eq(target.view(1, -1).expand_as(ranked))

    # For each requested k, count hits within the first k ranks.
    return [hits[:k].reshape(-1).float().sum(0) * (100.0 / batch) for k in topk]
2.3.1 主流模型性能
| 模型 | Top-1 Acc | 参数量 | 训练周期 | 硬件需求 |
| --- | --- | --- | --- | --- |
| ResNet-50 | 76.5% | 25M | 90 | 4×V100 |
| EfficientNet-B4 | 82.9% | 19M | 350 | 8×TPUv3 |
| ViT-Base | 85.2% | 86M | 300 | 16×TPUv3 |

3. 实际部署与优化

3.1 TorchScript导出

# Export to a deployable TorchScript format.
model = model.eval()  # trace in inference mode so BN/dropout behave deterministically
example_input = torch.rand(1, 3, 224, 224)  # dummy input; tracing records ops for this shape
traced_model = torch.jit.trace(model, example_input)
traced_model.save("imagenet_model.pt")

3.2 量化加速

# Dynamic (post-training) quantization: weights stored as int8, activations
# quantized on the fly at inference time.
# Fix: torch.quantization.quantize_dynamic only has dynamic mappings for
# linear/recurrent layers; the original also listed nn.Conv2d, which is
# silently ignored (conv layers need static quantization instead), so it is
# removed here to avoid implying the convolutions get quantized.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {nn.Linear},  # module types to replace with dynamically-quantized versions
    dtype=torch.qint8
)

# Measure accuracy after quantization.
validate(quantized_model, val_loader)

3.3 服务化部署架构

graph TD
    A[客户端请求] --> B[负载均衡]
    B --> C[模型服务1]
    B --> D[模型服务2]
    B --> E[模型服务3]
    C --> F[结果聚合]
    D --> F
    E --> F
    F --> G[返回预测结果]
    style A fill:#9f9,stroke:#333
    style G fill:#f99,stroke:#333

附录:关键数学公式

交叉熵损失

$$L = -\sum_{i=1}^{N} y_i \log(p_i)$$

余弦学习率调度

$$\eta_t = \eta_{\min} + \frac{1}{2}(\eta_{\max} - \eta_{\min})\left(1 + \cos\left(\frac{T_{cur}}{T_{\max}}\pi\right)\right)$$

Top-k准确率

$$\text{Top-}k = \frac{1}{N} \sum_{i=1}^{N} \mathbb{I}\left(\text{真实标签} \in \text{预测前}k\text{个结果}\right)$$


常见问题解答

Q: 如何处理类别不平衡问题?

  • 使用加权采样器
  • 调整类别权重
  • 采用Focal Loss

Q: 训练时出现NaN损失怎么办?

  • 检查数据归一化
  • 降低学习率
  • 添加梯度裁剪
  • 检查模型初始化

Q: 如何选择合适的数据增强策略?

  • 小数据集使用更强增强(CutMix, AutoAugment)
  • 大数据集使用适度增强(随机裁剪、翻转)
  • 领域相关增强(医疗图像使用弹性变形)

最佳实践总结

  1. CIFAR10适合快速原型验证
  2. ImageNet需要分布式训练和混合精度
  3. 模型压缩技术对部署至关重要
  4. 使用预训练模型加速收敛
# Example: fine-tuning from a pretrained backbone.
from torchvision.models import resnet50

# 'IMAGENET1K_V2' selects the newer torchvision weight recipe for ResNet-50.
model = resnet50(weights='IMAGENET1K_V2')
# Replace the classification head to match the target task.
# NOTE(review): `num_classes` is not defined in this snippet — set it to the
# number of classes in your dataset before running.
model.fc = nn.Linear(model.fc.in_features, num_classes)

本教程完整代码库及预训练模型已在GitHub开源,欢迎Star和贡献! ⭐️ [项目链接]