实例一 - AlexNet

339 阅读4分钟

复现AlexNet

AlexNet是一个分类网络,深度学习之始。本次复现基于鲜花数据集,使用方式:

  1. 在data_set文件夹下创建新文件夹"flower_data"
  2. 解压数据集到flower_data文件夹下
  3. 执行"split_data.py"脚本自动将数据集划分成训练集train和验证集val
  4. 文件目录
    • ├── flower_data
      • ├── flower_photos(解压的数据集文件夹,3670个样本)
        ├── train(生成的训练集,3306个样本)
        └── val(生成的验证集,364个样本)

一、导入数据

数据已下载完毕,放在同目录的flower_data下,其中train是训练集,val是验证集。

我们需要将split_data.py脚本放在flower_data的同层,内含flower_photos,然后运行脚本分割数据。

二、导包

import torch
import torch.nn as nn
import torchvision
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

三、总体结构框架

download.png

四、定义模型骨架

class Network(nn.Module):
    """AlexNet-style CNN classifier (channel widths halved vs. the paper).

    Expects 3x224x224 input images. `feature` downsamples them to a
    (128, 6, 6) map which the `classifier` MLP maps to `num_classes` logits.

    Args:
        num_classes: number of output classes.
        initweight: when True, run Kaiming/normal weight initialization.
    """

    def __init__(self, num_classes, initweight = False):
        # Fix: call nn.Module.__init__ FIRST — nn.Module.__setattr__
        # assumes the module bookkeeping dicts exist, so assigning
        # attributes before super().__init__() is fragile.
        super(Network, self).__init__()
        self.num_classes = num_classes
        self.initweight = initweight
        # Convolutional feature extractor. nn.Sequential instances are
        # callable and forward their input through each layer in order.
        self.feature = nn.Sequential(
            # (3, 224, 224) -> (48, 55, 55)
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            # Pooling never changes the channel count: -> (48, 27, 27)
            nn.MaxPool2d(kernel_size=3, stride=2),

            # stride defaults to 1
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # -> (128, 13, 13)

            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),  # -> (128, 6, 6)
        )
        # Fully-connected classifier head.
        self.classifier = nn.Sequential(
            # Dropout zeroes activations; it does not remove neurons.
            nn.Dropout(p=0.5),
            # 128 channels x 6x6 spatial positions left after `feature`.
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),

            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),

            nn.Linear(2048, self.num_classes),
        )
        if self.initweight:
            self._init_weights()

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        x = self.feature(x)
        # Flatten everything except the batch dimension for the MLP head.
        x = torch.flatten(x, start_dim=1)
        # Return logits so the caller can feed them to CrossEntropyLoss.
        return self.classifier(x)

    def _init_weights(self):
        # Kaiming init for convolutions, small-normal init for linear
        # layers. (Renamed from __initWeight__: names with leading and
        # trailing double underscores are reserved for the language.)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

五、train模块

1. 导包

from torchvision import transforms,datasets,utils
import torch.optim as optim
import os
import json
import time
# 需要导入才能直接调用
from tqdm import tqdm
import sys

2. 预处理

# Prefer the first CUDA device; otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("使用{}进行运算".format(device))
# Per-split preprocessing pipelines: random augmentation for training,
# a deterministic resize for validation.
_train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
_val_tf = transforms.Compose([
    # must be (224, 224): a bare 224 only resizes the shorter side
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
data_transform = {"train": _train_tf, "val": _val_tf}
使用cuda:0进行运算
# Build absolute paths to the dataset splits.
# Fix: os.path.join() with a single argument is a no-op, and os.getcwd()
# already returns an absolute path — drop the redundant wrapping.
abspath = os.path.abspath(os.getcwd())
flower_path = abspath + '/flower_data'
# ImageFolder labels each image by the name of its subdirectory.
trainset = datasets.ImageFolder(root=flower_path + '/train',
                                transform=data_transform['train'])
num_train = len(trainset)
num_train  # notebook cell echo (3306 samples)
3306
# Invert the class->index mapping into index->class for later lookup,
# e.g. {'daisy': 0, 'dandelion': 1, ...} becomes {0: 'daisy', ...}.
flower_list = trainset.class_to_idx
cla_dict = {idx: name for name, idx in flower_list.items()}
# Persist the mapping so the prediction script can recover class names.
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)
batchsize = 32  # mini-batch size
nw = 8          # number of DataLoader worker processes
# Loader for the training split, reshuffled every epoch.
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batchsize,
                                          shuffle=True,
                                          num_workers=nw)
# Validation split uses the deterministic transform defined above.
validset = datasets.ImageFolder(root=flower_path + '/val',
                                transform=data_transform['val'])
# No shuffling needed for evaluation.
validloader = torch.utils.data.DataLoader(validset,
                                          batch_size=4,
                                          shuffle=False,
                                          num_workers=nw)
# NOTE: the validation-set size must be taken from validset, not trainset!
num_val = len(validset)
print('我们会使用{}个训练样本进行训练,{}个验证样本进行验证(用于调整超参数)'.format(num_train,num_val))
我们会使用3306个训练样本进行训练,364个验证样本进行验证(用于调整超参数)

3. 训练函数

def train():
    """Train the AlexNet model on the flower dataset and keep the best weights.

    Relies on the module-level `trainloader`, `validloader`, `device` and
    `num_val`. After each epoch the model is evaluated on the validation
    split; the state dict with the best accuracy so far is written to
    './bestparameters.pth'.

    Fixes vs. the original: removed the unused `model_path` variable,
    removed the duplicate `totalloss` initialization, and dropped the
    redundant `y.to(device)` (the model already lives on `device`, so its
    output does too).
    """
    model = Network(num_classes=5, initweight=True)
    # For nn.Module, .to() moves the parameters in place.
    model.to(device)
    Loss = nn.CrossEntropyLoss()
    # Adam with default betas; lr = 0.0002 works well for this task.
    optimizer = optim.Adam(model.parameters(), lr=0.0002)
    epochs = 10
    best_acc = 0.0
    # Number of batches per epoch, used to report the mean training loss.
    train_steps = len(trainloader)

    for epoch in range(epochs):
        # ---- training phase ----
        model.train()
        totalloss = 0.0  # running sum of batch losses for this epoch
        train_bar = tqdm(trainloader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            imgs, labels = data
            optimizer.zero_grad()
            # Inputs must be moved to the model's device.
            y = model(imgs.to(device))
            loss = Loss(y, labels.to(device))
            # Backpropagate the scalar loss, then update the parameters.
            loss.backward()
            optimizer.step()
            totalloss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)
        # ---- validation phase ----
        model.eval()
        # Count of correctly classified validation samples.
        acc = 0.0
        with torch.no_grad():
            val_bar = tqdm(validloader, file=sys.stdout)
            for val_data in val_bar:
                val_imgs, val_labels = val_data
                y = model(val_imgs.to(device))
                # Index of the max logit is the predicted class.
                predict_y = torch.max(y, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        acc = acc / num_val
        # Checkpoint only when validation accuracy improves.
        if best_acc < acc:
            best_acc = acc
            torch.save(model.state_dict(), './bestparameters.pth')

        print('当前为:epoch{},平均损失为:{},验证精度为:{}'.format(epoch+1, totalloss/train_steps, acc))

# Run the full training loop defined above.
train()
print('Finished Training')
train epoch[1/10] loss:0.873: 100%|███████████| 104/104 [00:02<00:00, 48.88it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 145.12it/s]
当前为:epoch1,平均损失为:1.3640630927223425,验证精度为:0.4807692307692308
train epoch[2/10] loss:1.276: 100%|███████████| 104/104 [00:02<00:00, 49.30it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 156.81it/s]
当前为:epoch2,平均损失为:1.1749353145177548,验证精度为:0.5164835164835165
train epoch[3/10] loss:1.016: 100%|███████████| 104/104 [00:02<00:00, 49.08it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 142.30it/s]
当前为:epoch3,平均损失为:1.0994601920247078,验证精度为:0.5604395604395604
train epoch[4/10] loss:0.697: 100%|███████████| 104/104 [00:02<00:00, 49.05it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 156.53it/s]
当前为:epoch4,平均损失为:1.0487159920426516,验证精度为:0.6043956043956044
train epoch[5/10] loss:1.281: 100%|███████████| 104/104 [00:02<00:00, 49.17it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 136.08it/s]
当前为:epoch5,平均损失为:0.9796863771401919,验证精度为:0.6675824175824175
train epoch[6/10] loss:1.105: 100%|███████████| 104/104 [00:02<00:00, 47.53it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 147.78it/s]
当前为:epoch6,平均损失为:0.9479855539707037,验证精度为:0.6346153846153846
train epoch[7/10] loss:1.644: 100%|███████████| 104/104 [00:02<00:00, 47.39it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 153.10it/s]
当前为:epoch7,平均损失为:0.9153569355033911,验证精度为:0.6373626373626373
train epoch[8/10] loss:0.868: 100%|███████████| 104/104 [00:02<00:00, 49.89it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 130.98it/s]
当前为:epoch8,平均损失为:0.8855450107501104,验证精度为:0.7087912087912088
train epoch[9/10] loss:0.563: 100%|███████████| 104/104 [00:02<00:00, 48.40it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 153.31it/s]
当前为:epoch9,平均损失为:0.8499517134175851,验证精度为:0.7060439560439561
train epoch[10/10] loss:0.907: 100%|██████████| 104/104 [00:02<00:00, 48.59it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 118.99it/s]
当前为:epoch10,平均损失为:0.8140908138683209,验证精度为:0.717032967032967
Finished Training

六、预测模块

1. 导包

import os
import json
from PIL import Image
from torchvision import transforms

2. 预处理

# Select the computation device.
# Fix: the original had a duplicated assignment (`device = device = ...`).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("使用{}进行运算".format(device))

# Deterministic single-image preprocessing; must match the validation
# transform used during training.
test_transforms = transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
testimagepath = './my_test/qiangwei.png'
# PIL's Image.open yields RGB order (unlike OpenCV, which reads BGR).
test_img = Image.open(testimagepath)
plt.xticks([]), plt.yticks([])
plt.imshow(test_img)
使用cuda:0进行运算





<matplotlib.image.AxesImage at 0x7f78ce54fc10>




output_26_2.png

# Apply the inference transform, then add a batch dimension: the network
# expects 4-D NCHW input.
timg = test_transforms(test_img)
timg = torch.unsqueeze(timg, dim=0)

# Load the index -> class-name mapping written during training.
# Fix: typo "dose" -> "does" in the error message.
json_path = './class_indices.json'
assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

with open(json_path, "r") as f:
    class_indict = json.load(f)

3. 实例化模型并测试

# Build the model with the same class count used in training, move it to
# the active device, then restore the best checkpoint.
# Fixes: `.cuda()` crashed on CPU-only machines -> use .to(device);
# map_location lets CUDA-saved weights also load on a CPU host.
net = Network(num_classes = 5).to(device)
net.load_state_dict(torch.load('./bestparameters.pth', map_location=device))
<All keys matched successfully>
net.eval()  # inference mode: disables dropout
with torch.no_grad():
    # One forward pass; squeeze the batch dim and pull the logits to CPU.
    output = net(timg.to(device)).squeeze().cpu()
    # Softmax over the class dimension turns logits into probabilities.
    y = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(y).numpy()  # index of the winning class
# Title the plot with the top class and its probability.
print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 y[predict_cla].numpy())
plt.title(print_res)
# Also print the probability assigned to every class.
for i in range(len(y)):
    print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  y[i].numpy()))
plt.imshow(test_img)
plt.show()
class: daisy        prob: 0.0067
class: dandelion    prob: 0.00137
class: roses        prob: 0.786
class: sunflowers   prob: 0.00314
class: tulips       prob: 0.202



output_31_1.png