复现AlexNet
AlexNet是一个分类网络,深度学习之始。本次复现基于鲜花数据集,使用方式:
- 在data_set文件夹下创建新文件夹"flower_data"
- 解压数据集到flower_data文件夹下
- 执行"split_data.py"脚本自动将数据集划分成训练集train和验证集val
- 文件目录
- ├── flower_data
-     ├── flower_photos(解压的数据集文件夹,3670个样本)
-     ├── train(生成的训练集,3306个样本)
-     └── val(生成的验证集,364个样本)
一、导入数据
数据已下载完毕,放在同目录的flower_photos下,划分后其中train是训练集,val是验证集。
我们需要将split_data.py脚本放在flower_data的同层,内含flower_photos,然后运行脚本分割数据。
二、导包
import torch
import torch.nn as nn
import torchvision
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
三、总体结构框架
四、定义模型骨架
class Network(nn.Module):
    """AlexNet-style CNN (half-width variant) for image classification.

    Args:
        num_classes: number of output classes of the final linear layer.
        initweight: if True, apply Kaiming init to conv layers and
            normal(0, 0.01) init to linear layers after construction.
    """

    def __init__(self, num_classes, initweight = False):
        # Call super().__init__() FIRST: nn.Module must set up its internal
        # registries before any attribute assignment on self is safe.
        super(Network, self).__init__()
        self.num_classes = num_classes
        self.initweight = initweight
        # Convolutional feature extractor. A Sequential instance is itself
        # callable and forwards its input through the layers in order.
        self.feature = nn.Sequential(
            # args: in_channels, out_channels, kernel_size, stride, padding
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            # pooling never changes the channel count
            nn.MaxPool2d(kernel_size=3, stride=2),
            # default stride is 1
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # note the smaller kernel_size from here on
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # MLP classifier head. Dropout does not delete neurons; it only
        # zeroes their outputs during training.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # 128 channels x 6 x 6 spatial map after the last pooling layer
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, self.num_classes),
        )
        if self.initweight:
            self._init_weights()

    def forward(self, x):
        """Return raw class logits for a (N, 3, 224, 224) input batch."""
        x = self.feature(x)
        # flatten everything but the batch dimension before the MLP head
        x = torch.flatten(x, start_dim=1)
        # return logits so the caller can feed them to the loss function
        return self.classifier(x)

    def _init_weights(self):
        # Renamed from __initWeight__: double-leading/trailing-underscore
        # names are reserved for the language; internal helper, not public.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
五、train模块
1. 导包
from torchvision import transforms,datasets,utils
import torch.optim as optim
import os
import json
import time
# 需要导入才能直接调用
from tqdm import tqdm
import sys
2. 预处理
# Pick GPU 0 when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("使用{}进行运算".format(device))

# Per-split preprocessing pipelines: random augmentation for training,
# deterministic resizing for validation.
_normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
data_transform = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        _normalize,
    ]),
    # Resize must receive the (224, 224) tuple; a bare 224 would only
    # scale the shorter side.
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        _normalize,
    ]),
}
使用cuda:0进行运算
# Resolve the dataset root relative to the current working directory.
# os.getcwd() already returns an absolute path, so the original
# os.path.abspath(os.path.join(...)) wrapper was redundant.
abspath = os.getcwd()
flower_path = os.path.join(abspath, 'flower_data')
# ImageFolder infers each image's class label from its subfolder name.
trainset = datasets.ImageFolder(root=os.path.join(flower_path, 'train'),
                                transform=data_transform['train'])
num_train = len(trainset)
num_train
3306
# Invert the class->index mapping so predictions can be decoded later,
# e.g. {'daisy': 0, 'dandelion': 1, ...} becomes {0: 'daisy', ...}.
flower_list = trainset.class_to_idx
cla_dict = {idx: name for name, idx in flower_list.items()}
# Persist the mapping as pretty-printed JSON for the prediction script.
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)
# Mini-batch size for each optimizer update.
batchsize = 32
# Number of DataLoader worker processes.
nw = 8

# Loader for the training split: reshuffled every epoch.
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batchsize,
                                          shuffle=True,
                                          num_workers=nw)

# Validation split, using the deterministic "val" transform.
validset = datasets.ImageFolder(root=flower_path + '/val',
                                transform=data_transform['val'])
# No shuffling needed for evaluation.
validloader = torch.utils.data.DataLoader(validset,
                                          batch_size=4,
                                          shuffle=False,
                                          num_workers=nw)

# The validation-set size must be taken from validset, not trainset!
num_val = len(validset)
print('我们会使用{}个训练样本进行训练,{}个验证样本进行验证(用于调整超参数)'.format(num_train,num_val))
我们会使用3306个训练样本进行训练,364个验证样本进行验证(用于调整超参数)
3. 训练函数
def train():
    """Train the AlexNet model on the flower dataset.

    Saves the checkpoint with the best validation accuracy to
    ./bestparameters.pth. Relies on module-level globals: device,
    trainloader, validloader, num_val, and the Network class.
    """
    model = Network(num_classes = 5, initweight = True)
    # nn.Module.to(device) moves parameters in place.
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    # Adam with default betas; lr=0.0002 was found to work best here.
    optimizer = optim.Adam(model.parameters(), lr = 0.0002)
    epochs = 10
    best_acc = 0.0
    # Number of mini-batches per epoch, used to average the loss.
    train_steps = len(trainloader)
    for epoch in range(epochs):
        # ---- training phase ----
        model.train()
        # Running loss for this epoch only (the redundant second
        # initialization outside the loop was removed).
        totalloss = 0.0
        train_bar = tqdm(trainloader, file = sys.stdout)
        for step, data in enumerate(train_bar):
            imgs, labels = data
            optimizer.zero_grad()
            # Move the batch to the model's device; the model output is
            # already on that device, so no extra .to(device) on y.
            y = model(imgs.to(device))
            loss = loss_fn(y, labels.to(device))
            # Backpropagate the scalar loss, then update the parameters.
            loss.backward()
            optimizer.step()
            # .item() extracts a Python float and drops the autograd graph.
            totalloss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss.item())
        # ---- validation phase ----
        model.eval()
        # Count of correctly classified validation samples.
        acc = 0.0
        with torch.no_grad():
            val_bar = tqdm(validloader, file = sys.stdout)
            for val_data in val_bar:
                val_imgs, val_labels = val_data
                y = model(val_imgs.to(device))
                # argmax over the class dimension gives the prediction
                predict_y = torch.max(y, dim = 1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
        acc = acc / num_val
        if best_acc < acc:
            best_acc = acc
            torch.save(model.state_dict(), './bestparameters.pth')
        print('当前为:epoch{},平均损失为:{},验证精度为:{}'.format(epoch+1, totalloss/train_steps, acc))
# Kick off training; the best checkpoint is saved inside train().
train()
print('Finished Training')
train epoch[1/10] loss:0.873: 100%|███████████| 104/104 [00:02<00:00, 48.88it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 145.12it/s]
当前为:epoch1,平均损失为:1.3640630927223425,验证精度为:0.4807692307692308
train epoch[2/10] loss:1.276: 100%|███████████| 104/104 [00:02<00:00, 49.30it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 156.81it/s]
当前为:epoch2,平均损失为:1.1749353145177548,验证精度为:0.5164835164835165
train epoch[3/10] loss:1.016: 100%|███████████| 104/104 [00:02<00:00, 49.08it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 142.30it/s]
当前为:epoch3,平均损失为:1.0994601920247078,验证精度为:0.5604395604395604
train epoch[4/10] loss:0.697: 100%|███████████| 104/104 [00:02<00:00, 49.05it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 156.53it/s]
当前为:epoch4,平均损失为:1.0487159920426516,验证精度为:0.6043956043956044
train epoch[5/10] loss:1.281: 100%|███████████| 104/104 [00:02<00:00, 49.17it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 136.08it/s]
当前为:epoch5,平均损失为:0.9796863771401919,验证精度为:0.6675824175824175
train epoch[6/10] loss:1.105: 100%|███████████| 104/104 [00:02<00:00, 47.53it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 147.78it/s]
当前为:epoch6,平均损失为:0.9479855539707037,验证精度为:0.6346153846153846
train epoch[7/10] loss:1.644: 100%|███████████| 104/104 [00:02<00:00, 47.39it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 153.10it/s]
当前为:epoch7,平均损失为:0.9153569355033911,验证精度为:0.6373626373626373
train epoch[8/10] loss:0.868: 100%|███████████| 104/104 [00:02<00:00, 49.89it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 130.98it/s]
当前为:epoch8,平均损失为:0.8855450107501104,验证精度为:0.7087912087912088
train epoch[9/10] loss:0.563: 100%|███████████| 104/104 [00:02<00:00, 48.40it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 153.31it/s]
当前为:epoch9,平均损失为:0.8499517134175851,验证精度为:0.7060439560439561
train epoch[10/10] loss:0.907: 100%|██████████| 104/104 [00:02<00:00, 48.59it/s]
100%|██████████████████████████████████████████| 91/91 [00:00<00:00, 118.99it/s]
当前为:epoch10,平均损失为:0.8140908138683209,验证精度为:0.717032967032967
Finished Training
六、预测模块
1. 导包
import os
import json
from PIL import Image
from torchvision import transforms
2. 预处理
# Select the compute device (the original line had a duplicated
# "device = device =" assignment).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("使用{}进行运算".format(device))
# Same deterministic preprocessing as the validation split. Resize needs
# the (224, 224) tuple; a bare 224 would only scale the shorter side.
test_transforms = transforms.Compose([transforms.Resize((224, 224)),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
testimagepath = './my_test/qiangwei.png'
# PIL reads images as RGB (unlike OpenCV, which reads BGR).
test_img = Image.open(testimagepath)
# Hide the axis ticks (two statements instead of a throwaway tuple
# expression) and show the raw image.
plt.xticks([])
plt.yticks([])
plt.imshow(test_img)
使用cuda:0进行运算
<matplotlib.image.AxesImage at 0x7f78ce54fc10>
# Apply the validation-style preprocessing to the PIL image.
timg = test_transforms(test_img)
# Add a leading batch dimension: (C, H, W) -> (1, C, H, W).
timg = torch.unsqueeze(timg, dim = 0)
# Load the index -> class-name mapping written during training.
json_path = './class_indices.json'
# Explicit raise instead of assert: asserts are stripped under `python -O`,
# and the original message contained a "dose not exist" typo.
if not os.path.exists(json_path):
    raise FileNotFoundError("file: '{}' does not exist.".format(json_path))
with open(json_path, "r") as f:
    class_indict = json.load(f)
3. 实例化模型并测试
# Instantiate the classifier and move it to the selected device.
# .to(device) also works on CPU-only machines, unlike the old .cuda().
net = Network(num_classes = 5).to(device)
# map_location remaps the checkpoint tensors onto the current device, so
# a GPU-saved checkpoint still loads on a CPU-only machine.
net.load_state_dict(torch.load('./bestparameters.pth', map_location=device))
<All keys matched successfully>
net.eval()
# Inference only: disable autograd to save memory and time.
with torch.no_grad():
    output = net(timg.to(device)).squeeze().cpu()
    # softmax over the class dimension turns logits into probabilities
    y = torch.softmax(output, dim = 0)
    predict_cla = torch.argmax(y).numpy()
# Headline prediction shown as the plot title.
print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                           y[predict_cla].numpy())
plt.title(print_res)
# Print the probability of every class (enumerate instead of
# range(len(...)) indexing).
for i, prob in enumerate(y):
    print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                            prob.numpy()))
plt.imshow(test_img)
plt.show()
class: daisy prob: 0.0067
class: dandelion prob: 0.00137
class: roses prob: 0.786
class: sunflowers prob: 0.00314
class: tulips prob: 0.202