参与拿奖:本文已参与「新人创作礼」活动,一起开启掘金创作之路
1.卷积神经网络的结构
1.1 AlexNet结构[1][2]
输入层:输入的是大小为224x224x3的图像;
第一层:卷积层,卷积核大小为11x11,通道数目为3,输出为96个特征图,步长为4,填充为2,公式计算可得(224+2x2-11)//4+1=55,因此输出结果大小为55x55x96,卷积之后使用ReLu函数;
第二层:池化层,使用最大池化,池化窗口为3x3,步长为2,输出大小为(55-3)//2+1=27,因此池化层输出大小为27x27x96;
第三层:卷积层,卷积核大小为5x5,输出为256个特征图,因此卷积核的维度为5x5x96x256,步长为1,填充为2,输出为(27+2x2-5)//1+1=27,输出大小为27x27x256,然后经过ReLU函数;
第四层:最大池化层,窗口大小为3x3,步长为2,输出为(27-3)//2+1=13,输出大小为13x13x256;
第五层到第七层:都是卷积层,卷积核大小为3x3,输出的特征图分别为384、384、256,滑动步长stride=1,填充padding=1,(13+1x2-3)//1+1=13,因此第七层输出13×13×256,然后经过ReLu函数;
第八层:池化层,窗口大小为3x3,步长为2,输出为(13-3)//2+1=6,最后输出大小为6x6x256
最后三层:全连接层,将卷积的输出维数为6×6×256的tensor展平,得到全连接层的输入维度 为9216,三层全连接的神经元个数为4096、4096、1000。最终得到1000维的输出用于图像分类。
1.1.1 代码
import torch
from torch import nn
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Flatten
from torch.nn import Linear
from torch.nn import Sequential
class AlexNet(nn.Module):
    """AlexNet (Krizhevsky et al., 2012) for 224x224 RGB input, 1000 classes.

    Feature path: conv(11x11, s4, p2) -> 55x55x96 -> maxpool -> 27x27x96
    -> conv(5x5, p2) -> 27x27x256 -> maxpool -> 13x13x256
    -> three conv(3x3, p1) layers (384, 384, 256) -> maxpool -> 6x6x256.
    Classifier: flatten to 9216 -> Linear 4096 -> 4096 -> 1000.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv_0 = Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2)
        self.relu_0 = nn.ReLU()
        self.pool_0 = MaxPool2d(kernel_size=3, stride=2)
        self.conv_1 = Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.relu_1 = nn.ReLU()
        self.pool_1 = MaxPool2d(kernel_size=3, stride=2)
        self.conv_2 = Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.relu_2 = nn.ReLU()
        self.conv_3 = Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.relu_3 = nn.ReLU()
        self.conv_4 = Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.relu_4 = nn.ReLU()
        self.pool_2 = MaxPool2d(kernel_size=3, stride=2)
        # BUG FIX: the original used Flatten(start_dim=0), which flattened the
        # batch dimension too, so the network only worked at batch size 1 and
        # returned shape (1000,) instead of (N, 1000).  start_dim=1 keeps the
        # batch dimension, supporting any batch size.
        self.flatten_0 = Flatten(start_dim=1)
        self.linear_0 = Linear(9216, 4096)
        self.linear_1 = Linear(4096, 4096)
        self.linear_2 = Linear(4096, 1000)
        self.model = Sequential(
            self.conv_0,
            self.relu_0,
            self.pool_0,
            self.conv_1,
            self.relu_1,
            self.pool_1,
            self.conv_2,
            self.relu_2,
            self.conv_3,
            self.relu_3,
            self.conv_4,
            self.relu_4,
            self.pool_2,
            self.flatten_0,
            self.linear_0,
            self.linear_1,
            self.linear_2
        )

    def forward(self, input):
        """Return class logits of shape (N, 1000) for input of shape (N, 3, 224, 224)."""
        output = self.model(input)
        return output
1.1.2 测试结果
def main():
    """Smoke-test AlexNet on a single random 224x224 RGB image."""
    net = AlexNet()
    sample = torch.randn((1, 3, 224, 224))
    print(sample.shape)
    logits = net(sample)
    print(logits.shape)


if __name__ == '__main__':
    main()
1.2 ResNet结构[1][3]
其创新点就是将输入与经过网络之后的输出进行融合,这种融合方式采用了跳跃连接的方式,跳跃连接的引入使得信息的流通更加顺畅,表现在以下两个方面:一是在前向传播时,将输入与输出的信息进行融合,能够更有效地利用特征;二是在反向传播时,总有一部分梯度通过跳跃连接反传到输入上,这缓解了梯度消失的问题。
1.2.1 简单Demo
import torch
from torch import nn
from torch.nn import Conv2d
from torch.nn import Sequential
from torch.nn import BatchNorm2d
class ResNet(nn.Module):
    """Minimal residual-connection demo: output = input + F(input).

    F is conv(3->96) -> BN -> ReLU -> BN -> conv(96->3); both convolutions
    keep the spatial size (3x3 kernel, stride 1, padding 1), so the skip
    connection is a plain element-wise addition.
    """

    def __init__(self):
        super(ResNet, self).__init__()
        # Layers are created in exactly the order they run.
        self.model = Sequential(
            Conv2d(in_channels=3, out_channels=96, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(num_features=96),
            nn.ReLU(),
            BatchNorm2d(num_features=96),
            Conv2d(in_channels=96, out_channels=3, kernel_size=3, stride=1, padding=1),
        )

    def forward(self, input):
        # Skip connection: add the block's input back onto its output.
        residual = self.model(input)
        return input + residual
1.2.2 测试与结果
def main():
    """Push one random image through ResNet and compare input with output."""
    sample = torch.randn((1, 3, 224, 224))
    net = ResNet()
    print("输入: ", sample.shape, sample[0][0][0])
    result = net(sample)
    print("输出: ", result.shape, result[0][0][0])


if __name__ == '__main__':
    main()
2.训练模型
2.1 Loss函数
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.nn import L1Loss #绝对平均误差
from torch.nn import MSELoss #均方误差
from torch.nn import CrossEntropyLoss #交叉熵损失
# --- L1 / MSE losses on a toy regression example --------------------------
inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
targets = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
targets = torch.reshape(targets, (1, 1, 1, 3))

loss = L1Loss()  # mean absolute error: (0 + 0 + 2) / 3 = 2/3
result = loss(inputs, targets)
print("L1loss ", result)

loss_mse = MSELoss()  # mean squared error: (0 + 0 + 4) / 3 = 4/3
result = loss_mse(inputs, targets)
print("MSELoss ", result)

# --- Cross entropy on a 3-class toy example --------------------------------
x = torch.tensor([0.1, 0.2, 0.3], dtype=torch.float32)
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss_cross = CrossEntropyLoss()  # -x[class] + log(sum(exp(x_i)))
result = loss_cross(x, y)
print("CrossEntropyLoss ", result)
2.2 构建训练模型
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Flatten
from torch.nn import Linear
from torch.nn import Sequential
from torch.nn import L1Loss #绝对平均误差
from torch.nn import MSELoss #均方误差
from torch.nn import CrossEntropyLoss #交叉熵损失
class myNet(nn.Module):
    """CIFAR-10 CNN: three (5x5 same-pad conv -> 2x2 max-pool) stages, then FC.

    For a 3x32x32 input the spatial size goes 32 -> 16 -> 8 -> 4, giving
    64 * 4 * 4 = 1024 flattened features -> Linear 64 -> Linear 10 logits.
    """

    def __init__(self):
        super(myNet, self).__init__()
        # Stand-alone layers (kept so existing code that pokes at them still
        # works; note their weights are independent of self.model below).
        self.conv = Conv2d(3, 32, 5, padding=2)  # padding=2 keeps 32x32 (conv size formula)
        self.maxpool = MaxPool2d(2)
        self.conv1 = Conv2d(32, 32, 5, padding=2)
        self.maxpool1 = MaxPool2d(2)
        self.conv2 = Conv2d(32, 64, 5, padding=2)
        self.maxpool2 = MaxPool2d(2)
        self.flatten = Flatten()
        self.linear = Linear(1024, 64)
        self.linear1 = Linear(64, 10)
        # Packaged pipeline that forward() actually uses.
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        """Return logits of shape (N, 10) for input of shape (N, 3, 32, 32)."""
        # BUG FIX (perf): the original also pushed the input through the
        # stand-alone layers and discarded that result — pure wasted compute
        # plus an unnecessary autograd graph.  Only self.model's output was
        # ever returned, so run just that path.
        return self.model(input)
# Run the untrained network over CIFAR-10 (test split) one image at a time
# and compute the cross-entropy loss for every sample.
dataset = torchvision.datasets.CIFAR10('./data/02 data', train=False, transform=transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)
mynet = myNet()
loss_cross = CrossEntropyLoss()
for imgs, targets in dataloader:
    outputs = mynet(imgs)
    print("outputs: ", outputs)
    print("targets: ", targets)
    cross_loss = loss_cross(outputs, targets)
    print("CrossEntropyLoss: ", cross_loss)
    # Backpropagate so the parameter gradients can be inspected (e.g. with a
    # debugger breakpoint).
    cross_loss.backward()
print("OK")
2.3 使用优化器进行优化
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Flatten
from torch.nn import Linear
from torch.nn import Sequential
from torch.nn import L1Loss #绝对平均误差
from torch.nn import MSELoss #均方误差
from torch.nn import CrossEntropyLoss #交叉熵损失
from torch.optim import optimizer #优化器
from torch.optim import SGD
class myNet(nn.Module):
    """CIFAR-10 classifier: 3 conv/pool stages followed by two linear layers.

    Each conv uses a 5x5 kernel with padding=2 (spatial size preserved) and
    each pool halves the size: 32 -> 16 -> 8 -> 4.  Flattening 64x4x4 gives
    the 1024 input features of the first Linear; the final Linear emits 10
    class logits.
    """

    def __init__(self):
        super(myNet, self).__init__()
        # Individual layers, retained for backward compatibility with code
        # that accesses them directly (weights independent of self.model).
        self.conv = Conv2d(3, 32, 5, padding=2)  # parameters from the conv size formula
        self.maxpool = MaxPool2d(2)
        self.conv1 = Conv2d(32, 32, 5, padding=2)
        self.maxpool1 = MaxPool2d(2)
        self.conv2 = Conv2d(32, 64, 5, padding=2)
        self.maxpool2 = MaxPool2d(2)
        self.flatten = Flatten()
        self.linear = Linear(1024, 64)
        self.linear1 = Linear(64, 10)
        # Sequential pipeline — the path forward() actually evaluates.
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        """Return logits of shape (N, 10) for input of shape (N, 3, 32, 32)."""
        # BUG FIX (perf): the original forward also evaluated the stand-alone
        # layer chain and threw the result away; that computation (and its
        # autograd graph) never contributed to the returned value or to the
        # gradients of the returned value, so it is removed.
        return self.model(input)
# Train the network on CIFAR-10 (test split, batch size 1) with plain SGD.
dataset = torchvision.datasets.CIFAR10('./data/02 data', train=False, transform=transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=1)
mynet = myNet()
loss_cross = CrossEntropyLoss()  # cross-entropy loss for 10-way classification
optim = SGD(mynet.parameters(),  # parameters to optimize
            lr=0.01)             # learning rate
# Fixed number of epochs.
for epoch in range(10):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        optim.zero_grad()  # clear gradients accumulated by the previous step
        outputs = mynet(imgs)
        cross_loss = loss_cross(outputs, targets)
        # Backpropagation (inspect parameter .grad with a breakpoint).
        cross_loss.backward()
        optim.step()  # apply the SGD update
        # BUG FIX: accumulate the Python float, not the loss tensor.  Summing
        # tensors keeps every iteration's autograd graph referenced, leaking
        # memory over the epoch.
        running_loss += cross_loss.item()
    # (Also fixes the "corssEntropyLoss" typo in the original message.)
    print("round %d crossEntropyLoss is %.4f" % (epoch, running_loss))
3.模型的保存与加载
3.1 使用现有模型
import torchvision
from torchvision import transforms
from torch import nn  # BUG FIX: `nn` is used below but was never imported here

# train_data=torchvision.datasets.ImageNet("./data/03 data",split="train",transform=transforms.ToTensor(),download=True)
vgg16_false = torchvision.models.vgg16(pretrained=False)  # randomly initialized weights
vgg16_true = torchvision.models.vgg16(pretrained=True)    # download ImageNet-pretrained weights
train_data = torchvision.datasets.CIFAR10('./data/02 data', train=True, transform=transforms.ToTensor(), download=True)
# Register an extra 1000 -> 10 linear layer as a child module.
# NOTE(review): add_module only *registers* the layer; VGG's forward() will
# not call it automatically — add it inside .classifier (or wrap the model)
# for it to take effect.
vgg16_true.add_module("add_linear", nn.Linear(1000, 10))
print(vgg16_true)
# print(vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)  # replace the final classifier layer in place
print(vgg16_false)
3.2 多种保存和加载方法
import torchvision
import torch
import os

# BUG FIX (robustness): torch.save raises if the target directory does not
# exist, so create it up front.
os.makedirs("./data/04 model", exist_ok=True)

vgg16 = torchvision.models.vgg16(pretrained=False)  # randomly initialized weights
# Method 1: save the whole module object (architecture + weights).
torch.save(vgg16, "./data/04 model/vgg16_model.pth")
# Load method 1: returns the full module.
model = torch.load('./data/04 model/vgg16_model.pth')
# print(model)
# Method 2 (officially recommended): save only the state dict.
torch.save(vgg16.state_dict(), "./data/04 model/vgg16_model2.pth")
# Load method 2: torch.load here yields a dict of parameter tensors, ...
model = torch.load('./data/04 model/vgg16_model2.pth')
# print(model)
# ... which must be loaded into a freshly built model of the same class.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load('./data/04 model/vgg16_model2.pth'))
# NOTE(review): assumes myNet (defined in section 2.2) is in scope when this
# snippet runs — confirm the surrounding script defines or imports it.
mynet = myNet()
# Save / reload the custom model with method 1.
torch.save(mynet, "./data/04 model/mynet.pth")
model = torch.load("./data/04 model/mynet.pth")
参考资料
[1] 深入浅出图神经网络
[2] Krizhevsky A,Sutskever I,Hinton G E.Imagenet classification with deep convolutional neural networks[C]//Advances in neural information processing systems.2012:1097-1105.
[3] He K,Zhang X,Ren S,et al.Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition.2016:770-778.
[4] b站课程链接
[5] 手敲代码链接