Pytorch实现新冠肺炎识别
项目摘要
通过手动搭建VGG16模型对肺部 CT 图片患病结果进行检测分类,该项目为多分类问题,label为【0,1,2,3】,代表不同程度的肺炎患病情况。
基本配置
import os
from PIL import Image
from matplotlib import pyplot as plt
import torchvision
import pandas as pd
import numpy as np
# 进度条tqdm
from tqdm import tqdm
# 导入pytoch及相关函数
import torch
from torch import nn
import torch.nn.functional as F
# 可迭代数据管道构建
from torch.utils.data import Dataset,DataLoader
# 导入图像预处理包
from torchvision import transforms,datasets,models
# 处理器配置
# Select the compute device: use the GPU when CUDA is available, else fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bare expression so the notebook cell displays the chosen device.
device
1.读取和准备数据集文件
1.1 训练集
在transform.Resize这里为什么是[224,224],而不是其他数值? 是这样的,笔者后续打算VGG-16模型进行识别分类,要求输入的图片尺寸是【224,224】,所以在这里需要设置一下。
# Compose chains several preprocessing steps into one callable pipeline.
pic_transform = transforms.Compose([
# Resize([224, 224]) forces BOTH height and width to 224 (the input size
# VGG-16 expects). NOTE: only a single-int argument scales the short side
# while keeping the aspect ratio — a 2-element size does not.
transforms.Resize([224,224]),
# ToTensor() converts a PIL image to a float tensor in [0, 1], channel-first (C,H,W).
transforms.ToTensor(),
# Normalize standardizes each channel: (x - mean) / std.
# These are the standard ImageNet channel statistics.
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
1.2 继承Dataset类, 来构建自己的数据集
class MyDataset(Dataset):
    """Dataset of CT images listed in a CSV index file.

    The CSV (located at ``<img_path>/<file_name>``) must contain an
    ``image_path`` column (paths relative to ``img_path``) and a
    ``labels`` column with integer class labels.

    Args:
        img_path: root directory holding both the CSV and the images.
        file_name: CSV file name inside ``img_path``.
        transform: optional callable applied to each PIL image.
    """

    def __init__(self, img_path, file_name, transform=None):
        super().__init__()
        self.root = img_path
        self.file_name = file_name
        # os.path.join is portable; the original "root + '//' + name"
        # concatenation only worked because POSIX collapses extra slashes.
        self.csv_root = os.path.join(self.root, self.file_name)
        df = pd.read_csv(self.csv_root)
        # Absolute image paths and their labels, aligned row by row.
        self.img = [os.path.join(self.root, p) for p in df['image_path']]
        self.label = df['labels'].tolist()
        self.transform = transform

    def __len__(self):
        # One sample per CSV row.
        return len(self.label)

    def __getitem__(self, item):
        # Load as RGB so grayscale CT scans still yield the 3 channels VGG expects.
        img = Image.open(self.img[item]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        # 0-d int64 tensor: the target format CrossEntropyLoss expects.
        label = torch.from_numpy(np.array(self.label[item]).astype(np.int64))
        return img, label
# Build the training dataset from its CSV index and wrap it in a DataLoader.
train_path = r'/home/mw/input/pneumonia_ct7356/dataset-CT/dataset/train'
train_name = 'train.csv'
train_data = MyDataset(train_path,train_name,transform = pic_transform)
# DataLoader turns the dataset into an iterable batch pipeline.
dl_train = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
# Preview a single batch to sanity-check shapes and image content.
batch_images, batch_labels = next(iter(dl_train))
print(f'images的维度[N, C, H, W]为:{batch_images.shape}')
# make_grid tiles the batch into one big image tensor.
grid = torchvision.utils.make_grid(batch_images).numpy()
# plt.imshow expects channel-last (H, W, C), so move the channel axis to the end.
plt.imshow(np.transpose(grid, (1, 2, 0)))
plt.show()
1.3 测试集
同样地,我们对测试集进行同等处理
# Build the test dataset the same way as the training set.
test_path = r'/home/mw/input/pneumonia_ct7356/dataset-CT/dataset/test'
test_name = 'test.csv'
test_data = MyDataset(test_path,test_name,transform = pic_transform)
# DataLoader turns the dataset into an iterable batch pipeline.
dl_test = DataLoader(dataset=test_data, batch_size=32, shuffle=True)
# Preview a single batch to sanity-check shapes and image content.
batch_images, batch_labels = next(iter(dl_test))
print(f'images的维度[N, C, H, W]为:{batch_images.shape}')
# make_grid tiles the batch into one big image tensor.
grid = torchvision.utils.make_grid(batch_images).numpy()
# plt.imshow expects channel-last (H, W, C), so move the channel axis to the end.
plt.imshow(np.transpose(grid, (1, 2, 0)))
plt.show()
2.定义神经网络模型(VGG-16)
笔者在这里选择通过继承nn.Module的方式来构建神经网络; 在正式构建模型以前,我们先升级一下pip和安装torchsummary
!pip install torchsummary -i https://pypi.tuna.tsinghua.edu.cn/simple some-package
!pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip
# 导入torchsummary
from torchsummary import summary
大家可以参考以下这张图片,根据卷积核大小和卷积层数目的不同,VGG可以分为以下6种子模型,笔者在这里选择手动构建一个VGG-16 D模型。
基本模型如下图所示:
# VGG-16 (configuration D) built by hand, with BatchNorm after every conv.
class network(nn.Module):
    """VGG-16-style CNN for 4-class pneumonia CT classification.

    Five conv stages (2, 2, 3, 3, 3 conv layers, each followed by
    BatchNorm + ReLU, closed by a 2x2 max-pool) take a 3x224x224 input
    down to 512x7x7, then three fully-connected layers map it to 4 logits.
    """

    def __init__(self):
        super().__init__()
        # Stage-by-stage spatial sizes: 224 -> 112 -> 56 -> 28 -> 14 -> 7.
        self.layer1 = self._vgg_block(3, 64, 2)
        self.layer2 = self._vgg_block(64, 128, 2)
        self.layer3 = self._vgg_block(128, 256, 3)
        self.layer4 = self._vgg_block(256, 512, 3)
        self.layer5 = self._vgg_block(512, 512, 3)
        # Convenience wrapper chaining all five conv stages.
        self.conv_layer = nn.Sequential(
            self.layer1,
            self.layer2,
            self.layer3,
            self.layer4,
            self.layer5
        )
        # Classifier head: 7*7*512 -> 4096 -> 4096 -> 1000 -> 4 classes.
        # Dropout(p=0.5) after each hidden ReLU mitigates overfitting.
        self.fc = nn.Sequential(
            nn.Linear(in_features = 7*7*512,out_features = 4096),
            nn.ReLU(inplace = True),
            nn.Dropout(p = 0.5),
            nn.Linear(4096,4096),
            nn.ReLU(inplace = True),
            nn.Dropout(p = 0.5),
            nn.Linear(4096,1000),
            nn.ReLU(inplace = True),
            nn.Dropout(p = 0.5),
            # Final layer width equals the number of target classes.
            nn.Linear(1000,4)
        )

    @staticmethod
    def _vgg_block(in_channels, out_channels, n_convs):
        """One VGG stage: n_convs x (3x3 conv, BatchNorm, ReLU), then 2x2 max-pool.

        Conv uses padding=1 and stride 1, so spatial size is preserved:
        (H - 3 + 2)/1 + 1 = H; the pool halves it. BatchNorm re-centres the
        activations toward N(0, 1) to keep gradients healthy; inplace ReLU
        avoids allocating a second activation buffer.
        """
        stage = []
        for i in range(n_convs):
            stage.append(nn.Conv2d(in_channels if i == 0 else out_channels,
                                   out_channels, 3, padding = 1))
            stage.append(nn.BatchNorm2d(out_channels))
            stage.append(nn.ReLU(inplace = True))
        stage.append(nn.MaxPool2d(kernel_size = 2,stride = 2))
        return nn.Sequential(*stage)

    def forward(self,x):
        """Run the conv stages, flatten, and classify; returns (N, 4) logits."""
        x = self.conv_layer(x)
        # Flatten all but the batch dim: (N, 512, 7, 7) -> (N, 25088).
        x = torch.flatten(x,start_dim = 1)
        return self.fc(x)
将我们的模型进行实例化。
# Instantiate the model and move its parameters to the selected device.
vgg_16 = network().to(device)
# Bare expression so the notebook cell prints the module structure.
vgg_16
2.1 查看模型详情
利用torchsummary进行查看。
summary(vgg_16,input_size = (3,224,224))
3.训练VGG-16模型(函数风格)
笔者比较喜欢用函数风格训练模型,所以这里就采用函数风格吧。
# Import the accuracy metric.
from sklearn.metrics import accuracy_score
model = vgg_16
# Loss function: CrossEntropyLoss for multi-class classification (expects raw logits).
model.loss_func = nn.CrossEntropyLoss()
# SGD optimizer: parameters to optimize, learning rate, and momentum
# (momentum reuses the previous update direction to speed up convergence).
model.optimizer = torch.optim.SGD(model.parameters(),lr = 0.001,momentum = 0.8)
# model.optimizer = torch.optim.Adam(model.parameters(), lr= 1e-4)
# Evaluation metric: accuracy (the original comment said "recall", which was wrong).
def acc(y_pred,y_true):
    """Compute classification accuracy for a batch.

    Args:
        y_pred: raw logits, shape (N, num_classes), on any device.
        y_true: integer class labels, shape (N,).

    Returns:
        np.float64 accuracy in [0, 1] — callers rely on ``.item()`` working.
    """
    # argmax over logits equals argmax over softmax(logits): softmax is
    # strictly monotonic per row, so the explicit Softmax pass is redundant.
    y_pred_cls = torch.argmax(y_pred,dim=1)
    # Move to CPU/numpy so the comparison also works for CUDA tensors.
    y_pred_cls = y_pred_cls.data.cpu().numpy()
    y_true = y_true.data.cpu().numpy()
    # Mean of elementwise equality is the accuracy (np.float64, keeps .item()).
    return (y_pred_cls == y_true).mean()
# Attach the metric function to the model object so train/valid can call it.
model.metric_func = acc
# Metric display name used when printing training logs.
model.metric_name = 'acc'
3.1 定义训练函数
# Single-batch training step.
def train(model,features,labels):
    """Run one optimization step on a single batch.

    Args:
        model: module with ``loss_func``, ``optimizer`` and ``metric_func`` attached.
        features: input batch tensor.
        labels: integer target labels for the batch.

    Returns:
        Tuple ``(loss, metric)`` as plain Python floats.
    """
    # Training mode: enables dropout and BatchNorm batch statistics.
    model.train()
    # Clear gradients left over from the previous step.
    model.optimizer.zero_grad()
    # Move the batch to the same device as the model weights, otherwise
    # CPU inputs would clash with CUDA parameters.
    features, labels = features.to(device), labels.to(device)
    # Forward pass: loss plus the monitoring metric.
    outputs = model(features)
    batch_loss = model.loss_func(outputs,labels)
    batch_metric = model.metric_func(outputs,labels)
    # Backward pass and parameter update.
    batch_loss.backward()
    model.optimizer.step()
    return batch_loss.item(),batch_metric.item()
# Smoke test: run a single training step on one batch to verify the pipeline.
features,labels = next(iter(dl_train))
train(model,features,labels)
3.2 定义验证函数
# Single-batch validation step; no_grad disables gradient tracking entirely.
@torch.no_grad()
def valid(model,features,labels):
    """Evaluate the model on one batch without updating parameters.

    Returns:
        Tuple ``(loss, metric)`` as plain Python floats.
    """
    # Eval mode: disables dropout, uses BatchNorm running statistics.
    model.eval()
    # Keep the batch on the same device as the model weights.
    features, labels = features.to(device), labels.to(device)
    outputs = model(features)
    batch_loss = model.loss_func(outputs,labels)
    batch_metric = model.metric_func(outputs,labels)
    return batch_loss.item(),batch_metric.item()
3.3 正式训练
# Args: model, number of epochs, train loader, validation loader, log frequency.
# NOTE: the original source was mangled here — the driver code below
# (epochs / log_step_freq / the vgg_train call) had been pasted into the
# middle of the training loop with stray backticks, breaking the function.
# It has been restored to its intended shape.
def vgg_train(model,epochs,dl_train,dl_test,log_step_freq):
    """Full training loop: train + validate each epoch, logging metrics.

    Args:
        model: module with loss_func/optimizer/metric_func/metric_name attached.
        epochs: number of epochs to run.
        dl_train: training DataLoader.
        dl_test: validation DataLoader.
        log_step_freq: print a progress line every this many training steps.

    Returns:
        pandas DataFrame with per-epoch loss/metric for train and validation.
    """
    metric_name = model.metric_name
    # Per-epoch history table (typo "metirc" fixed).
    metric_history = pd.DataFrame(columns = ['epoch','loss',metric_name,'val_loss','val_'+metric_name])
    print('开始训练,请稍后~')
    for epoch in tqdm(range(1,epochs+1)):
        # 1. Training loop -------------------------------------------
        loss_sum = 0.0
        metric_sum = 0.0
        step = 1
        # enumerate from 1 so `step` counts completed batches.
        for step,(features,labels) in enumerate(dl_train,1):
            loss,metric = train(model,features,labels)
            loss_sum += loss
            metric_sum += metric
            # Periodic batch-level log with running averages.
            if step % log_step_freq == 0:
                print(f'[step = {step}] loss:{loss_sum/step},{metric_name}:{metric_sum/step}')
        # 2. Validation loop -----------------------------------------
        val_loss_sum = 0.0
        val_metric_sum = 0.0
        val_step = 1
        for val_step, (features,labels) in enumerate(dl_test, 1):
            val_loss,val_metric = valid(model,features,labels)
            val_loss_sum += val_loss
            val_metric_sum += val_metric
        # 3. Record the epoch averages -------------------------------
        info = (epoch, loss_sum/step, metric_sum/step, val_loss_sum/val_step, val_metric_sum/val_step)
        metric_history.loc[epoch-1] = info
        # Epoch-level log (the broken "val_+{...}" placeholder is fixed).
        print(f'epoch = {info[0]},loss = {info[1]},{metric_name} = {info[2]},val_loss = {info[3]},val_{metric_name} = {info[4]}')
    print('训练成功~')
    return metric_history

# Number of training epochs.
epochs = 20
# Print a training log line every 100 steps.
log_step_freq = 100
historydf = vgg_train(model,epochs,dl_train,dl_test,log_step_freq)
4.评估模型
# Plot train/validation curves for one metric over epochs.
def line_plotling(df,metric):
    """Draw two line plots (train and validation) of *metric* vs. epoch.

    Args:
        df: history DataFrame with 'epoch', metric and 'val_'+metric columns.
        metric: base column name, e.g. 'loss' or 'acc'.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    # Theme with top/right spines kept visible.
    sns.set_theme(style = 'ticks',rc = {'axes.spines.right':True,'axes.spines.top':True})
    # One line per split: training in red, validation in blue.
    # (These are line plots, not scatter plots as the old comment claimed.)
    for column, colour in ((metric, 'r'), ('val_'+ metric, 'b')):
        sns.lineplot(x = 'epoch',y = column,data = df,color = colour,markers = True,dashes = False)
    # Legend distinguishing the two curves.
    plt.legend(['train_'+metric,'val_'+metric])
# Plot the cross-entropy loss curves (train vs. validation).
line_plotling(historydf,'loss')
# Plot the accuracy curves — the metric here is accuracy, not recall.
line_plotling(historydf,'acc')
5.指定图片进行预测
def predict_pic(image_path,model,transform):
    """Predict the class of one CT image and compare with its CSV label.

    Args:
        image_path: absolute path to an image inside the test set.
        model: trained classification model (already on ``device``).
        transform: preprocessing pipeline matching the training input.
    """
    img = Image.open(image_path).convert('RGB')
    # Show the image being predicted.
    plt.imshow(img)
    # Preprocess to the model's expected tensor format.
    img = transform(img)
    print(img.shape)
    # Add a batch dimension: (C, H, W) -> (1, C, H, W).
    pred_img = img.to(device).unsqueeze(0)
    print(pred_img.shape)
    # Inference mode: eval() disables dropout / BN updates, and no_grad()
    # skips gradient tracking (the original built a needless autograd graph).
    model.eval()
    with torch.no_grad():
        result = model(pred_img)
    # torch.max over dim 1 returns (max values, argmax indices) per row;
    # the index is the predicted class.
    _,pred = torch.max(result,1)
    print(f'预测结果为:{pred}')
    # Ground truth: look the image up in the test-set CSV.
    df = pd.read_csv(r'/home/mw/input/pneumonia_ct7356/dataset-CT/dataset/test/test.csv')
    # Images are picked from the test set, so strip its root directory
    # to recover the relative path stored in the CSV's image_path column.
    root_path = '/home/mw/input/pneumonia_ct7356/dataset-CT/dataset/test/'
    df_path = image_path.replace(root_path,'')
    label = df[df['image_path'] == df_path]['labels'].to_string(index=False)
    # (Leftover debug print of type(label) removed.)
    print(f'实际结果为:{label}')
# Pick an arbitrary test-set image and run a single prediction on it.
image_path = r'/home/mw/input/pneumonia_ct7356/dataset-CT/dataset/test/images/00000231_007.png'
predict_pic(image_path,model,pic_transform)
6.保存训练好的模型
通过保存参数的方式保存模型
# Inspect the parameter names stored in the state dict.
print(model.state_dict().keys())
# Save the model via its parameters (state_dict). The original created a
# cwd-relative 'model' directory but saved to an absolute path — consistent
# only when cwd happened to be /home/mw/project. Create the actual target
# directory instead.
save_path = r'/home/mw/project/model/vgg_16.pkl'
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(model.state_dict(), save_path)
# 若需要完整数据集以及代码
# Full dataset and code available at: https://mbd.pub/o/bread/aJWVm5lp