Handwritten digit recognition is the classic entry point to deep learning, and it is worth mastering thoroughly. This post only walks through the code; the theory behind the network itself is not covered in depth here. There are many stylistic variants of MNIST code out there, but once you grasp the essentials they are all much the same. Enough preamble, let's begin.
1. Import the required packages
import torch.nn as nn                         # model building blocks
from torchvision import datasets, transforms  # datasets and preprocessing
import torch.optim as optim                   # optimizers
import torch.utils.data                       # data loading utilities
2. Load the data and split it into training and test sets
# Training set
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST("../MNIST", train=True, download=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=batchSize, shuffle=True)
# Test set
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST("../MNIST", train=False, download=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=batchSize, shuffle=True)
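Before digging into the parameters, here is a minimal sketch of what the loader actually yields. It assumes the train_loader above and batchSize = 64, which is set later in this post:

# Fetch one batch and inspect its shapes.
x, label = next(iter(train_loader))
print(x.shape)      # torch.Size([64, 1, 28, 28]): [batch, channels, height, width]
print(label.shape)  # torch.Size([64]): one class index per image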
A quick explanation here, since some readers may not be familiar with these parameters. Let's look at DataLoader's parameter list:
"""
dataset (Dataset): dataset from which to load the data.
batch_size (int, optional): how many samples per batch to load
(default: ``1``).
shuffle (bool, optional): set to ``True`` to have the data reshuffled
at every epoch (default: ``False``).
"""
def __init__(self, dataset: Dataset[T_co], batch_size: Optional[int] = 1,
shuffle: bool = False, sampler: Union[Sampler, Iterable, None] = None,
batch_sampler: Union[Sampler[Sequence], Iterable[Sequence], None] = None,
num_workers: int = 0, collate_fn: Optional[_collate_fn_t] = None,
pin_memory: bool = False, drop_last: bool = False,
timeout: float = 0, worker_init_fn: Optional[_worker_init_fn_t] = None,
multiprocessing_context=None, generator=None,
*, prefetch_factor: int = 2,
persistent_workers: bool = False):
torch._C._log_api_usage_once("python.data_loader")
The first parameter, dataset, is the dataset to load from. Here we use the MNIST dataset that torchvision provides, so we construct:
"""
def __init__(
self,
root: str,
train: bool = True,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
download: bool = False,
) -> None:
"""
datasets.MNIST("../MNIST", train=True, download=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
MNIST itself takes several parameters:
- The first is the root directory for the data.
- The second flags whether to load the training split or the test split.
- The third flags whether to download the dataset; torchvision can fetch it automatically (set download=True on the first run), so there is no need to grab it manually from the website.
- The fourth, transform, comes from torchvision.transforms, PyTorch's image preprocessing package. Compose chains multiple steps together, as shown above:
  - first convert the image to a Tensor,
  - then normalize it.
Many more steps can be chained in here, data augmentation for example. The remaining two DataLoader arguments are straightforward: batch_size is the batch size and shuffle controls whether the data is reshuffled each epoch. The test loader is written exactly the same way; only the train flag differs. Where the normalization constants come from is sketched below.
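So where do 0.1307 and 0.3081 come from? They are the mean and standard deviation of the raw MNIST training pixels. A minimal sketch to recompute them (not part of the training script; download=True fetches the data if it is not already present, and stacking all 60000 images takes a moment):

import torch
from torchvision import datasets, transforms

# Load the raw training images as tensors, without normalization.
raw = datasets.MNIST("../MNIST", train=True, download=True,
                     transform=transforms.ToTensor())
# Stack all 60000 images and compute global pixel statistics.
pixels = torch.stack([img for img, _ in raw])
print(pixels.mean().item(), pixels.std().item())  # ~0.1307, ~0.3081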
3. Build the network model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            # MNIST images are 28*28 = 784 pixels: [batchSize, 28*28] -> [batchSize, 200]
            nn.Linear(784, 200),
            # LeakyReLU activation; plain ReLU is also a common choice
            nn.LeakyReLU(inplace=True),
            # [batchSize, 200] -> [batchSize, 200]
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            # [batchSize, 200] -> [batchSize, 10]
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True)
        )

    # Get this method name exactly right: it must be "forward", or nn.Module
    # will not find it and the program errors out. I have made that mistake
    # more than once.
    def forward(self, x):
        x = self.model(x)
        return x
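Before wiring the model into training, a quick shape sanity check is cheap insurance. A minimal sketch (probe and dummy are throwaway names for illustration only):

import torch

probe = MLP()                 # a throwaway instance just for this check
dummy = torch.randn(4, 784)   # a fake batch of 4 flattened images
out = probe(dummy)
print(out.shape)              # torch.Size([4, 10]): one score per digit class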
The comments above explain the model line by line. The structure is deliberately simple: a stack of linear transformations with activations. There are no convolution kernels here; all the learnable parameters are encapsulated inside nn.Linear. Let's step into it and take a look:
class Linear(Module):
r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
This module supports :ref:`TensorFloat32<tf32_on_ampere>`.
Args:
in_features: size of each input sample
out_features: size of each output sample
bias: If set to ``False``, the layer will not learn an additive bias.
Default: ``True``
Shape:
- Input: :math:`(*, H_{in})` where :math:`*` means any number of
dimensions including none and :math:`H_{in} = \text{in_features}`.
- Output: :math:`(*, H_{out})` where all but the last dimension
are the same shape as the input and :math:`H_{out} = \text{out_features}`.
Attributes:
weight: the learnable weights of the module of shape
:math:`(\text{out_features}, \text{in_features})`. The values are
initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
:math:`k = \frac{1}{\text{in_features}}`
bias: the learnable bias of the module of shape :math:`(\text{out_features})`.
If :attr:`bias` is ``True``, the values are initialized from
:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
:math:`k = \frac{1}{\text{in_features}}`
Examples::
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
"""
__constants__ = ['in_features', 'out_features']
in_features: int
out_features: int
weight: Tensor
def __init__(self, in_features: int, out_features: int, bias: bool = True,
device=None, dtype=None) -> None:
factory_kwargs = {'device': device, 'dtype': dtype}
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
if bias:
self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self) -> None:
# Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
# uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
# https://github.com/pytorch/pytorch/issues/57109
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
init.uniform_(self.bias, -bound, bound)
def forward(self, input: Tensor) -> Tensor:
return F.linear(input, self.weight, self.bias)
def extra_repr(self) -> str:
return 'in_features={}, out_features={}, bias={}'.format(
self.in_features, self.out_features, self.bias is not None
)
Skim the docstring above if you like: reset_parameters uses Kaiming He's initialization scheme (kaiming_uniform_), and the layer itself handles the input-to-output dimension mapping. A quick check of the initialization bound is sketched next.
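To make that bound concrete, here is a minimal sketch: per the docstring above, both the weights and the bias of a freshly constructed layer should lie within ±√k where k = 1/in_features:

import math
import torch.nn as nn

layer = nn.Linear(784, 200)
bound = 1 / math.sqrt(784)    # sqrt(k) with k = 1/in_features, about 0.0357
print(layer.weight.shape)                        # torch.Size([200, 784])
print(layer.weight.abs().max().item() <= bound)  # True
print(layer.bias.abs().max().item() <= bound)    # True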
4. Setup before training
batchSize = 64        # batch size; tune it up or down to match your machine
learningRate = 0.01   # learning rate; usually a small value
epochs = 10           # number of training epochs

# Create the model instance
net = MLP()
# Create the optimizer and hand it the model's parameters
optimizer = optim.SGD(net.parameters(), lr=learningRate)
# Use the cross-entropy loss
criterion = nn.CrossEntropyLoss()
# Global training step counter
global_step = 0
# Global test step counter
global_test_step = 0
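One detail worth knowing about nn.CrossEntropyLoss: it expects raw, unnormalized logits plus integer class labels, and applies log-softmax internally, which is why the model needs no softmax at the end. A minimal standalone sketch:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)           # raw scores: 4 samples, 10 classes
labels = torch.tensor([3, 0, 9, 1])   # integer class indices, not one-hot
print(criterion(logits, labels))      # a scalar tensor, averaged over the batch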
5. Train the model
for epoch in range(epochs):
    # Training phase: iterate over batches from the training set
    for batchIdx, (x, label) in enumerate(train_loader):
        # Flatten the images; -1 lets PyTorch infer the batch dimension
        data = x.view(-1, 28 * 28)
        # Forward pass through the network to get the predictions
        logits = net(data)
        # Compute the loss
        loss = criterion(logits, label)
        # Zero the gradients
        optimizer.zero_grad()
        # Backpropagate
        loss.backward()
        # Update the weights
        optimizer.step()
        # Print progress every 100 batches
        if batchIdx % 100 == 0:
            print("Train Epoch:{} [{}/{} ({:.0f}%)] \t loss: {:.6f}".format(epoch, batchIdx * len(data),
                                                                            len(train_loader.dataset),
                                                                            100. * batchIdx / len(train_loader),
                                                                            loss.item()))
        global_step += 1

    # Test phase
    test_loss = 0
    correct = 0
    # No gradients are needed for evaluation
    with torch.no_grad():
        for data, target in test_loader:
            # Flatten, same as above
            data = data.view(-1, 28 * 28)
            # Run the test data through the trained network
            test_logits = net(data)
            # Accumulate the loss
            test_loss += criterion(test_logits, target).item()
            # Take the index of the highest score as the predicted label
            pred = test_logits.max(1)[1]
            # Count the correct predictions
            correct += pred.eq(target).sum()
            # Test step counter; handy if you plot progress with Visdom
            global_test_step += 1
    # Average the loss over the test set
    test_loss /= len(test_loader.dataset)
    # Print the result
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
          format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
Every step is annotated in detail in the comments above.
6. Training results
Train Epoch:0 [0/60000 (0%)] loss: 2.304667
Train Epoch:0 [6400/60000 (11%)] loss: 1.914003
Train Epoch:0 [12800/60000 (21%)] loss: 1.125605
Train Epoch:0 [19200/60000 (32%)] loss: 0.848263
Train Epoch:0 [25600/60000 (43%)] loss: 0.497234
Train Epoch:0 [32000/60000 (53%)] loss: 0.477975
Train Epoch:0 [38400/60000 (64%)] loss: 0.376689
Train Epoch:0 [44800/60000 (75%)] loss: 0.350738
Train Epoch:0 [51200/60000 (85%)] loss: 0.345194
Train Epoch:0 [57600/60000 (96%)] loss: 0.461904
Test set: Average loss: 0.0053, Accuracy: 9042/10000 (90%)
...
...
...
Train Epoch:9 [0/60000 (0%)] loss: 0.082586
Train Epoch:9 [6400/60000 (11%)] loss: 0.065986
Train Epoch:9 [12800/60000 (21%)] loss: 0.195389
Train Epoch:9 [19200/60000 (32%)] loss: 0.087784
Train Epoch:9 [25600/60000 (43%)] loss: 0.106803
Train Epoch:9 [32000/60000 (53%)] loss: 0.165314
Train Epoch:9 [38400/60000 (64%)] loss: 0.169176
Train Epoch:9 [44800/60000 (75%)] loss: 0.159838
Train Epoch:9 [51200/60000 (85%)] loss: 0.195588
Train Epoch:9 [57600/60000 (96%)] loss: 0.024768
Test set: Average loss: 0.0018, Accuracy: 9666/10000 (97%)
Process finished with exit code 0
After just 10 epochs of this simple training loop, recognition accuracy already reaches 97%.
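As a quick usage check, here is a minimal sketch that classifies a single test image with the trained network (it assumes the training code above has already run):

# Pull one batch from the test loader and classify its first image.
image, target = next(iter(test_loader))
with torch.no_grad():
    logits = net(image[0].view(-1, 28 * 28))
pred = logits.argmax(dim=1).item()
print("predicted: {}, actual: {}".format(pred, target[0].item()))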
7. Full code
# Handwritten digit recognition demo

# 1. Import the required packages
import torch.nn as nn                         # model building blocks
from torchvision import datasets, transforms  # datasets and preprocessing
import torch.optim as optim                   # optimizers
import torch.utils.data                       # data loading utilities

# 2. Basic hyperparameters
batchSize = 64        # batch size; tune it up or down to match your machine
learningRate = 0.01   # learning rate; usually a small value
epochs = 10           # number of training epochs

# 3. Data loading
# Training set
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST("./MNIST", train=True, download=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=batchSize, shuffle=True)
# Test set
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST("./MNIST", train=False, download=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])), batch_size=batchSize, shuffle=True)

# 4. Network model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            # MNIST images are 28*28 = 784 pixels: [batchSize, 28*28] -> [batchSize, 200]
            nn.Linear(784, 200),
            # LeakyReLU activation; plain ReLU is also a common choice
            nn.LeakyReLU(inplace=True),
            # [batchSize, 200] -> [batchSize, 200]
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            # [batchSize, 200] -> [batchSize, 10]
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True)
        )

    # This method name must be exactly "forward", or nn.Module will not find it
    def forward(self, x):
        x = self.model(x)
        return x

# 5. Setup before training
# Create the model instance
net = MLP()
# Create the optimizer and hand it the model's parameters
optimizer = optim.SGD(net.parameters(), lr=learningRate)
# Use the cross-entropy loss
criterion = nn.CrossEntropyLoss()
# Global training step counter
global_step = 0
# Global test step counter
global_test_step = 0

# 6. Training
for epoch in range(epochs):
    # Training phase: iterate over batches from the training set
    for batchIdx, (x, label) in enumerate(train_loader):
        # Flatten the images; -1 lets PyTorch infer the batch dimension
        data = x.view(-1, 28 * 28)
        # Forward pass through the network to get the predictions
        logits = net(data)
        # Compute the loss
        loss = criterion(logits, label)
        # Zero the gradients
        optimizer.zero_grad()
        # Backpropagate
        loss.backward()
        # Update the weights
        optimizer.step()
        # Print progress every 100 batches
        if batchIdx % 100 == 0:
            print("Train Epoch:{} [{}/{} ({:.0f}%)] \t loss: {:.6f}".format(epoch, batchIdx * len(data),
                                                                            len(train_loader.dataset),
                                                                            100. * batchIdx / len(train_loader),
                                                                            loss.item()))
        global_step += 1

    # Test phase
    test_loss = 0
    correct = 0
    # No gradients are needed for evaluation
    with torch.no_grad():
        for data, target in test_loader:
            # Flatten, same as above
            data = data.view(-1, 28 * 28)
            # Run the test data through the trained network
            test_logits = net(data)
            # Accumulate the loss
            test_loss += criterion(test_logits, target).item()
            # Take the index of the highest score as the predicted label
            pred = test_logits.max(1)[1]
            # Count the correct predictions
            correct += pred.eq(target).sum()
            # Test step counter; handy if you plot progress with Visdom
            global_test_step += 1
    # Average the loss over the test set
    test_loss /= len(test_loader.dataset)
    # Print the result
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
          format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
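Finally, if you want to keep the trained weights around, a minimal sketch using torch.save (the filename mnist_mlp.pt is just an example):

# Save only the learned parameters (the recommended PyTorch pattern).
torch.save(net.state_dict(), "mnist_mlp.pt")

# Later: rebuild the same architecture and load the weights back.
net2 = MLP()
net2.load_state_dict(torch.load("mnist_mlp.pt"))
net2.eval()  # switch to evaluation mode before inference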