PyTorch Fundamentals for Deep Learning
import torch
from torch import nn
from torch.nn import functional as F
I. Model Construction
net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10)) # build the network layer by layer, in order
# net consists of: a first layer mapping 20 input features to 256, a ReLU that applies the nonlinearity (negative values are set to 0), and a final layer mapping 256 features to 10 outputs
# nn.Linear automatically initializes each layer's parameters: the weights and the bias
X = torch.rand(2, 20)
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
0.1190, 0.7207],
[0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
0.2536, 0.4702]])
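The values printed above are the random input X itself; the network's actual output has one row per example and 10 columns, matching the 20 → 256 → 10 mapping described above. A minimal check, assuming the net and X defined above (the name out is just for illustration):
out = net(X)       # forward pass through the Sequential model
print(out.shape)   # expected: torch.Size([2, 10]) -- 2 examples, 10 output features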
1. Custom Blocks
# A class that implements the same model as above: override the parent class's __init__ and add nn.Linear(20, 256) as the hidden layer
# The output layer is nn.Linear(256, 10)
# forward defines the forward computation; F (torch.nn.functional) provides many common functions such as relu
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))
net = MLP() # instantiate the network using the class defined above
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
0.1190, 0.7207],
[0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
0.2536, 0.4702]])
2. A Custom Sequential Block
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        # register every module passed in; storing it in _modules (under a string key)
        # lets PyTorch track its parameters
        for idx, block in enumerate(args):
            self._modules[str(idx)] = block

    def forward(self, X):
        # apply each registered module to the input, in insertion order
        for block in self._modules.values():
            X = block(X)
        return X
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
0.1190, 0.7207],
[0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
0.2536, 0.4702]])
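As a side note, nn.Module also provides add_module for registering children under a string name, which is equivalent to writing into _modules directly. A minimal sketch (the class name MySequential2 and the variable my_seq are only illustrative):
class MySequential2(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # add_module registers the child under a string name, just like writing to _modules
            self.add_module(str(idx), block)

    def forward(self, X):
        for block in self._modules.values():
            X = block(X)
        return X

my_seq = MySequential2(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
print(my_seq(X).shape)  # torch.Size([2, 10])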
3. Executing Code in the Forward Function
This makes it easy to run arbitrary custom computation inside __init__ and forward.
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # a constant random weight matrix that is never trained (requires_grad=False)
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        # use the constant weights plus an offset, then apply ReLU
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # reuse self.linear: both applications share the same parameters
        X = self.linear(X)
        # arbitrary Python control flow is allowed inside forward
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
net = FixedHiddenMLP()
net(X)
tensor(0.3205, grad_fn=<SumBackward0>)
4. Mixing and Matching Different Ways of Composing Blocks
class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                 nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        return self.linear(self.net(X))
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)
tensor(0.0182, grad_fn=<SumBackward0>)
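The data flow through chimera is 20 → NestMLP → 16 → Linear(16, 20) → 20 → FixedHiddenMLP → scalar. A quick shape check, assuming the chimera and X defined above (the intermediate variable h is just for illustration):
h = chimera[0](X)     # NestMLP: (2, 20) -> (2, 16)
print(h.shape)        # torch.Size([2, 16])
h = chimera[1](h)     # Linear(16, 20): (2, 16) -> (2, 20)
print(h.shape)        # torch.Size([2, 20])
print(chimera[2](h))  # FixedHiddenMLP reduces the result to a scalar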
5. Summary
- Which methods does a custom Module need to define? (see the minimal skeleton below)
  - __init__(), which calls the parent class's __init__ and registers the sublayers
  - forward(), which defines the forward computation
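A minimal skeleton, using a hypothetical block that just wraps one linear layer:
class MinimalBlock(nn.Module):          # hypothetical example block
    def __init__(self):
        super().__init__()              # always call the parent __init__ first
        self.layer = nn.Linear(20, 20)  # register sublayers as attributes

    def forward(self, X):               # define the forward computation
        return F.relu(self.layer(X))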
II. Parameter Management
We start with a multilayer perceptron that has a single hidden layer.
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size = (2, 4))
net(X)
tensor([[0.1571],
[0.0735]], grad_fn=<AddmmBackward0>)
1. Parameter Access
Print the parameters of a specific layer.
# print the parameters of net[2], the third module and second linear layer (the output layer): its weight and bias, both created automatically by nn.Linear
print(net[2].state_dict())
OrderedDict([('weight', tensor([[-0.2839, 0.2971, -0.2729, 0.1470, -0.2174, -0.1694, 0.0226, 0.2773]])), ('bias', tensor([0.1295]))])
2. Targeted Parameters
print(type(net[2].bias)) # torch.nn.parameter.Parameter indicates that bias is a parameter that can be optimized
print(net[2].bias)
print(net[2].bias.data)
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1295], requires_grad=True)
tensor([0.1295])
# Why is the gradient None? Because backpropagation has not been run yet.
net[2].weight.grad == None
True
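To confirm this, a small sketch, assuming the net and X defined just above: run a forward pass, reduce the output to a scalar, call backward, and the gradient is no longer None.
out = net(X).sum()                  # a scalar is needed for backward()
out.backward()                      # backpropagation fills in .grad
print(net[2].weight.grad is None)   # False: the gradient now exists
print(net[2].weight.grad.shape)     # same shape as the weight, torch.Size([1, 8])
net.zero_grad()                     # clear the gradients so later cells are unaffected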
3. Accessing All Parameters at Once
# the * below unpacks the list so each (name, shape) tuple is passed to print as a separate argument
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])
# ReLU has no parameters; only the fully connected layers have parameters to extract
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
# named_parameters returns a generator; turning it into a list and calling .data on that list raises an error
print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
F:\Temp1/ipykernel_10604/3850033904.py in <module>
1 # named_parameters returns a generator; turning it into a list and calling .data on that list raises an error
----> 2 print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)
AttributeError: 'list' object has no attribute 'data'
net.state_dict()['0.weight'].data
tensor([[-0.0831, -0.3560, 0.3638, -0.1269],
[ 0.2467, 0.2860, 0.4703, 0.1994],
[ 0.2613, 0.4495, 0.4926, -0.4293],
[ 0.2690, 0.1472, -0.1898, -0.4751],
[ 0.2996, -0.4090, 0.0557, 0.4479],
[-0.1008, 0.2883, -0.1388, -0.4359],
[-0.0015, -0.2353, -0.1831, -0.3098],
[ 0.2744, 0.3285, -0.3505, 0.2105]])
4. Collecting Parameters from Nested Blocks
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())

def block2():
    net = nn.Sequential()
    # use a loop to add four copies of block1 to net, then return it
    for i in range(4):
        net.add_module(f'block{i}', block1())
    return net

# nest block2's Sequential, followed by a final Linear layer, inside an outer Sequential
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)
tensor([[0.5266],
[0.5266]], grad_fn=<AddmmBackward0>)
After designing the network, we can inspect how it is organized.
print(rgnet)
Sequential(
(0): Sequential(
(block0): Sequential(
(0): Linear(in_features=4, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=4, bias=True)
(3): ReLU()
)
(block1): Sequential(
(0): Linear(in_features=4, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=4, bias=True)
(3): ReLU()
)
(block2): Sequential(
(0): Linear(in_features=4, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=4, bias=True)
(3): ReLU()
)
(block3): Sequential(
(0): Linear(in_features=4, out_features=8, bias=True)
(1): ReLU()
(2): Linear(in_features=8, out_features=4, bias=True)
(3): ReLU()
)
)
(1): Linear(in_features=4, out_features=1, bias=True)
)
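Because the layers are nested, a specific parameter can be reached by indexing through the structure. A minimal sketch, assuming the rgnet defined above:
# rgnet[0] is block2's outer Sequential, [1] picks the sub-block named 'block1', [0] is its first Linear layer
print(rgnet[0][1][0].bias.data)   # bias tensor of shape (8,)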
5. Built-in Initialization
def init_normal(m):
    if type(m) == nn.Linear:
        # initialize weight from a normal distribution with mean 0 and standard deviation 0.01
        # the trailing underscore means the operation is in-place and does not return a new tensor
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # set the bias tensor to zero
        nn.init.zeros_(m.bias)

# apply this function to every module in net
net.apply(init_normal)
# Why is the sample mean not exactly 0? The weights are random draws; only the distribution's mean is 0.
net[0].weight.data, net[0].bias.data[0]
(tensor([[ 0.0014, 0.0051, -0.0076, 0.0048],
[-0.0051, 0.0081, -0.0030, -0.0091],
[ 0.0060, -0.0030, 0.0060, 0.0086],
[-0.0019, 0.0084, -0.0112, 0.0112],
[ 0.0138, -0.0095, 0.0061, 0.0164],
[ 0.0051, -0.0105, 0.0112, -0.0072],
[-0.0036, 0.0124, 0.0104, -0.0107],
[ 0.0048, 0.0070, -0.0121, 0.0020]]),
tensor(0.))
net[0].weight.data.sum()
tensor(0.0541)
# the sample mean is not exactly 0, especially with only a few weights
mn = nn.Sequential(nn.Linear(1, 2), nn.ReLU(), nn.Linear(2, 1))
mn.apply(init_normal)
mn[2].weight.data, mn[0].weight.data
(tensor([[ 0.0082, -0.0120]]),
tensor([[ 0.0192],
[-0.0005]]))
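The sample mean only approaches the distribution mean as the number of weights grows. A quick sketch, using a hypothetical large layer just for illustration:
big = nn.Linear(1000, 1000)               # hypothetical layer with 10^6 weights
nn.init.normal_(big.weight, mean=0, std=0.01)
print(big.weight.data.mean())             # close to 0, typically on the order of 1e-5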
def init_constant(m):
    if type(m) == nn.Linear:
        # set every fully connected layer's weights to 1
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]
(tensor([1., 1., 1., 1.]), tensor(0.))
6. Applying Different Initializations to Different Blocks
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data[0])
tensor([-0.6291, -0.4084, -0.3237, 0.6755])
tensor([42., 42., 42., 42., 42., 42., 42., 42.])
7. Custom Initialization
def my_init(m):
    if type(m) == nn.Linear:
        print(
            "init",
            *[(name, param.shape) for name, param in m.named_parameters()][0])
        # draw weights uniformly from [-10, 10]
        nn.init.uniform_(m.weight, -10, 10)
        # keep only weights whose absolute value is at least 5; the boolean mask zeroes out the rest
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
net[0].weight[:2]
init weight torch.Size([8, 4])
init weight torch.Size([1, 8])
tensor([[ 0.0000, -7.9767, -8.0924, -7.6980],
[-8.0685, -0.0000, -7.6175, 0.0000]], grad_fn=<SliceBackward0>)
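The masking line above works because a boolean tensor is promoted to 0/1 when multiplied with a float tensor. A tiny sketch on a made-up tensor:
t = torch.tensor([-9.0, -3.0, 2.0, 7.0])
mask = t.abs() >= 5   # tensor([ True, False, False,  True])
print(t * mask)       # entries with absolute value below 5 are zeroed out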
net[0].weight.data[:] += 1 # elementwise addition on every entry
net[0].weight.data[0, 0] = 42 # assign a single entry
net[0].weight.data[0]
tensor([42.0000, -6.9767, -7.0924, -6.6980])
8. Tied Parameters
shared = nn.Linear(8, 8)
# make the second and third fully connected layers the same object; however net is updated, those two positions always share this layer's parameters
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared,
nn.ReLU(), nn.Linear(8, 1))
net(X)
print(net[2].weight.data[0] == net[4].weight.data[0])
# this is equivalent to net[4].weight.data[0, 0] = 100 or shared.weight.data[0, 0] = 100
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])
tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
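The sharing holds because net[2] and net[4] are literally the same module object, so they also share a single parameter tensor. A minimal check, assuming the net defined above:
print(net[2] is net[4])                # True: the same nn.Linear object appears twice
print(net[2].weight is net[4].weight)  # True: one weight tensor, updated once per step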
III. Custom Layers
Defining a custom layer is not much different from defining a custom network.
1. Constructing a Custom Layer Without Any Parameters
class CenteredLayer(nn.Module): # a layer is also a subclass of nn.Module
    def __init__(self):
        super().__init__()

    def forward(self, X):
        # subtract the mean so the output is centered at zero
        return X - X.mean()
layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))
tensor([-2., -1., 0., 1., 2.])
2. Incorporating the Layer as a Component into More Complex Models
net = nn.Sequential(nn.Linear(8, 128), CenteredLayer()) # pass an instance of the custom layer
Y = net(torch.rand(4, 8))
Y.mean() # the mean of the output should be numerically close to zero
tensor(2.7940e-09, grad_fn=<MeanBackward0>)
3. Layers with Parameters
class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        # randn draws from a standard normal distribution
        self.weight = nn.Parameter(torch.randn(in_units, units))
        # the trailing comma distinguishes the tuple (num,) from the plain expression (num)
        # there is one bias per output unit, since each output column gets its own offset
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        # matmul is matrix multiplication; note that using .data bypasses autograd here
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)
dense = MyLinear(5, 3)
dense.weight
Parameter containing:
tensor([[ 0.3066, -0.0803, -1.3319],
[ 1.3973, 0.0083, 0.8268],
[-2.4014, -3.3563, 0.7567],
[-0.0061, -0.8527, -0.1337],
[ 0.7235, 2.4853, 0.6930]], requires_grad=True)
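Wrapping the tensors in nn.Parameter is what registers them with the module, so they show up in named_parameters and can be found by optimizers. A quick check, assuming the dense instance above:
for name, param in dense.named_parameters():
    print(name, param.shape)   # weight torch.Size([5, 3]) and bias torch.Size([3])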
4. Running Forward Computation Directly with the Custom Layer
dense(torch.rand(2, 5))
tensor([[1.5130, 0.8024, 1.4486],
[0.7055, 0.7942, 0.8594]])
5. Building Models with Custom Layers
net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))
tensor([[0.],
[0.]])
IV. Reading and Writing Files
1. Loading and Saving Tensors
# save a tensor
x_w = torch.arange(4)
torch.save(x_w, 'x-file') # stored in a binary format
x_w
tensor([0, 1, 2, 3])
# load the tensor back
x_r = torch.load('x-file')
x_r
tensor([0, 1, 2, 3])
2. Storing a List of Tensors and Reading Them Back into Memory
y = torch.zeros(4)
torch.save([x_w, y], 'x-files')
x2, y2 = torch.load('x-files')
(x2, y2)
(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))
3. Writing and Reading a Dictionary That Maps Strings to Tensors
mydict_w = {'x': x_w, 'y': y}
torch.save(mydict_w, 'mydict')
mydict_r = torch.load('mydict')
mydict_r
{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}
4. Loading and Saving Model Parameters
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))
net = MLP()
# generate a random input
X = torch.randn(size = (2, 20))
# compute the network's output
Y = net(X)
Y
tensor([[-0.3682, -0.4598, 0.2436, -0.4754, 0.1927, -0.2204, 0.0149, -0.2386,
0.0807, -0.8114],
[-0.0472, -0.0460, -0.0148, 0.0373, 0.2986, -0.3682, 0.1478, 0.4753,
0.1156, -0.1635]], grad_fn=<AddmmBackward0>)
# save the model's parameter dictionary to a file named 'mlp.params'; only the parameters are stored, not the architecture
torch.save(net.state_dict(), 'mlp.params')
Note: clone.eval() below is nn.Module.eval(), which switches the model into evaluation mode (it affects layers such as dropout and batch normalization, disabling their training-time behavior). It is unrelated to Python's built-in eval() function, which evaluates a string as an expression.
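A small sketch of what eval()/train() actually toggle, assuming the MLP class defined above (clone_demo is a throwaway name used only for illustration):
clone_demo = MLP()           # hypothetical throwaway instance
clone_demo.eval()            # switch to evaluation mode
print(clone_demo.training)   # False
clone_demo.train()           # switch back to training mode
print(clone_demo.training)   # True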
# instantiate a fresh copy of the original MLP and read the stored parameters directly from the file
clone = MLP() # since the architecture was not saved, clone must first be built with the same structure
clone.load_state_dict(torch.load('mlp.params')) # load the parameters into it
clone.eval()
MLP(
(hidden): Linear(in_features=20, out_features=256, bias=True)
(output): Linear(in_features=256, out_features=10, bias=True)
)
Y_clone = clone(X)
Y_clone == Y
tensor([[True, True, True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True, True, True]])