PyTorch Basics


PyTorch fundamentals for deep learning

import torch
from torch import nn
from torch.nn import functional as F

I. Model Construction

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10)) # build the network layer by layer, in order

# net consists of: a first layer mapping the 20 input features to 256, a second layer applying the nonlinearity (setting negative values to 0), and a third layer mapping the 256 features down to 10
# nn.Linear automatically initializes each layer's parameters: the weight and the bias
X = torch.rand(2, 20)
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])
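
As a quick sanity check (a minimal sketch, assuming the net and X defined above), the output of the 20 → 256 → ReLU → 10 pipeline for a (2, 20) input should have shape (2, 10):

print(net(X).shape) # expected: torch.Size([2, 10])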

1. Custom Blocks

# A class that implements the same model as above: override the parent's __init__ and add nn.Linear(20, 256) as the hidden layer
# The output layer is set to nn.Linear(256, 10)
# forward defines the forward computation; F provides a large number of commonly used functions
class MLP(nn.Module): 
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)
    
    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))
net = MLP() # instantiate the network using the class defined above
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])

2. A Custom Sequential Block

class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        # Register each block passed in (keys in _modules must be strings) so PyTorch knows about the submodules
        for idx, block in enumerate(args):
            self._modules[str(idx)] = block
            
    def forward(self, X):
        # Apply each registered block to the input in turn
        for block in self._modules.values():
            X = block(X)
        return X
    
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])
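
Because MySequential registers each block under a string key in self._modules, PyTorch's bookkeeping sees the submodules just as it does for nn.Sequential. A small check, assuming the net built just above:

# the registered blocks and their parameters are visible to PyTorch
print(net) # lists the two Linear layers and the ReLU
print(len(list(net.parameters()))) # 4 tensors: two weights and two biases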

3. Executing Code in the Forward Propagation Function

This makes it easy to carry out arbitrary custom computation inside __init__ and forward.

class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # fixed random weights that are never updated during training (a plain tensor, not a Parameter)
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # reuse the same linear layer: both calls share the same parameters
        X = self.linear(X)
        # control flow: keep halving X until the sum of absolute values is at most 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
    
net = FixedHiddenMLP()
net(X)
tensor(0.3205, grad_fn=<SumBackward0>)
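
Note that rand_weight was created with requires_grad=False, so it is a fixed constant rather than a learnable parameter; only the reused linear layer appears among the parameters. A quick check, assuming the FixedHiddenMLP instance above:

# only the nn.Linear layer registers parameters; rand_weight stays a constant
for name, param in net.named_parameters():
    print(name, param.shape) # linear.weight torch.Size([20, 20]), linear.bias torch.Size([20])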

4. Mixing and Matching Different Ways of Combining Blocks

class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)
    def forward(self, X):
        return self.linear(self.net(X))
    
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)
tensor(0.0182, grad_fn=<SumBackward0>)

5. Summary

  • Which methods does a custom Module need to define?
    1. __init__(), which calls the parent's initializer via super().__init__() and then sets up the layers
    2. forward(), which defines the forward computation (a minimal template is sketched below)
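
A minimal template for such a custom Module (the 20 → 20 layer size is chosen arbitrarily for illustration):

class MyBlock(nn.Module):
    def __init__(self):
        super().__init__() # initialize the nn.Module internals first
        self.layer = nn.Linear(20, 20) # then register the layers

    def forward(self, X):
        return F.relu(self.layer(X)) # define the forward computation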

II. Parameter Management

We start with a multilayer perceptron that has a single hidden layer.

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size = (2, 4))
net(X)
tensor([[0.1571],
        [0.0735]], grad_fn=<AddmmBackward0>)

1. Parameter Access

Print the parameters of a given layer.

# Print the parameters of net[2], i.e., the second linear layer (the output layer): its weight and bias, both generated automatically by nn.Linear
print(net[2].state_dict()) 
OrderedDict([('weight', tensor([[-0.2839,  0.2971, -0.2729,  0.1470, -0.2174, -0.1694,  0.0226,  0.2773]])), ('bias', tensor([0.1295]))])

2. Targeted Parameters

print(type(net[2].bias)) # torch.nn.parameter.Parameter indicates that bias is a parameter that can be optimized
print(net[2].bias) 
print(net[2].bias.data)
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1295], requires_grad=True)
tensor([0.1295])
# Why is the gradient None? Because no backward pass has been run yet (see the sketch after the output below)
net[2].weight.grad == None
True
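
After a backward pass the gradient is populated. A minimal sketch, assuming the net and X defined just above and using the output sum as a stand-in for a loss:

loss = net(X).sum() # a toy scalar standing in for a loss
loss.backward()
print(net[2].weight.grad is None) # now prints False
net.zero_grad() # clear the gradients again so later cells are unaffected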

3. Accessing All Parameters at Once

# The * below unpacks the list, splitting the sequence into separate arguments
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()]) 
# ReLU has no parameters; only the fully connected layers expose parameters
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
# named_parameters returns a generator of (name, parameter) pairs; the plain Python list built from it has no .data attribute, so the call below raises an AttributeError
print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

F:\Temp1/ipykernel_10604/3850033904.py in <module>
      1 # named_parameters 方法返回的是一个生成器
----> 2 print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)


AttributeError: 'list' object has no attribute 'data'
net.state_dict()['0.weight'].data
tensor([[-0.0831, -0.3560,  0.3638, -0.1269],
        [ 0.2467,  0.2860,  0.4703,  0.1994],
        [ 0.2613,  0.4495,  0.4926, -0.4293],
        [ 0.2690,  0.1472, -0.1898, -0.4751],
        [ 0.2996, -0.4090,  0.0557,  0.4479],
        [-0.1008,  0.2883, -0.1388, -0.4359],
        [-0.0015, -0.2353, -0.1831, -0.3098],
        [ 0.2744,  0.3285, -0.3505,  0.2105]])

4. Collecting Parameters from Nested Blocks

def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    # use a loop to add four block1 instances to net, then return it
    for i in range(4):
        net.add_module(f'block{i}', block1())
    return net
# pass the Sequential returned by block2 and a final Linear layer to the outer Sequential
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)
tensor([[0.5266],
        [0.5266]], grad_fn=<AddmmBackward0>)

After the network has been designed, we can inspect how it is organized.

print(rgnet)
Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
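
Because the layers are nested hierarchically, they can also be reached by indexing into the nesting. For example (assuming the rgnet built above), the bias of the first linear layer inside the second block1 instance:

print(rgnet[0][1][0].bias.data) # outer Sequential -> block2's Sequential -> 'block1' -> its first Linear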

5. Built-in Initialization

def init_normal(m):
    if type(m) == nn.Linear:
        # initialize the weight parameter from a normal distribution with mean 0 and standard deviation 0.01
        # the trailing underscore means the operation modifies weight in place instead of returning a new tensor
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # set the bias tensor to zero
        nn.init.zeros_(m.bias)
# apply this function to every layer in net
net.apply(init_normal)
# Why isn't the sample mean exactly 0? With only 8 x 4 draws from N(0, 0.01^2), the sample mean is merely close to 0; only its expectation is 0.
net[0].weight.data, net[0].bias.data[0]
(tensor([[ 0.0014,  0.0051, -0.0076,  0.0048],
         [-0.0051,  0.0081, -0.0030, -0.0091],
         [ 0.0060, -0.0030,  0.0060,  0.0086],
         [-0.0019,  0.0084, -0.0112,  0.0112],
         [ 0.0138, -0.0095,  0.0061,  0.0164],
         [ 0.0051, -0.0105,  0.0112, -0.0072],
         [-0.0036,  0.0124,  0.0104, -0.0107],
         [ 0.0048,  0.0070, -0.0121,  0.0020]]),
 tensor(0.))
net[0].weight.data.sum()
tensor(0.0541)
# the sample mean is not exactly 0 here either (a larger-sample check follows below)
mn = nn.Sequential(nn.Linear(1, 2), nn.ReLU(), nn.Linear(2, 1))
mn.apply(init_normal)
mn[2].weight.data, mn[0].weight.data
(tensor([[ 0.0082, -0.0120]]),
 tensor([[ 0.0192],
         [-0.0005]]))
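
With many more entries the sample mean gets much closer to zero. A quick check of this, using nn.init.normal_ on a large plain tensor (the 1000 x 1000 size is arbitrary, for illustration only):

big = torch.empty(1000, 1000)
nn.init.normal_(big, mean=0, std=0.01)
print(big.mean()) # on the order of 1e-5: much closer to 0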
def init_constant(m):
    if type(m) == nn.Linear:
        # set the weight of every fully connected layer to 1
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)
        
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]
(tensor([1., 1., 1., 1.]), tensor(0.))

6. Applying Different Initialization Methods to Certain Blocks

def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        
def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)
        
net[0].apply(xavier)
net[2].apply(init_42)

print(net[0].weight.data[0])
print(net[2].weight.data[0])
tensor([-0.6291, -0.4084, -0.3237,  0.6755])
tensor([42., 42., 42., 42., 42., 42., 42., 42.])

7. Custom Initialization

def my_init(m):
    if type(m) == nn.Linear:
        print(
            "init",
            *[(name, param.shape) for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        # keep only weights with absolute value >= 5; the rest are multiplied by 0 (False)
        m.weight.data *= m.weight.data.abs() >= 5
        
net.apply(my_init)
net[0].weight[:2]
init weight torch.Size([8, 4])
init weight torch.Size([1, 8])
tensor([[ 0.0000, -7.9767, -8.0924, -7.6980],
        [-8.0685, -0.0000, -7.6175,  0.0000]], grad_fn=<SliceBackward0>)
net[0].weight.data[:] += 1 # add 1 to every entry of the weight matrix
net[0].weight.data[0, 0] = 42 # assign a single element
net[0].weight.data[0]
tensor([42.0000, -6.9767, -7.0924, -6.6980])

8. Tied Parameters

shared = nn.Linear(8, 8)
# Make the second and third fully connected layers point to the same module; however net is updated, those two positions always share this layer's parameters
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared,
                   nn.ReLU(), nn.Linear(8, 1))
net(X)
print(net[2].weight.data[0] == net[4].weight.data[0])

# Because they are the same object, this is equivalent to net[4].weight.data[0, 0] = 100 or shared.weight.data[0, 0] = 100
net[2].weight.data[0, 0] = 100 
print(net[2].weight.data[0] == net[4].weight.data[0])
tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
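
The two positions hold the very same module object rather than two equal copies, so the comparison above is really an identity check. A small confirmation, assuming the net built above:

print(net[2] is net[4]) # True: one shared nn.Linear instance
print(net[2].weight is net[4].weight) # True: one shared weight tensor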

III. Custom Layers

Defining a custom layer is not very different from defining a custom network.

1. Building a Custom Layer Without Any Parameters

class CenteredLayer(nn.Module): # a layer is also a subclass of Module
    def __init__(self):
        super().__init__()
        
    def forward(self, X):
        return X - X.mean()
    
layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))
tensor([-2., -1.,  0.,  1.,  2.])

2. Incorporating the Layer as a Component in More Complex Models

net = nn.Sequential(nn.Linear(8, 128), CenteredLayer()) # pass in an instance of the custom layer

Y = net(torch.rand(4, 8))
Y.mean() # the mean of the output; since CenteredLayer subtracts the mean, this should be numerically close to zero
tensor(2.7940e-09, grad_fn=<MeanBackward0>)

3. Layers with Parameters

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        # randn samples from a standard normal distribution
        self.weight = nn.Parameter(torch.randn(in_units, units))
        # the trailing comma distinguishes the tuple (num,) from a parenthesized number (num)
        # there is one bias per output column (unit), since each output column gets its own bias
        self.bias = nn.Parameter(torch.randn(units,)) 
        
    def forward(self, X):
        # matmul performs matrix multiplication
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)
    
dense = MyLinear(5, 3)
dense.weight
Parameter containing:
tensor([[ 0.3066, -0.0803, -1.3319],
        [ 1.3973,  0.0083,  0.8268],
        [-2.4014, -3.3563,  0.7567],
        [-0.0061, -0.8527, -0.1337],
        [ 0.7235,  2.4853,  0.6930]], requires_grad=True)

4. Running Forward Propagation Directly with the Custom Layer

dense(torch.rand(2, 5))
tensor([[1.5130, 0.8024, 1.4486],
        [0.7055, 0.7942, 0.8594]])

5. Building Models with the Custom Layer

net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))
tensor([[0.],
        [0.]])

IV. Reading and Writing Files

1. Loading and Saving Tensors

# save a tensor
x_w = torch.arange(4)
torch.save(x_w, 'x-file') # saved in a binary format
x_w
tensor([0, 1, 2, 3])
# load the tensor back
x_r = torch.load('x-file')
x_r
tensor([0, 1, 2, 3])

2. Storing a List of Tensors and Reading Them Back into Memory

y = torch.zeros(4)
torch.save([x_w, y], 'x-files')
x2, y2 = torch.load('x-files')
(x2, y2)
(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))

3. Writing and Reading a Dictionary Mapping Strings to Tensors

mydict_w = {'x': x_w, 'y': y}
torch.save(mydict_w, 'mydict')
mydict_r = torch.load('mydict')
mydict_r
{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

4. Loading and Saving Model Parameters

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)
    
    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))
    
net = MLP()
# generate a random input
X = torch.randn(size = (2, 20))
# compute the network's output
Y = net(X)
Y
tensor([[-0.3682, -0.4598,  0.2436, -0.4754,  0.1927, -0.2204,  0.0149, -0.2386,
          0.0807, -0.8114],
        [-0.0472, -0.0460, -0.0148,  0.0373,  0.2986, -0.3682,  0.1478,  0.4753,
          0.1156, -0.1635]], grad_fn=<AddmmBackward0>)
# save the model's parameter dictionary to a file called 'mlp.params'; only the parameters are stored, not the architecture
torch.save(net.state_dict(), 'mlp.params')
  • Note that clone.eval() below is the nn.Module.eval() method, not Python's built-in eval() function: it switches the module into evaluation mode (which affects layers such as dropout and batch normalization) and returns the module itself, which is why the model summary is printed as the cell's output.
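
To see what evaluation mode changes, a dropout layer behaves differently in train vs. eval mode; a minimal sketch (the Dropout layer and p=0.5 are illustrative, not part of the original notebook):

drop = nn.Dropout(p=0.5)
x = torch.ones(8)
drop.train() # training mode: roughly half the entries are zeroed, survivors scaled by 2
print(drop(x))
drop.eval() # evaluation mode: dropout is disabled, the input passes through unchanged
print(drop(x))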

# Instantiate a fresh copy of the original multilayer perceptron, then load the stored parameters directly from the file
clone = MLP() # since the architecture was not saved, clone must first be constructed with the same structure
clone.load_state_dict(torch.load('mlp.params')) # fill in the parameters
clone.eval()
MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)
Y_clone = clone(X)
Y_clone == Y
tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])
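
The outputs match because the loaded parameters are identical to the saved ones; a quick check, assuming the net and clone above:

# every tensor in clone's state_dict equals the corresponding tensor in net's
for (k1, v1), (k2, v2) in zip(net.state_dict().items(), clone.state_dict().items()):
    assert k1 == k2 and torch.equal(v1, v2)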