PyTorch Basics


PyTorch fundamentals for deep learning

import torch
from torch import nn
from torch.nn import functional as F

I. Model Construction

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10)) # build the network layer by layer, in order

# net consists of: a first layer mapping the 20 input features to 256, a second layer applying the nonlinearity (setting negative values to 0), and a third layer mapping the 256 features down to 10
# nn.Linear automatically initializes each layer's parameters: the weight and the bias
X = torch.rand(2, 20)
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])
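
As a quick sanity check (a minimal sketch, assuming the net and X defined above), the output of the 20 → 256 → ReLU → 10 pipeline for a (2, 20) input should have shape (2, 10):

print(net(X).shape) # expected: torch.Size([2, 10])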

1. Custom Blocks

# A class that implements the same model as above: override the parent's __init__ and add nn.Linear(20, 256) as the hidden layer
# The output layer is set to nn.Linear(256, 10)
# forward defines the forward computation; F provides a large number of commonly used functions
class MLP(nn.Module): 
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)
    
    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))
net = MLP() # instantiate the network using the class defined above
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])

2. A Custom Sequential Block

class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        # Register each block passed in (keys in _modules must be strings) so PyTorch knows about the submodules
        for idx, block in enumerate(args):
            self._modules[str(idx)] = block
            
    def forward(self, X):
        # Apply each registered block to the input in turn
        for block in self._modules.values():
            X = block(X)
        return X
    
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)
X
tensor([[0.2054, 0.8184, 0.9979, 0.9119, 0.3989, 0.2033, 0.6710, 0.9759, 0.5752,
         0.0476, 0.4391, 0.7127, 0.1686, 0.6571, 0.7262, 0.0141, 0.3670, 0.9285,
         0.1190, 0.7207],
        [0.4501, 0.2882, 0.6584, 0.8790, 0.9895, 0.8160, 0.7391, 0.0306, 0.1681,
         0.6919, 0.3837, 0.9131, 0.8668, 0.7399, 0.4439, 0.7896, 0.6411, 0.3337,
         0.2536, 0.4702]])
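
Because MySequential registers each block under a string key in self._modules, PyTorch's bookkeeping sees the submodules just as it does for nn.Sequential. A small check, assuming the net built just above:

# the registered blocks and their parameters are visible to PyTorch
print(net) # lists the two Linear layers and the ReLU
print(len(list(net.parameters()))) # 4 tensors: two weights and two biases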

3. Executing Code in the Forward Propagation Function

This makes it easy to carry out arbitrary custom computation inside __init__ and forward.

class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # fixed random weights that are never updated during training (a plain tensor, not a Parameter)
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # reuse the same linear layer: both calls share the same parameters
        X = self.linear(X)
        # control flow: keep halving X until the sum of absolute values is at most 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
    
net = FixedHiddenMLP()
net(X)
tensor(0.3205, grad_fn=<SumBackward0>)
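
Note that rand_weight was created with requires_grad=False, so it is a fixed constant rather than a learnable parameter; only the reused linear layer appears among the parameters. A quick check, assuming the FixedHiddenMLP instance above:

# only the nn.Linear layer registers parameters; rand_weight stays a constant
for name, param in net.named_parameters():
    print(name, param.shape) # linear.weight torch.Size([20, 20]), linear.bias torch.Size([20])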

4. Mixing and Matching Different Ways of Combining Blocks

class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)
    def forward(self, X):
        return self.linear(self.net(X))
    
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)
tensor(0.0182, grad_fn=<SumBackward0>)

5. Summary

  • Which methods does a custom Module need to define?
    1. __init__(), which calls the parent's initializer via super().__init__() and then sets up the layers
    2. forward(), which defines the forward computation (a minimal template is sketched below)
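
A minimal template for such a custom Module (the 20 → 20 layer size is chosen arbitrarily for illustration):

class MyBlock(nn.Module):
    def __init__(self):
        super().__init__() # initialize the nn.Module internals first
        self.layer = nn.Linear(20, 20) # then register the layers

    def forward(self, X):
        return F.relu(self.layer(X)) # define the forward computation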

II. Parameter Management

We start with a multilayer perceptron that has a single hidden layer.

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size = (2, 4))
net(X)
tensor([[0.1571],
        [0.0735]], grad_fn=<AddmmBackward0>)

1. Parameter Access

Print the parameters of a given layer.

# Print the parameters of net[2], i.e., the second linear layer (the output layer): its weight and bias, both generated automatically by nn.Linear
print(net[2].state_dict()) 
OrderedDict([('weight', tensor([[-0.2839,  0.2971, -0.2729,  0.1470, -0.2174, -0.1694,  0.0226,  0.2773]])), ('bias', tensor([0.1295]))])

2. Targeted Parameters

print(type(net[2].bias)) # torch.nn.parameter.Parameter indicates that bias is a parameter that can be optimized
print(net[2].bias) 
print(net[2].bias.data)
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1295], requires_grad=True)
tensor([0.1295])
# Why is the gradient None? Because no backward pass has been run yet (see the sketch after the output below)
net[2].weight.grad == None
True
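
After a backward pass the gradient is populated. A minimal sketch, assuming the net and X defined just above and using the output sum as a stand-in for a loss:

loss = net(X).sum() # a toy scalar standing in for a loss
loss.backward()
print(net[2].weight.grad is None) # now prints False
net.zero_grad() # clear the gradients again so later cells are unaffected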

3. Accessing All Parameters at Once

# The * below unpacks the list, splitting the sequence into separate arguments
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()]) 
# ReLU has no parameters; only the fully connected layers expose parameters
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))
# named_parameters returns a generator of (name, parameter) pairs; the plain Python list built from it has no .data attribute, so the call below raises an AttributeError
print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

F:\Temp1/ipykernel_10604/3850033904.py in <module>
      1 # named_parameters 方法返回的是一个生成器
----> 2 print(*[(name, param.shape) for name, param in net[0].named_parameters()].data)


AttributeError: 'list' object has no attribute 'data'
net.state_dict()['0.weight'].data
tensor([[-0.0831, -0.3560,  0.3638, -0.1269],
        [ 0.2467,  0.2860,  0.4703,  0.1994],
        [ 0.2613,  0.4495,  0.4926, -0.4293],
        [ 0.2690,  0.1472, -0.1898, -0.4751],
        [ 0.2996, -0.4090,  0.0557,  0.4479],
        [-0.1008,  0.2883, -0.1388, -0.4359],
        [-0.0015, -0.2353, -0.1831, -0.3098],
        [ 0.2744,  0.3285, -0.3505,  0.2105]])

4. Collecting Parameters from Nested Blocks

def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    # use a loop to add four block1 instances to net, then return it
    for i in range(4):
        net.add_module(f'block{i}', block1())
    return net
# pass the Sequential returned by block2 and a final Linear layer to the outer Sequential
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)
tensor([[0.5266],
        [0.5266]], grad_fn=<AddmmBackward0>)

After the network has been designed, we can inspect how it is organized.

print(rgnet)
Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
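
Because the layers are nested hierarchically, they can also be reached by indexing into the nesting. For example (assuming the rgnet built above), the bias of the first linear layer inside the second block1 instance:

print(rgnet[0][1][0].bias.data) # outer Sequential -> block2's Sequential -> 'block1' -> its first Linear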

5. Built-in Initialization

def init_normal(m):
    if type(m) == nn.Linear:
        # initialize the weight parameter from a normal distribution with mean 0 and standard deviation 0.01
        # the trailing underscore means the operation modifies weight in place instead of returning a new tensor
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # set the bias tensor to zero
        nn.init.zeros_(m.bias)
# apply this function to every layer in net
net.apply(init_normal)
# Why isn't the sample mean exactly 0? With only 8 x 4 draws from N(0, 0.01^2), the sample mean is merely close to 0; only its expectation is 0.
net[0].weight.data, net[0].bias.data[0]
(tensor([[ 0.0014,  0.0051, -0.0076,  0.0048],
         [-0.0051,  0.0081, -0.0030, -0.0091],
         [ 0.0060, -0.0030,  0.0060,  0.0086],
         [-0.0019,  0.0084, -0.0112,  0.0112],
         [ 0.0138, -0.0095,  0.0061,  0.0164],
         [ 0.0051, -0.0105,  0.0112, -0.0072],
         [-0.0036,  0.0124,  0.0104, -0.0107],
         [ 0.0048,  0.0070, -0.0121,  0.0020]]),
 tensor(0.))
net[0].weight.data.sum()
tensor(0.0541)
# the sample mean is not exactly 0 here either (a larger-sample check follows below)
mn = nn.Sequential(nn.Linear(1, 2), nn.ReLU(), nn.Linear(2, 1))
mn.apply(init_normal)
mn[2].weight.data, mn[0].weight.data
(tensor([[ 0.0082, -0.0120]]),
 tensor([[ 0.0192],
         [-0.0005]]))
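
With many more entries the sample mean gets much closer to zero. A quick check of this, using nn.init.normal_ on a large plain tensor (the 1000 x 1000 size is arbitrary, for illustration only):

big = torch.empty(1000, 1000)
nn.init.normal_(big, mean=0, std=0.01)
print(big.mean()) # on the order of 1e-5: much closer to 0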
def init_constant(m):
    if type(m) == nn.Linear:
        # set the weight of every fully connected layer to 1
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)
        
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]
(tensor([1., 1., 1., 1.]), tensor(0.))

6. Applying Different Initialization Methods to Certain Blocks

def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        
def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)
        
net[0].apply(xavier)
net[2].apply(init_42)

print(net[0].weight.data[0])
print(net[2].weight.data[0])
tensor([-0.6291, -0.4084, -0.3237,  0.6755])
tensor([42., 42., 42., 42., 42., 42., 42., 42.])

7. Custom Initialization

def my_init(m):
    if type(m) == nn.Linear:
        print(
            "init",
            *[(name, param.shape) for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        # keep only weights with absolute value >= 5; the rest are multiplied by 0 (False)
        m.weight.data *= m.weight.data.abs() >= 5
        
net.apply(my_init)
net[0].weight[:2]
init weight torch.Size([8, 4])
init weight torch.Size([1, 8])
tensor([[ 0.0000, -7.9767, -8.0924, -7.6980],
        [-8.0685, -0.0000, -7.6175,  0.0000]], grad_fn=<SliceBackward0>)
net[0].weight.data[:] += 1 # add 1 to every entry of the weight matrix
net[0].weight.data[0, 0] = 42 # assign a single element
net[0].weight.data[0]
tensor([42.0000, -6.9767, -7.0924, -6.6980])

8. Tied Parameters

shared = nn.Linear(8, 8)
# Make the second and third fully connected layers point to the same module; however net is updated, those two positions always share this layer's parameters
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared,
                   nn.ReLU(), nn.Linear(8, 1))
net(X)
print(net[2].weight.data[0] == net[4].weight.data[0])

# Because they are the same object, this is equivalent to net[4].weight.data[0, 0] = 100 or shared.weight.data[0, 0] = 100
net[2].weight.data[0, 0] = 100 
print(net[2].weight.data[0] == net[4].weight.data[0])
tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
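
The two positions hold the very same module object rather than two equal copies, so the comparison above is really an identity check. A small confirmation, assuming the net built above:

print(net[2] is net[4]) # True: one shared nn.Linear instance
print(net[2].weight is net[4].weight) # True: one shared weight tensor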

III. Custom Layers

Defining a custom layer is not very different from defining a custom network.

1. Building a Custom Layer Without Any Parameters

class CenteredLayer(nn.Module): # a layer is also a subclass of Module
    def __init__(self):
        super().__init__()
        
    def forward(self, X):
        return X - X.mean()
    
layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))
tensor([-2., -1.,  0.,  1.,  2.])

2. Incorporating the Layer as a Component in More Complex Models

net = nn.Sequential(nn.Linear(8, 128), CenteredLayer()) # pass in an instance of the custom layer

Y = net(torch.rand(4, 8))
Y.mean() # the mean of the output; since CenteredLayer subtracts the mean, this should be numerically close to zero
tensor(2.7940e-09, grad_fn=<MeanBackward0>)

3. Layers with Parameters

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        # randn samples from a standard normal distribution
        self.weight = nn.Parameter(torch.randn(in_units, units))
        # the trailing comma distinguishes the tuple (num,) from a parenthesized number (num)
        # there is one bias per output column (unit), since each output column gets its own bias
        self.bias = nn.Parameter(torch.randn(units,)) 
        
    def forward(self, X):
        # matmul performs matrix multiplication
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)
    
dense = MyLinear(5, 3)
dense.weight
Parameter containing:
tensor([[ 0.3066, -0.0803, -1.3319],
        [ 1.3973,  0.0083,  0.8268],
        [-2.4014, -3.3563,  0.7567],
        [-0.0061, -0.8527, -0.1337],
        [ 0.7235,  2.4853,  0.6930]], requires_grad=True)

4. Running Forward Propagation Directly with the Custom Layer

dense(torch.rand(2, 5))
tensor([[1.5130, 0.8024, 1.4486],
        [0.7055, 0.7942, 0.8594]])

5. Building Models with the Custom Layer

net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))
tensor([[0.],
        [0.]])

IV. Reading and Writing Files

1. Loading and Saving Tensors

# save a tensor
x_w = torch.arange(4)
torch.save(x_w, 'x-file') # saved in a binary format
x_w
tensor([0, 1, 2, 3])
# load the tensor back
x_r = torch.load('x-file')
x_r
tensor([0, 1, 2, 3])

2. Storing a List of Tensors and Reading Them Back into Memory

y = torch.zeros(4)
torch.save([x_w, y], 'x-files')
x2, y2 = torch.load('x-files')
(x2, y2)
(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))

3. Writing and Reading a Dictionary Mapping Strings to Tensors

mydict_w = {'x': x_w, 'y': y}
torch.save(mydict_w, 'mydict')
mydict_r = torch.load('mydict')
mydict_r
{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

4. Loading and Saving Model Parameters

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)
    
    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))
    
net = MLP()
# generate a random input
X = torch.randn(size = (2, 20))
# compute the network's output
Y = net(X)
Y
tensor([[-0.3682, -0.4598,  0.2436, -0.4754,  0.1927, -0.2204,  0.0149, -0.2386,
          0.0807, -0.8114],
        [-0.0472, -0.0460, -0.0148,  0.0373,  0.2986, -0.3682,  0.1478,  0.4753,
          0.1156, -0.1635]], grad_fn=<AddmmBackward0>)
# save the model's parameter dictionary to a file called 'mlp.params'; only the parameters are stored, not the architecture
torch.save(net.state_dict(), 'mlp.params')
  • Note that clone.eval() below is the nn.Module.eval() method, not Python's built-in eval() function: it switches the module into evaluation mode (which affects layers such as dropout and batch normalization) and returns the module itself, which is why the model summary is printed as the cell's output.
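
To see what evaluation mode changes, a dropout layer behaves differently in train vs. eval mode; a minimal sketch (the Dropout layer and p=0.5 are illustrative, not part of the original notebook):

drop = nn.Dropout(p=0.5)
x = torch.ones(8)
drop.train() # training mode: roughly half the entries are zeroed, survivors scaled by 2
print(drop(x))
drop.eval() # evaluation mode: dropout is disabled, the input passes through unchanged
print(drop(x))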

# Instantiate a fresh copy of the original multilayer perceptron, then load the stored parameters directly from the file
clone = MLP() # since the architecture was not saved, clone must first be constructed with the same structure
clone.load_state_dict(torch.load('mlp.params')) # fill in the parameters
clone.eval()
MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)
Y_clone = clone(X)
Y_clone == Y
tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])
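
The outputs match because the loaded parameters are identical to the saved ones; a quick check, assuming the net and clone above:

# every tensor in clone's state_dict equals the corresponding tensor in net's
for (k1, v1), (k2, v2) in zip(net.state_dict().items(), clone.state_dict().items()):
    assert k1 == k2 and torch.equal(v1, v2)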