Paddle快速上手

397 阅读4分钟

目录

  • 说明
  • 创建数据集
  • 构建模型
    • Sequential 组网
    • Layer 组网
  • 配置模型
  • 训练模型
    • 基于高层API
    • 基于底层API
  • 模型预测
  • 模型保存
  • 加载参数

说明

本文以简单的加法计算为例,作为Paddle的快速入门教程:构建并训练一个模型来计算算式的结果,内容包括数据集的构建,以及模型的构建、训练、保存与加载等。

算式结构:a+b(a、b均为整数,a∈[0,9],b∈[0,9]),共100个算式,答案范围为0~18

创建数据集

import paddle

# Generate every expression "a+b" with single-digit operands (10 x 10 samples).
data = []   # encoded expressions: one list of character code points per sample
data_ = []  # the expression strings themselves, kept for display
label = []  # the sums, each wrapped in a one-element list
for a in range(10):
    for b in range(10):
        # Build the expression text
        eq = f'{a}+{b}'
        # Both operands are already ints, so add them directly rather than
        # running eval() on the string that was just formatted from them.
        label.append([a + b])
        data_.append(eq)
        # "Encode" the expression: replace each character with its code point
        data.append([ord(ch) for ch in eq])
# Build the training inputs.
# Paddle's default dtype is float32, so the inputs are created as float32 too;
# otherwise the model's computations further down run into problems.
data = paddle.to_tensor(data, dtype='float32')
# Build the labels.
label = paddle.to_tensor(label, dtype='int64')

# Dataset wrapper around the generated samples
class Dataset(paddle.io.Dataset):
    """Pair each entry of ``data`` with the entry of ``label`` at the same index."""

    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __len__(self):
        """Return ``len()`` of the stored ``data``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(data, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        target = self.label[idx]
        return sample, target

dataset = Dataset(data, label)
# Print every expression next to its encoded input and its label
for i, j in zip(data_, dataset):
    print(f'算式:{i} ASCII编码:{j[0].numpy()} 答案:{j[1].numpy()[0]}')
# Print the number of samples in the dataset
print(len(dataset))

屏幕截图 2022-08-14 214928.png

构建模型

(1)Sequential 组网

import paddle

# Assemble the network with paddle.nn.Sequential
layers = [
    # 3 input values -> 64 outputs (neurons)
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    # 19 outputs: one per possible answer, 0 through 18
    paddle.nn.Linear(16, 19),
]
network = paddle.nn.Sequential(*layers)
# Wrap the network in paddle.Model
model = paddle.Model(network)

(2)Layer 组网

import paddle

# Assemble the same network by subclassing paddle.nn.Layer
class Model(paddle.nn.Layer):
    """Three Linear layers with a LeakyReLU and a Sigmoid between them."""

    def __init__(self):
        super().__init__()
        # 3 input values -> 64 -> 16 -> 19 outputs
        self.linear_1 = paddle.nn.Linear(3, 64)
        self.leakyrelu_1 = paddle.nn.LeakyReLU()
        self.linear_2 = paddle.nn.Linear(64, 16)
        self.sigmoid_1 = paddle.nn.Sigmoid()
        self.linear_3 = paddle.nn.Linear(16, 19)

    def forward(self, x):
        """Pass ``x`` through the layers in declaration order and return the result."""
        hidden = self.leakyrelu_1(self.linear_1(x))
        hidden = self.sigmoid_1(self.linear_2(hidden))
        return self.linear_3(hidden)


# Wrap an instance of the network in paddle.Model
model = paddle.Model(Model())

配置模型

import paddle

net = paddle.nn.Sequential(
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    paddle.nn.Linear(16, 19),
)
model = paddle.Model(net)

# Configure the model: optimizer, loss function and metric
# learning_rate: the learning rate used by the optimizer
adam = paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters())
cross_entropy = paddle.nn.CrossEntropyLoss()
accuracy = paddle.metric.Accuracy()
model.prepare(optimizer=adam, loss=cross_entropy, metrics=accuracy)

训练模型

关于训练,如果人品够好的话很快就可以达到“计算”算式的效果,如若不然嘛,多试亿遍就ok了

(1)基于高层API

import paddle

stack = [
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    paddle.nn.Linear(16, 19),
]
model = paddle.Model(paddle.nn.Sequential(*stack))
model.prepare(
    optimizer=paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters()),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)
# Start training
# train_data: the training dataset, epochs: number of training epochs,
# verbose: logging verbosity
# batch_size: number of samples fetched per training step
model.fit(
    train_data=dataset,
    epochs=700,
    batch_size=20,
    shuffle=True,
    verbose=1,
)

(2)基于底层API

import paddle

# Network trained by this snippet; no paddle.Model wrapper is used with the
# low-level API.
model = paddle.nn.Sequential(paddle.nn.Linear(3, 64),
                             paddle.nn.LeakyReLU(),
                             paddle.nn.Linear(64, 16),
                             paddle.nn.Sigmoid(),
                             paddle.nn.Linear(16, 19))

# dataset: the training dataset, shuffle=True: shuffle the sample order
# batch_size: number of samples fetched per training step
train_loader = paddle.io.DataLoader(dataset=dataset, batch_size=20, shuffle=True)
# Put the model and all of its sub-layers into training mode
model.train()
# Number of training epochs
epochs = 700
# Set up the optimizer
optim = paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters())
for epoch in range(epochs):
    # The loop variable is named `batch` so it does not rebind the
    # module-level `data` tensor.
    for batch in train_loader:
        x_data, y_data = batch[0], batch[1]
        # Forward pass
        predicts = model(x_data)
        # Compute the loss and the accuracy for this batch
        loss = paddle.nn.functional.cross_entropy(predicts, y_data)
        acc = paddle.metric.accuracy(predicts, y_data)
        # Back-propagate the error
        loss.backward()
        # Update the parameters
        optim.step()
        # Reset the gradients
        optim.clear_grad()
    # Report the values from the last batch of the epoch
    print("训练次数: {}, 损失: {}, 准确率: {}".format(epoch + 1, loss.numpy(), acc.numpy()))
# Author: CHI_KONG, source: bilibili
# https://www.bilibili.com/read/cv17989852?spm_id_from=333.999.list.card_article.click

前五次训练时,loss(误差)约为3,acc(准确率)约为0.09

经过700次训练,loss下降至0.9164,准确率约为86%

loss随训练次数增加而逐渐下降

output.png

模型预测

import paddle

net = paddle.nn.Sequential(
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    paddle.nn.Linear(16, 19),
)
model = paddle.Model(net)
model.prepare(
    optimizer=paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters()),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)
model.fit(train_data=dataset, epochs=700, batch_size=20, verbose=1)

# Expressions to evaluate
eq = ['1+1', '4+6', '0+7', '5+0', '2+4', '9+6', '4+7', '3+2', '4+3', '5+5', '9+0']
# "Encode" each expression: one code point per character
encoded = [[ord(ch) for ch in expr] for expr in eq]
eq_ = paddle.to_tensor(encoded, dtype='float32')
# Predict the answers
answer = model.predict(eq_)
# Print each prediction (index of the largest output) next to the true sum
for num, i in enumerate(answer[0]):
    print(f'预测算式:{eq[num]}  预测答案:{i.argmax()}  正确答案:{eval(eq[num])}')

训练前预测结果

屏幕截图 2022-08-14 220942.png

训练后预测结果

屏幕截图 2022-08-14 220456.png

模型保存

import paddle

blocks = (
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    paddle.nn.Linear(16, 19),
)
model = paddle.Model(paddle.nn.Sequential(*blocks))
trainer = paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters())
model.prepare(
    optimizer=trainer,
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)
model.fit(train_data=dataset, epochs=700, batch_size=20, verbose=1)

# Save the model parameters and the optimizer state
model.save('model_data/test')

加载参数

import paddle

# Rebuild the same architecture so the saved parameters fit it
net = paddle.nn.Sequential(
    paddle.nn.Linear(3, 64),
    paddle.nn.LeakyReLU(),
    paddle.nn.Linear(64, 16),
    paddle.nn.Sigmoid(),
    paddle.nn.Linear(16, 19),
)
model = paddle.Model(net)
model.prepare(
    optimizer=paddle.optimizer.Adam(learning_rate=0.003, parameters=model.parameters()),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)
eq = ['1+1', '4+6', '0+7', '5+0', '2+4', '9+6', '4+7', '3+2', '4+3', '5+5', '9+0']
# "Encode" each expression: one code point per character
encoded = [[ord(ch) for ch in expr] for expr in eq]
eq_ = paddle.to_tensor(encoded, dtype='float32')

# Load the saved model
model.load('model_data/test')
answer = model.predict(eq_)
# Print each prediction (index of the largest output) next to the true sum
for num, i in enumerate(answer[0]):
    print(f'预测算式:{eq[num]}  预测答案:{i.argmax()}  正确答案:{eval(eq[num])}')