PS: the full code can be found at the end of this article.
1. Basic Concepts
In short, a feed-forward network involves forward propagation and back propagation: the forward pass produces the prediction, and the backward pass updates the parameters. For example, suppose the input is 1, the first layer has a single neuron with weight x1 and bias b1, and the second layer has a single neuron with weight x2 and bias b2. The first layer's output is l1 = 1*x1 + b1 and the second layer's output is l2 = l1*x2 + b2. The loss is then computed from l2 and back propagation is performed, as sketched below.
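A minimal PyTorch sketch of this two-layer example (the parameter values and the squared-error loss against a target of 0 are assumptions made purely for illustration):

import torch

# weights and biases of the two single-neuron layers from the example above
x1=torch.tensor(0.5,requires_grad=True)
b1=torch.tensor(0.1,requires_grad=True)
x2=torch.tensor(-0.3,requires_grad=True)
b2=torch.tensor(0.2,requires_grad=True)

inp=torch.tensor(1.0)      # the input value 1
l1=inp*x1+b1               # forward pass through layer 1
l2=l1*x2+b2                # forward pass through layer 2
loss=(l2-0.0)**2           # squared error against an assumed target of 0
loss.backward()            # back propagation: gradients are stored in .grad
print(x1.grad,b1.grad,x2.grad,b2.grad)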
2. A Simple Demo
2.1 Dataset Preparation
2.1.1 Creating the Dataset
import random

from numpy.random import binomial, normal, poisson


# create the dataset
def create_data(data_length=10000,distribution="Binomial"):
"""
:param feature_num:特征数目
:param data_length:数据的长度
:param distribution: 真实标签服从某项分布
:return: 创建好的数据集
"""
data=[[] for _ in range(data_length)]
f_1=[round(i-data_length+i/(i+1),2) for i in range(data_length)]
f_2=[round(i%data_length+i/(i+1),2) for i in range(data_length)]
f_3=[round(i/data_length+i/(i+1),2) for i in range(data_length)]
random.shuffle(f_1)
random.shuffle(f_2)
random.shuffle(f_3)
for i_ in range(data_length):
data[i_].append(f_1[i_])
data[i_].append(f_2[i_])
data[i_].append(f_3[i_])
distributions=["Norm","Poisson","Binomial"]
if distribution not in distributions:
        raise ValueError("distribution must be one of {}".format(distributions))
if distribution=="Norm":
# 均值为5,方差为1(标签为小数)
label=normal(5,1,data_length)
elif distribution=='Poisson':
# 出现概率为0.5,(标签为整数)
label=poisson(0.5,data_length)
else:
# 标签取值为[0,1],类别所占概率为0.2(标签为整数)
label=binomial(1,0.2,data_length)
return data,label
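A quick sanity check of what create_data returns (the output in the comments is illustrative; with the "Binomial" setting roughly 20% of the labels are 1):

data,label=create_data(data_length=1000,distribution="Binomial")
print(len(data),len(data[0]))   # 1000 samples, 3 features each
print(label[:10])               # e.g. [0 0 1 0 0 0 0 0 1 0]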
2.1.2 Splitting the Dataset
# split the data into train / eval / test sets
def data_split(data,ratio):
    train_end=int(len(data)*ratio[0])
    eval_end=int(len(data)*(ratio[0]+ratio[1]))
    return data[:train_end],data[train_end:eval_end],data[eval_end:]
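For example, an 8:1:1 split (ratio holds the train and validation fractions; the test split is whatever remains):

train,valid,test_=data_split(list(range(10)),ratio=[0.8,0.1])
print(train)    # [0, 1, 2, 3, 4, 5, 6, 7]
print(valid)    # [8]
print(test_)    # [9]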
2.2 The Model
2.2.1 Model Architecture
import torch.nn as nn


# the model class
class FFN(nn.Module):
def __init__(self,args):
super(FFN,self).__init__()
        # number of layers
        self.l_num=args.l_num
        # number of neurons in the input layer
        self.i_num=args.i_num
        # numbers of neurons in the hidden layers, parsed from a string such as "[64, 32]" into a list
        self.h_num=eval(args.h_num)
        # number of neurons in the output layer
        self.o_num=args.o_num
        # activation function, looked up by name (e.g. "relu") in the ACT2FN mapping
        self.act_f=ACT2FN[args.act_function]
        # the model's linear layers
        self.Linears=self.create_ffn()
        # parameter initialization
        self.apply(self._init_weights)

    # build the stack of linear layers
def create_ffn(self):
temp=nn.ModuleList()
if not isinstance(self.h_num,list):
raise ValueError("the type of hidden layer nums must be a list!")
        temp.append(nn.Linear(self.i_num,self.h_num[0])) # input layer
for h in range(len(self.h_num)-1):
temp.append(nn.Linear(self.h_num[h],self.h_num[h+1]))
temp.append(nn.Linear(self.h_num[-1],self.o_num))
return temp
    # weight and bias initialization
    def _init_weights(self,m):
        if isinstance(m,(nn.Linear,nn.Embedding)):
            m.weight.data.normal_(mean=0.0,std=1e-2)
        elif isinstance(m,nn.BatchNorm1d):
            pass
        elif isinstance(m,nn.LayerNorm):
            m.bias.data.zero_()
            m.weight.data.fill_(1.0)
        if isinstance(m,nn.Linear) and m.bias is not None:
            m.bias.data.zero_()
    # forward propagation
def forward(self,data):
out_put=data
for i_ in range(len(self.Linears)-1):
out_put=self.Linears[i_](out_put)
            out_put=self.act_f(out_put) # activation
out_put=self.Linears[-1](out_put)
# out_put=nn.Softmax()(out_put)
return out_put
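A minimal sketch of instantiating and running the model (argparse.Namespace stands in for the real argument parser, the field values are assumptions, and ACT2FN is assumed to map the name "relu" to a callable activation):

import torch
from argparse import Namespace

args=Namespace(l_num=3,i_num=3,h_num="[16, 8]",o_num=2,act_function="relu")
model=FFN(args)                 # layers: 3 -> 16 -> 8 -> 2
x=torch.rand(4,3)               # a batch of 4 samples with 3 features each
print(model(x).shape)           # torch.Size([4, 2])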
2.2.2 Training the Model
import math
import os
import time

import torch
from torch.optim import Adam
from tqdm import tqdm


# train / test the model (trains when args.do_eval is False; loads and tests a saved checkpoint when it is True)
def test(data,label,FFN,args):
loss_f=get_loss(args)
data_train=torch.tensor(data[0])
label_train=torch.tensor(label[0],dtype=torch.long)
best_evel_result=[]
    # optimizer
betas = (args.adam_beta1, args.adam_beta2)
optim = Adam(FFN.parameters(), lr = args.lr, betas = betas, weight_decay = args.weight_decay)
train_es=[]
train_ls=[]
for b_ in range(math.ceil(len(data_train) / args.batch_size)):
train_e = data_train[b_ * args.batch_size:(b_ + 1) * args.batch_size, :]
train_l=label_train[b_*args.batch_size:(b_+1)*args.batch_size]
train_es.append(train_e)
train_ls.append(train_l)
    data_eval = [torch.tensor(data[1])]
    label_eval = torch.tensor(label[1], dtype=torch.long)
    # the test split is the third part returned by data_split
    data_test = [torch.tensor(data[2])]
    label_test = torch.tensor(label[2], dtype=torch.long)
train_losses=[]
test_losses=[]
es=0
if args.do_eval==True:
model_path=args.model_path
if os.path.exists(model_path):
FFN.load_state_dict(torch.load(model_path))
FFN.eval()
print("load {} the best model for testing!".format(args.model_path))
for b_, test_e in tqdm(enumerate(data_test), total=len(data_test),
desc=set_color(f"Testing ", 'pink'), ):
output = FFN(test_e)
# print(b_,data_train.shape,train_e.shape)
loss = loss_f(output, label_test)
time.sleep(0.5)
print("test loss:{:.4f}".format(loss))
else:
raise ValueError("no model for testing!")
else:
for e_ in range(args.epoch):
FFN.train()
for b_,train_e in tqdm(enumerate(train_es),total=len(train_es),desc=set_color(f"Training {e_:>5}", 'pink'),):
output=FFN(train_e)
# print(b_,data_train.shape,train_e.shape)
if b_==0:
loss=loss_f(output,train_ls[b_])
else:
loss+= loss_f(output,train_ls[b_])
if torch.isnan(loss):
raise ValueError("loss is nan")
train_losses.append(loss)
optim.zero_grad()
loss.backward()
optim.step()
# time.sleep(0.5)
print("epoch: {0} training loss:{1:.4f}".format(e_,loss))
# time.sleep(0.5)
FFN.eval()
for b_,eval_e in tqdm(enumerate(data_eval),total=len(data_eval),desc=set_color(f"Valid {e_:>5}", 'pink'),):
output=FFN(eval_e)
# print(b_,data_train.shape,train_e.shape)
loss=loss_f(output,label_eval)
test_losses.append(loss)
# time.sleep(0.5)
            if len(best_evel_result)==0:
                best_evel_result.append(loss)
                print("the valid loss is descending:{:.4f},save the best model".format(loss))
                torch.save(FFN.cpu().state_dict(), args.model_path)
            elif best_evel_result[0]<loss:
                print("early stop :{}".format(es))
                es+=1
            else:
                # the valid loss improved: update the best result and save the model
                best_evel_result[0]=loss
                print("the valid loss is descending:{:.4f},save the best model".format(loss))
                torch.save(FFN.cpu().state_dict(), args.model_path)
            # early stopping check
            if es==args.early_stop:
                break
        # plot the training and validation loss curves
if args.batch_size==256:
draw_plot(torch.tensor([train_losses,test_losses]).detach().cpu().numpy().tolist())
        # performance on the test set
FFN.load_state_dict(torch.load(args.model_path))
for b_, test_e in tqdm(enumerate(data_test), total=len(data_test), desc=set_color(f"Testing ", 'pink'), ):
output = FFN(test_e)
# print(b_,data_train.shape,train_e.shape)
loss = loss_f(output, label_test)
print("test loss:{:.4f}".format(loss.detach().cpu().numpy().tolist()))
return loss
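Putting the pieces together, a driver sketch might look like the following (all argument values are assumptions; get_loss, set_color and draw_plot are small helpers provided in the full code linked below):

from argparse import Namespace

args=Namespace(i_num=3,h_num="[16, 8]",o_num=2,l_num=3,act_function="relu",
               lr=1e-3,adam_beta1=0.9,adam_beta2=0.999,weight_decay=0.0,
               batch_size=256,epoch=50,early_stop=5,
               do_eval=False,model_path="./ffn.pt")

data,label=create_data(data_length=10000,distribution="Binomial")
data=data_split(data,[0.8,0.1])      # (train, eval, test) features
label=data_split(label,[0.8,0.1])    # (train, eval, test) labels

model=FFN(args)
test(data,label,model,args)          # train, validate with early stopping, then test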
2.2.3 Experimental Results
(1) Line chart of the training and validation loss over the epochs
(2) Line chart of the test loss for different batch_size values
3. References
[1] Soochow University (苏大) machine learning course
[2] Full code