本文已参与「新人创作礼」活动,一起开启掘金创作之路。
网络训练数据准备---数据标签在numpy和tensor之间的转换
1.从txt文档中读取训练数据和测试数据
def _read_matrix(path):
    """Read a whitespace-separated numeric matrix from *path*, shuffle its
    rows, and return it as a numpy array (one row per sample)."""
    with open(path, 'r', encoding='utf-8') as fp:  # `with` guarantees the file is closed
        row_list = fp.read().splitlines()
    data_list = [[float(i) for i in row.strip().split(" ")] for row in row_list]
    shuffle(data_list)  # randomize sample order
    return np.array(data_list)

# Raw strings are essential here: in the original 'E:\桌面\...\train.txt'
# Python interpreted "\t" as a TAB character, silently corrupting the path.
data_train = _read_matrix(r'E:\桌面\test_new_3\data\train.txt')
data_test = _read_matrix(r'E:\桌面\test_new_3\data\test.txt')
2.从txt文件中读取标签数据
def _read_labels(path):
    """Read a single comma-separated line of numbers from *path* and
    return it as an (N, 1) numpy column vector."""
    with open(path, 'r', encoding='utf-8') as fp:  # original leaked this handle (no fp.close())
        line = fp.readline()
    return np.array([[float(i)] for i in line.split(",")])

# Raw strings fix real escape bugs: in '...\label\1004.txt' Python parsed
# "\100" as an octal escape ('@'), and in '...\355.txt' it parsed "\355".
label1 = _read_labels(r'E:\桌面\test_new_3\label\1004.txt')
label2 = _read_labels(r'E:\桌面\test_new_3\label\355.txt')

# Alias the loaded arrays to the names used by the training code.
train_datas = data_train
train_labels = label1
test_datas = data_test
test_labels = label2
3.将训练数据和测试数据转化为tensor形式,为后面训练做准备,因为训练数据在pytorch要求一般为tensor格式
##这几步的作用就是将数据转化为tensor的格式,为后面训练做准备(因为训练的时候数据要是tensor格式)
##同时,train_datas = train_datas / 1.0 ,这里的作用就是将其强制转化为浮点类型
# Force-cast the feature matrices to floating point (the `/ 1.0`) and
# convert them to float32 tensors, the format the training loop expects.
train_datas = torch.from_numpy(train_datas / 1.0).float()
test_datas = torch.from_numpy(test_datas / 1.0).float()
4.将标签转化为float型(此处是因为做的是回归任务),如果想要做分类任务的话需要将标签转化为long型!
# Labels stay float32 because this is a regression task; a classification
# task would need long (int64) labels instead.
train_labels = torch.from_numpy(train_labels).float()
test_labels = torch.from_numpy(test_labels).float()
5.将已经转化为tensor格式的(train_datas, train_labels)和(test_datas, test_labels)转化为Data.DataLoader可以使用的格式
# Wrap the tensors so Data.DataLoader can serve (sample, label) pairs.
torch_dataset_train = Data.TensorDataset(train_datas, train_labels)
torch_dataset_test = Data.TensorDataset(test_datas, test_labels)
print(train_datas.size())  # shapes depend on the txt files loaded above
print(train_labels.size())
print(test_datas.size())
print(test_labels.size())
6.利用Data.DataLoader来对数据集进行迭代
train_loader = Data.DataLoader(dataset=torch_dataset_train, batch_size=BATCH_SIZE, shuffle=True)
7.训练流程
a = []  # collected test accuracies, one entry per evaluation
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):  # yields shuffled mini-batches
        # print(b_x.size())
        b_x = b_x.view(-1, 3)  # flatten each sample to its 3 features
        # print(b_x.size())
        output = mlp(b_x)  # forward pass
        loss = loss_func(output, b_y)  # MSE loss (regression)
        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients
        if step % 50 == 0:
            # NOTE(review): mlp is still in train mode here, so BatchNorm
            # normalizes with test-batch statistics and updates its running
            # stats from test data — consider mlp.eval() + torch.no_grad().
            test_output = mlp(test_x.view(-1, 3))
            # A prediction counts as "accurate" when its absolute error < 0.1.
            accuracy = (((abs(test_output.detach().numpy()-test_y.numpy()) < 0.1)).sum())/len(test_y.numpy())
            print('Epoch: ', round(epoch), '| train loss: %.4f' % loss.data.numpy(), '| test accuracy:%.2f' % accuracy)
            a.append(accuracy)
8.全部代码
# library
# standard library
import os
from random import shuffle
# third-party library
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import numpy as np
# import matplotlib.pyplot as plt
# torch.manual_seed(1)  # uncomment for reproducible runs
# Hyper Parameters
EPOCH = 300  # number of full passes over the training data
BATCH_SIZE = 50  # samples per mini-batch
LR = 0.0005  # learning rate for Adam
# Leftover from the MNIST tutorial this script is based on:
# if not(os.path.exists('./mnist/')) or not os.listdir('./mnist/'):
# # not mnist dir or mnist is empyt dir
# DOWNLOAD_MNIST = True
def _read_matrix(path):
    """Read a whitespace-separated numeric matrix from *path*, shuffle its
    rows, and return it as a numpy array (one row per sample)."""
    with open(path, 'r', encoding='utf-8') as fp:  # `with` guarantees the file is closed
        row_list = fp.read().splitlines()
    data_list = [[float(i) for i in row.strip().split(" ")] for row in row_list]
    shuffle(data_list)  # randomize sample order
    return np.array(data_list)

# Raw strings are essential here: in the original 'E:\桌面\...\train.txt'
# Python interpreted "\t" as a TAB character, silently corrupting the path.
data_train = _read_matrix(r'E:\桌面\test_new_3\data\train.txt')
data_test = _read_matrix(r'E:\桌面\test_new_3\data\test.txt')
## Read the label vectors (one comma-separated line per file).
def _read_labels(path):
    """Read a single comma-separated line of numbers from *path* and
    return it as an (N, 1) numpy column vector."""
    with open(path, 'r', encoding='utf-8') as fp:  # original leaked this handle (no fp.close())
        line = fp.readline()
    return np.array([[float(i)] for i in line.split(",")])

# Raw strings fix real escape bugs: in '...\label\1004.txt' Python parsed
# "\100" as an octal escape ('@'), and in '...\355.txt' it parsed "\355".
label1 = _read_labels(r'E:\桌面\test_new_3\label\1004.txt')
label2 = _read_labels(r'E:\桌面\test_new_3\label\355.txt')
# Alias the loaded arrays to the train/test names used from here on.
train_datas = data_train
train_labels = label1
test_datas = data_test
test_labels = label2

# Convert everything to float32 tensors, the format PyTorch training
# expects. The `/ 1.0` keeps the original's explicit float coercion of
# the feature matrices. Labels stay float because this is a regression
# task; a classification task would need long (int64) labels instead.
train_datas = torch.from_numpy(train_datas / 1.0).float()
test_datas = torch.from_numpy(test_datas / 1.0).float()
train_labels = torch.from_numpy(train_labels).float()
test_labels = torch.from_numpy(test_labels).float()
# Wrap the tensor pairs so Data.DataLoader can serve (sample, label) batches.
torch_dataset_train = Data.TensorDataset(train_datas, train_labels)
torch_dataset_test = Data.TensorDataset(test_datas, test_labels)

# Shapes depend on the txt files loaded above.
for _t in (train_datas, train_labels, test_datas, test_labels):
    print(_t.size())

# Iterate the training set in shuffled mini-batches of BATCH_SIZE.
train_loader = Data.DataLoader(dataset=torch_dataset_train, batch_size=BATCH_SIZE, shuffle=True)

# Held-out evaluation tensors used inside the training loop.
test_x = test_datas
test_y = test_labels
class MLP(nn.Module):
    """Small fully connected regressor mapping 3 input features to 1 output.

    Two hidden layers of width 5 with batch normalization; only the first
    hidden layer is followed by a Tanh activation.
    """

    def __init__(self):
        super(MLP, self).__init__()
        layers = [
            nn.Linear(3, 5),
            nn.BatchNorm1d(5),
            nn.Tanh(),
            nn.Linear(5, 5),
            nn.BatchNorm1d(5),
            nn.Linear(5, 1),
        ]
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Run the network on a (batch, 3) tensor; returns (batch, 1)."""
        return self.mlp(x)
"""
无激活层
"""
# class MLP(nn.Module):
# def __init__(self):
# super(MLP, self).__init__()
# self.mlp = nn.Sequential(
# nn.Linear(5, 5),
# nn.Linear(5, 1)
# )
# def forward(self, x):
# output = self.mlp(x)
# return output # return x for visualization
mlp = MLP()
print(mlp) # net architecture
optimizer = torch.optim.Adam(mlp.parameters(), lr=LR) # optimize all logistic parameters
# optimizer = torch.optim.SGD(mlp.parameters(), lr=LR, momentum=0.9) # optimize all logistic parameters
loss_func = nn.MSELoss() # the target label is not one-hotted
# ----- training and testing -----
a = []  # test-accuracy history, one entry per evaluation
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):
        b_x = b_x.view(-1, 3)          # each sample carries 3 features
        output = mlp(b_x)              # forward pass
        loss = loss_func(output, b_y)  # MSE regression loss
        optimizer.zero_grad()          # clear gradients for this training step
        loss.backward()                # backpropagation, compute gradients
        optimizer.step()               # apply gradients
        if step % 50 == 0:
            # Bug fix: evaluate in eval mode under no_grad. The original ran
            # the test set through the net in train mode, so BatchNorm both
            # normalized with test-batch statistics and updated its running
            # stats from test data (test-set leakage).
            mlp.eval()
            with torch.no_grad():
                test_output = mlp(test_x.view(-1, 3))
            mlp.train()
            # A prediction counts as correct when within 0.1 of the target.
            accuracy = (abs(test_output.numpy() - test_y.numpy()) < 0.1).sum() / len(test_y)
            # loss.item() replaces the legacy loss.data.numpy() access;
            # round(epoch) was redundant (epoch is already an int).
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item(),
                  '| test accuracy:%.2f' % accuracy)
            a.append(accuracy)
# print 10 predictions from test data
test_output = mlp(test_x[:355].view(-1, 3))
# pred_y = torch.max(test_output, 1)[1].data.numpy()
pred_y = test_output.data.numpy()
pred_y = pred_y.flatten()
print(pred_y, 'prediction number')
print(test_y[:355].numpy(), 'real number')
v=a[-1]
print(a)
# Scatter-plot the predictions against a reference line and annotate the
# final test accuracy in a small table.
import matplotlib.pyplot as plt
plt.rc("font", family='KaiTi')  # CJK-capable font so the Chinese labels render
plt.figure()
f, axes = plt.subplots(1, 1)
x = np.arange(1, 356)  # one x position per test sample (355 points)
# axes.plot(x , pred_y)
axes.scatter(x, pred_y, c='r', s=3, marker = 'o')  # predicted values in red
plt.axhline(36.7, c ='g')  # green reference line at the true value 36.7
axes.set_xlabel("位置点位")
axes.set_ylabel("预测值")
axes.set_title("矫正网络预测结果")
axes.set_ylim((36, 37))
plt.savefig("result.png")
plt.legend(['真实值36.7℃', '预测值'], loc='upper left')
row_labels = ['准确率:']
col_labels = ['数值']  # NOTE(review): defined but never passed to plt.table
table_vals = [['{:.2f}%'.format(v*100)]]  # final accuracy as a percentage
row_colors = ['gold']
my_table = plt.table(cellText=table_vals, colWidths=[0.1] * 5,
rowLabels=row_labels, rowColours=row_colors, loc='best')
plt.show()