机器学习跟普通学习的逻辑是一样的
唐诗300首,1遍学不明白,学10遍,10遍学不明白学100遍
1次学300首太多了,分10批,每批30首,就是300首
代码实现 机器学习 梯度下降中 分轮次、批次训练模型
重点是:
- 我们要植入 epochs 轮次,batch_size 批次 的概念
- 每个轮次要打乱 训练数据 的下标,使X_train , y_train 随机
- 利用双层 for 循环,外层每一轮次(epochs),内层走完所有批次(batches)
- 每批训练中 会 求梯度 并 更新参数
- 每次末尾 求得的 loss 损失 要与 每轮开始的 loss损失 取差值 得到 delta_loss
# NOTE(review): load_boston was removed in scikit-learn 1.2 — on newer versions
# switch to an alternative dataset (e.g. fetch_california_housing).
from sklearn.datasets import load_boston
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# ---- Hyperparameters ----
# Learning rate (tiny because the gradient below is an unscaled sum over the batch)
learning_rate = 0.000000001
# Number of passes over the training data
num_epoches = 500000
# Samples per mini-batch
batch_size = 100
# Convergence threshold on the per-step loss decrease
threshold = 1e-4

# ---- Data preparation ----
# Load features X (506 rows x 13 columns) and targets y (506,)
X, y = load_boston(return_X_y=True)
m, n = X.shape[0], X.shape[1]
# Prepend a column of ones so the first weight acts as the bias term -> 506 x 14
X = np.concatenate((np.ones((m, 1)), X), axis=1)
# Reshape y from a flat (506,) array into a column vector (506 x 1)
y = np.reshape(y, (m, 1))
# Hold out 33% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Counter: consecutive steps whose loss decrease fell inside the threshold band
count = 0
# Number of mini-batches per epoch (ceil: the last batch may be smaller)
num_batches = int(np.ceil(X_train.shape[0] / batch_size))
# 1. Random initial parameters (n features + 1 bias)
W = np.random.rand(n + 1, 1)

# Flag so convergence detected inside the batch loop also stops the epoch loop.
# (The original code only broke out of the inner loop, so early stopping never
# actually terminated training.)
converged = False
for epoch in range(num_epoches):
    # Shuffle the training data at the start of every epoch
    indexes = np.arange(X_train.shape[0])
    np.random.shuffle(indexes)
    X_train = X_train[indexes]
    y_train = y_train[indexes]
    # Loss over the full (shuffled) training set at the start of the epoch
    previous_loss = mean_squared_error(y_true=y_train, y_pred=np.dot(X_train, W))
    # Progress report every 100 epochs
    if epoch % 100 == 0:
        print("当前轮次是: %s, 当前的损失是: %s" % (epoch, previous_loss))
    # Walk through every mini-batch of this epoch
    for batch in range(num_batches):
        # Slice out the current mini-batch
        X_batch = X_train[batch * batch_size:(batch + 1) * batch_size]
        y_batch = y_train[batch * batch_size:(batch + 1) * batch_size]
        # 2. Gradient of the (unscaled) squared error: X^T (y_hat - y)
        # NOTE(review): the conventional MSE gradient carries a 2/batch factor;
        # its absence here is compensated by the very small learning rate.
        previous_y_batch_hat = np.dot(X_batch, W)
        gradients = np.dot(X_batch.T, (previous_y_batch_hat - y_batch))
        # 3. Parameter update: one plain gradient-descent step
        W = W - learning_rate * gradients
        # 4. Convergence check: post-update loss on this batch vs. the
        # epoch-start loss. NOTE(review): this compares a batch loss against a
        # full-training-set loss — confirm this mixed criterion is intended.
        current_y_batch_hat = np.dot(X_batch, W)
        current_loss = mean_squared_error(y_true=y_batch, y_pred=current_y_batch_hat)
        delta_loss = previous_loss - current_loss
        # Count consecutive "barely improved" steps; any other step resets
        if 0 < delta_loss < threshold:
            count += 1
        else:
            count = 0
        # Ten consecutive tiny improvements -> treat training as converged
        if count >= 10:
            print("当前的轮次是: %s, 当前的批次是: %s" % (epoch, batch))
            converged = True
            break
    if converged:
        break

# Final learned parameters
print(W)
# Training-set error with the learned parameters
y_hat_train = np.dot(X_train, W)
mes_train = mean_squared_error(y_true=y_train, y_pred=y_hat_train)
print("mes_train: %s" % mes_train)
# Test-set error with the learned parameters
y_hat_test = np.dot(X_test, W)
mes_test = mean_squared_error(y_true=y_test, y_pred=y_hat_test)
print("mes_test: %s" % mes_test)