手搓线性回归<房价预测>

45 阅读1分钟

背景:假设房价与对应的相关特征(地段, 学校, 犯罪率...)符合线性关系

加载数据

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the housing table. skiprows=1 drops the first line of the file, so the
# line after it is the one pandas uses as the header row.
# NOTE(review): assumes the CSV begins with one non-header line — confirm
# against the actual file.
data = pd.read_csv("boston_house_prices.csv", skiprows=1)
data = data.to_numpy()
# Every column except the last is a feature; the last column is the target.
X = data[:, :-1]
y = data[:, -1]

切分数据

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

规范化

# Standardise the features. The statistics come from the training split only
# and are reused for the test split, so both are scaled the same way.
# NOTE(review): a column that is constant in X_train gives sigma == 0 and a
# division by zero here — confirm the data has no such column.
mu = X_train.mean(axis=0)  # per-column mean
sigma = X_train.std(axis=0)  # per-column standard deviation
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma

定义权重

import torch

# The weight matrix needs one row per feature column of X so that `X @ w`
# is well-formed; take the count from the data instead of hard-coding it.
n_features = X_train.shape[1]
w = torch.randn(n_features, 1, requires_grad=True)  # n_features inputs, 1 output
b = torch.zeros(1, 1, requires_grad=True)  # bias for the single output

数据转张量

# Convert the arrays to float32 tensors.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
# Targets are reshaped to column vectors (N, 1). `X @ w + b` with w of shape
# (n_features, 1) yields (N, 1); subtracting a 1-D (N,) target from that would
# broadcast to (N, N) and silently produce a wrong loss.
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

定义模型

# Linear model
def model(X):
    """Return the linear prediction ``X @ w + b``.

    Uses the module-level parameters ``w`` and ``b``.
    """
    return X @ w + b

过程监控

def get_acc(X, y):
    """Return the mean squared error of ``model`` on ``(X, y)``.

    Despite the name, the returned value is an error (lower is better), not
    an accuracy. The computation runs under ``torch.no_grad()`` so it is not
    recorded by autograd.

    Args:
        X: Feature tensor passed to ``model``.
        y: Target tensor with as many elements as ``model(X)`` returns.

    Returns:
        A scalar tensor holding the mean of the squared residuals.
    """
    with torch.no_grad():
        # Forward pass
        y_pred = model(X)

        # MSE. The target is reshaped to the prediction's shape so that a
        # 1-D target is not broadcast against a column of predictions.
        residual = y_pred - y.reshape(y_pred.shape)
        return (residual ** 2).mean()

开始训练

steps = 1000  # number of training iterations
learning_rate = 1e-3  # step size; kept small so the updates do not oscillate
# Column-vector view of the training targets, matching the shape of the
# y_pred - target subtraction below (a no-op if they are already (N, 1)).
y_target = y_train.reshape(-1, 1)
X_train_acc = get_acc(X_train, y_train)
X_test_acc = get_acc(X_test, y_test)
print(f"训练前X_train_acc:{X_train_acc}")
print(f"训练前X_test_acc:{X_test_acc}")
for step in range(steps):
    # Forward pass
    y_pred = model(X_train)
    # Loss: mean squared error
    loss = ((y_pred - y_target) ** 2).mean()
    # Backward pass => compute gradients
    loss.backward()
    # Gradient-descent step. Updating in place under no_grad keeps the update
    # itself out of the autograd graph without going through `.data`.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
    # Reset the gradients; backward() accumulates into .grad otherwise
    w.grad.zero_()
    b.grad.zero_()
    X_train_acc = get_acc(X_train, y_train)
    X_test_acc = get_acc(X_test, y_test)
    print(f"训练{step+1}步后X_train_acc:{X_train_acc}")
    print(f"训练{step+1}步后X_test_acc:{X_test_acc}")
    print(loss.item())