线性回归
先来一个问题
求得一个合理的 y = ax + b
就要找到⬇️⬇️⬇️⬇️⬇️⬇️
⬆️⬆️⬆️⬆️⬆️⬆️损失函数的最小值
好,我们就可以再复习一次最小二乘法的推导。前面《人工智能必备数学知识-线性回归》里有,这是数学的方法。那么在人工智能、机器学习里用什么方法呢?
梯度下降法
再来个问题
理解一下
这是个循环
每次先求出当前点的斜率(梯度),再沿斜率的反方向按一定的步长(学习率)走到下一个点,如此循环,一直到收敛
机器学习处理问题实战
那么先来看看机器学习问题的基本流程
第一题
单因子房价预测
# 数据加载
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Task 1: single-factor house price prediction (price as a linear function of area).

# Load the data set (pandas is imported as pd at the top of the file).
data = pd.read_csv('task1_data.csv')
data.head()  # notebook preview; the value is discarded when run as a plain script
print(type(data))

# Select the feature column ('面积', area) and the target column ('房价', price).
x = data.loc[:, '面积']
x.head()
y = data.loc[:, '房价']
y.head()

# Visualize the raw data as a scatter plot.
fig1 = plt.figure()
plt.scatter(x, y)
plt.show()

# Preprocessing: convert the pandas Series to NumPy arrays.
print(type(x))
x = np.array(x)
print(type(x), x.shape)
y = np.array(y)

# Reshape to column vectors of shape (n_samples, 1); LinearRegression.fit
# expects a 2-D feature matrix, one row per sample.
x = x.reshape(-1, 1)
print(type(x), x.shape)
y = y.reshape(-1, 1)

# Build the model.
model = LinearRegression()
print(model)

# Train the model.
model.fit(x, y)

# Fitted parameters of y = a * x + b.
a = model.coef_
b = model.intercept_
print(a, b)

# Compare the hand-computed prediction with model.predict.
y_predict = a * x + b
print(type(y_predict), y_predict)
y_predict2 = model.predict(x)
# Both arrays are floating point: compare with a tolerance rather than with
# exact ==, which can report False for values that differ only by rounding.
print(np.allclose(y_predict2, y_predict))

# Predict the price for a single test sample, x_test = 100.
x_test = np.array([100])
x_test = x_test.reshape(-1, 1)
y_test_predict = model.predict(x_test)
print(y_test_predict)

# Visualize the fitted line on top of the data.
fig2 = plt.figure()
plt.scatter(x, y)
plt.plot(x, y_predict, label='y_predict')
plt.xlabel('size(x)')
plt.ylabel('price(y)')
plt.legend()
plt.show()

# Model evaluation: mean squared error and R^2 on the training data.
MSE = mean_squared_error(y, y_predict)
R2 = r2_score(y, y_predict)
print(MSE)
print(R2)
第二题
多因子房价预测
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Task 2, part 1: load the multi-factor data set, plot each factor against the
# price, then fit a single-factor (area -> price) baseline model.

# Load the data.
data = pd.read_csv('task2_data.csv')
data.head()

# One wide figure with three side-by-side scatter plots, one per factor.
fig = plt.figure(figsize=(20, 5))
panels = []
for position, column, title in (
    (131, '面积', 'Price VS Size'),
    (132, '人均收入', 'Price VS Income'),
    (133, '平均房龄', 'Price VS House_age'),
):
    panels.append(plt.subplot(position))
    plt.scatter(data[column], data['价格'], )
    plt.title(title)
fig1, fig2, fig3 = panels
plt.show()

# Single-factor model: area as the only feature, price as the target.
x = data['面积']
y = data['价格']
x.head()
y.head()

# Convert both to NumPy column vectors of shape (n_samples, 1).
x = np.array(x).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
print(x.shape, y.shape)

# Build and train the model in one step (fit returns the estimator itself).
model = LinearRegression().fit(x, y)
a, b = model.coef_, model.intercept_
print(a, b)

# Predictions on the training data.
y_predict = model.predict(x)
print(y_predict)

# Scatter of the data with the fitted line drawn in red.
fig4 = plt.figure()
plt.scatter(x, y)
plt.plot(x, y_predict, 'r')
plt.show()

# Model evaluation: mean squared error and R^2 on the training data.
MSE, R2 = mean_squared_error(y, y_predict), r2_score(y, y_predict)
print(MSE)
print(R2)
# Task 2, part 2: build the multi-factor model.
# Target: the price column vector y prepared for the single-factor model is
# reused unchanged. Features: every column of data except the price.
x = data.drop(['价格'], axis=1)
x.head()
print(x.shape)

# Build the model.
model_multi = LinearRegression()

# Train the model.
model_multi.fit(x, y)

# Predictions on the training data.
y_predict_multi = model_multi.predict(x)

# Model evaluation. Score the multi-factor predictions; scoring y_predict here
# would only repeat the single-factor metrics.
MSE = mean_squared_error(y, y_predict_multi)
R2 = r2_score(y, y_predict_multi)
print(MSE)
print(R2)

# Visualization: a multi-column x is hard to plot directly, so compare the
# real price with the predicted price instead.
fig5 = plt.figure()
plt.scatter(y, y_predict_multi)
plt.xlabel('real price(y)')
plt.ylabel('predicted price(x)')
plt.show()

# Same comparison for the single-factor model, for reference.
fig6 = plt.figure()
plt.scatter(y, y_predict)
plt.xlabel('real price(y)')
plt.ylabel('predicted price(x)')
plt.show()

# Predict the price for one new sample.
# NOTE(review): assumes the feature columns are ordered area, per-capita income,
# average house age, matching [160, 70000, 5]; confirm against the CSV.
x_test = np.array([160, 70000, 5]).reshape(1, -1)
print(x_test)
print(x_test.shape)
# model_multi was fitted on a DataFrame, so pass the sample with the same
# column names; this keeps the feature names consistent between fit and predict.
y_test_predict = model_multi.predict(pd.DataFrame(x_test, columns=x.columns))
print(y_test_predict)