人工智能-线性回归模型

503 阅读2分钟

线性回归

先来一个问题

image.png

求得一个合理的 y = ax + b

就要找到⬇️⬇️⬇️⬇️⬇️⬇️

image.png

⬆️⬆️⬆️⬆️⬆️⬆️损失函数的最小值

好,我们可以再复习一次最小二乘法的推导。前面《人工智能必备数学知识-线性回归》里有,这是数学的方法。那么在人工智能的机器学习里用什么方法呢?

梯度下降法

再来个问题

image.png

image.png

理解一下

image.png

image.png

这是个循环

image.png

每次先求出当前点的斜率(梯度),再沿斜率下降的方向按一定的步长移动到下一个点,如此循环,一直到收敛

机器学习处理问题实战

那么先来看看机器学习问题的基本流程

image.png

第一题

image.png

单因子房价预测

# 数据加载
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# Load the single-factor data set (pandas is imported above as pd).
data = pd.read_csv('task1_data.csv')
data.head()  # notebook-style preview; the result is discarded in a plain script
print(type(data))

# Select the feature column ('面积', plotted later as size) and the
# target column ('房价', plotted later as price).
x = data['面积']
y = data['房价']
x.head()
y.head()

# Scatter the raw samples on a fresh figure (pyplot is imported above as plt).
fig1 = plt.figure()
plt.scatter(x, y)
plt.show()

# Preprocessing: convert the selected columns to NumPy arrays
# (numpy is imported above as np) and give each one a single-column shape.

print(type(x))
x = np.array(x)
print(type(x), x.shape)

# reshape(-1, 1) keeps the number of rows and forces exactly one column.
x = x.reshape(-1, 1)
print(type(x), x.shape)

y = np.array(y).reshape(-1, 1)

# Build the model

model = LinearRegression()

print(model)

# Train the model (LinearRegression is imported above from sklearn.linear_model)

model.fit(x, y)

# Read the fitted parameters a (slope) and b (intercept) of y = a * x + b

a = model.coef_
b = model.intercept_

print(a, b)

# Compare the hand-computed prediction with the model's own prediction

y_predict = a * x + b
print(type(y_predict), y_predict)
y_predict2 = model.predict(x)
# Compare element-wise with a floating-point tolerance instead of exact `==`:
# the two arrays are produced by different arithmetic paths, so exact equality
# can report False for values that differ only by rounding error.
print(np.isclose(y_predict2, y_predict))

# Predict the target for one new sample, x_test = 100.
# The nested list builds the (1, 1) array directly: one sample, one feature.
x_test = np.array([[100]])
y_test_predict = model.predict(x_test)

print(y_test_predict)

# Draw the samples together with the fitted line.
fig2 = plt.figure()
plt.scatter(x, y)
plt.plot(x, y_predict, label='y_predict')
plt.xlabel('size(x)')
plt.ylabel('price(y)')
plt.legend()
plt.show()

# Evaluate the fit on the training data: mean squared error and R^2
# (both metric functions are imported above from sklearn.metrics).
MSE, R2 = mean_squared_error(y, y_predict), r2_score(y, y_predict)

print(MSE, R2, sep='\n')

第二题

image.png

多因子房价预测

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the multi-factor data set (pandas is imported above as pd).
data = pd.read_csv('task2_data.csv')
data.head()  # notebook-style preview; the result is discarded in a plain script

# Plot the '价格' (price) column against each of the three factor columns,
# side by side on one wide figure (pyplot is imported above as plt).
fig = plt.figure(figsize=(20, 5))

# Left panel: price vs. '面积'.
fig1 = plt.subplot(1, 3, 1)
plt.scatter(data['面积'], data['价格'])
plt.title('Price VS Size')

# Middle panel: price vs. '人均收入'.
fig2 = plt.subplot(1, 3, 2)
plt.scatter(data['人均收入'], data['价格'])
plt.title('Price VS Income')

# Right panel: price vs. '平均房龄'.
fig3 = plt.subplot(1, 3, 3)
plt.scatter(data['平均房龄'], data['价格'])
plt.title('Price VS House_age')

plt.show()

# Single-factor model: predict '价格' (price) from '面积' (size) alone.

# Select the feature and target columns.
x = data['面积']
y = data['价格']
x.head()
y.head()

# Convert both columns to NumPy arrays with exactly one column each
# (numpy is imported above as np).
x, y = (np.array(column).reshape(-1, 1) for column in (x, y))
print(x.shape, y.shape)


# Create the single-factor model and train it in one step; fit() returns the
# fitted estimator itself (LinearRegression is imported above).
model = LinearRegression().fit(x, y)

# Fitted slope (a) and intercept (b).
a, b = model.coef_, model.intercept_

print(a, b)


# Predictions on the training samples.
y_predict = model.predict(x)
print(y_predict)


# Draw the samples and overlay the fitted line in red.
fig4 = plt.figure()
plt.scatter(x, y)
plt.plot(x, y_predict, 'r')
plt.show()


# Evaluate the single-factor model on the training data
# (metric functions are imported above from sklearn.metrics).
MSE, R2 = mean_squared_error(y, y_predict), r2_score(y, y_predict)

print(MSE, R2, sep='\n')



# Build the multi-factor model

# Features: every column of `data` except the target '价格'.
# The target `y` is reused unchanged from the single-factor section.
x = data.drop(['价格'], axis=1)
x.head()
print(x.shape)


# Create the model
model_multi = LinearRegression()

# Train the model
model_multi.fit(x, y)

# Predictions on the training samples
y_predict_multi = model_multi.predict(x)


# Evaluate the multi-factor model.
# The metrics must be computed from y_predict_multi; scoring y_predict here
# would only repeat the single-factor results.

MSE = mean_squared_error(y, y_predict_multi)
R2 = r2_score(y, y_predict_multi)

print(MSE)
print(R2)


def _plot_real_vs_predicted(real, predicted):
    """Scatter predicted values against real values on a new figure, show it, and return the figure."""
    figure = plt.figure()
    plt.scatter(real, predicted)
    plt.xlabel('real price(y)')
    plt.ylabel('predicted price(x)')
    plt.show()
    return figure


# The multi-column x is hard to visualize directly, so compare the real y
# with the multi-factor predictions instead.
fig5 = _plot_real_vs_predicted(y, y_predict_multi)

# Same comparison for the single-factor predictions.
fig6 = _plot_real_vs_predicted(y, y_predict)


# Predict the price for one new sample with the multi-factor model.
# The nested list builds the (1, 3) array directly: one sample, three features.
# NOTE(review): the values are presumably size, income, house age, in the
# CSV's column order — confirm against task2_data.csv.
x_test = np.array([[160, 70000, 5]])
print(x_test)
print(x_test.shape)
y_test_predict = model_multi.predict(x_test)
print(y_test_predict)