机器学习 房产估价

223 阅读14分钟
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load the seven Lianjia CSV exports. The first two files are GBK-encoded,
# the remaining five are UTF-8.
data1 = pd.read_csv('lianjia1.csv', encoding='gbk')
data2 = pd.read_csv('lianjia2.csv', encoding='gbk')
data3 = pd.read_csv('lianjia3.csv', encoding='utf-8')
data4 = pd.read_csv('lianjia4.csv', encoding='utf-8')
data5 = pd.read_csv('lianjia5.csv', encoding='utf-8')
data6 = pd.read_csv('lianjia6.csv', encoding='utf-8')
data7 = pd.read_csv('lianjia7.csv', encoding='utf-8')
# read_csv numbers the rows of every file from 0, so a plain concat would
# leave repeated index labels. ignore_index=True gives each row a unique
# label; otherwise index-aligned operations such as DataFrame.join pair up
# unrelated rows that merely share a label, duplicating rows and mixing
# their values.
data = pd.concat(
    [data1, data2, data3, data4, data5, data6, data7],
    ignore_index=True,
)
# Discard every row that has a missing value in any column.
data = data.dropna()
# Show which boolean outcomes occur when testing cjshijian for '2015-'.
np.unique(data.cjshijian.str.contains('2015-'))
array([False,  True])
# Keep only the rows whose cjshijian text contains '2015-' and only the three
# columns used further on. The explicit .copy() makes the result an
# independent frame, so later column assignments on `data` do not trigger
# pandas' SettingWithCopyWarning.
is_2015 = data.cjshijian.str.contains('2015-')
data = data.loc[is_2015, ['cjdanjia', 'cjxiaoqu', 'cjlouceng']].copy()
data.head(2)
cjdanjia cjxiaoqu cjlouceng
0 43997元/平 红莲北里 3室1厅 57平 南 北/高楼层/6层
1 36969元/平 红莲南里 1室1厅 43平 南/高楼层/7层
def _scale_price(value):
    """Divide a price by 10000 and round the result to two decimals."""
    return round(value / 10000, 2)


# Strip the '元/平' suffix, parse the number and rescale it.
raw_price = data.cjdanjia.str.replace('元/平', '').astype(np.float32)
data['cjdanjia'] = raw_price.map(_scale_price)

# cjxiaoqu must consist of exactly three whitespace-separated tokens;
# rows with any other token count are discarded.
token_count = data.cjxiaoqu.str.split().map(len)
data = data[token_count == 3]

# Spread the three tokens over the columns xiaoqu / huxing / mianji.
# Separate assign() calls keep the new columns in this exact order.
tokens = data.cjxiaoqu.str.split()
data = (
    data
    .assign(xiaoqu=tokens.map(lambda parts: parts[0]))
    .assign(huxing=tokens.map(lambda parts: parts[1]))
    .assign(mianji=tokens.map(lambda parts: parts[2]))
)
data = data.drop('cjxiaoqu', axis=1)

# cjlouceng is '/'-separated: field 0 becomes chaoxiang, field 1 louceng.
slash_fields = data.cjlouceng.map(lambda text: text.split('/'))
data = (
    data
    .assign(chaoxiang=slash_fields.map(lambda parts: parts[0]))
    .assign(louceng=slash_fields.map(lambda parts: parts[1]))
)
data = data.drop('cjlouceng', axis=1)

# Index of the fifteen most frequent xiaoqu values.
top15 = data.xiaoqu.value_counts().iloc[:15].index
top15
Index(['新龙城', '北京新天地', '北京像素南区', '远洋山水', '芍药居北里', '天通西苑三区', '荣丰2008', '天通苑东一区',
       '天通苑中苑', '北京像素北区', '青年汇佳园', '海特花园小区', '天通西苑二区', '东亚上北中心', '沿海赛洛城'],
      dtype='object')
# Keep only the rows whose xiaoqu is one of the top15 values. .copy()
# detaches the filtered rows from the parent frame, so later in-place
# column assignments on `data` do not raise SettingWithCopyWarning.
data = data[data.xiaoqu.isin(top15)].copy()
data.head()
cjdanjia xiaoqu huxing mianji chaoxiang louceng
31 6.62 荣丰2008 1室1厅 32平 低楼层
347 3.97 远洋山水 1室--厅 56平 中楼层
388 3.01 北京像素北区 2室1厅 57平 西南 低楼层
716 2.90 北京像素北区 2室1厅 58平 东北 中楼层
260 3.81 沿海赛洛城 1室1厅 64平 低楼层
# Remove the '平' unit marker from mianji and store the values as float32.
mianji_text = data['mianji'].str.replace('平', '')
data['mianji'] = mianji_text.astype(np.float32)
data.head(n=3)
cjdanjia xiaoqu huxing mianji chaoxiang louceng
31 6.62 荣丰2008 1室1厅 32.0 低楼层
347 3.97 远洋山水 1室--厅 56.0 中楼层
388 3.01 北京像素北区 2室1厅 57.0 西南 低楼层
# List the distinct chaoxiang strings present in the data.
data['chaoxiang'].unique()
array(['南', '东', '西南', '东北', '南 北', '西 南', '东 南', '东 南 北', '西北', '东 北',
       '东 西', '西', '北', '西 北', '东南', '南 西', '东 南 西', '西 南 北', '南 北 西',
       '东 西 北', '南 西 北', '西南 东北', '南 北 东', '暂无数据', '北 东南', '北 西南',
       '东 西 南', '东 北 南'], dtype=object)
# Discard rows whose chaoxiang is the '暂无数据' ("no data") placeholder.
data = data[data.chaoxiang != '暂无数据']
# DataFrame.join aligns on index labels. When a label occurs more than once
# (e.g. several CSVs concatenated with their original row numbers), every
# row is paired with every dummy row sharing that label, which duplicates
# rows and attaches indicator values from a different record. Resetting to
# a fresh, unique index first guarantees a strict one-to-one pairing.
data = data.reset_index(drop=True)
# Append one indicator column per distinct xiaoqu / huxing / louceng value.
data = data.join(pd.get_dummies(data[['xiaoqu', 'huxing', 'louceng']]))
data
cjdanjia xiaoqu huxing mianji chaoxiang louceng xiaoqu_东亚上北中心 xiaoqu_北京像素北区 xiaoqu_北京像素南区 xiaoqu_北京新天地 ... huxing_4室--厅 huxing_4室1厅 huxing_4室2厅 huxing_5室--厅 huxing_5室1厅 huxing_5室2厅 louceng_中楼层 louceng_低楼层 louceng_地下室 louceng_高楼层
31 6.62 荣丰2008 1室1厅 32.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
148 3.86 芍药居北里 1室1厅 43.0 地下室 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
260 3.81 沿海赛洛城 1室1厅 64.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
261 3.32 沿海赛洛城 1室1厅 57.0 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
285 3.96 沿海赛洛城 2室2厅 105.0 南 北 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
286 3.50 沿海赛洛城 1室1厅 51.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
342 2.84 东亚上北中心 1室--厅 41.0 中楼层 1 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
347 3.97 远洋山水 1室--厅 56.0 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
362 2.99 新龙城 2室1厅 95.0 南 北 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
365 2.80 新龙城 2室1厅 103.0 南 北 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
366 3.00 新龙城 2室2厅 101.0 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
367 3.45 新龙城 1室1厅 70.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
368 2.90 新龙城 2室2厅 97.0 东 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
370 3.35 新龙城 3室2厅 110.0 南 北 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
371 2.98 新龙城 2室2厅 100.0 南 北 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
372 3.36 新龙城 2室2厅 86.0 南 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
374 3.11 新龙城 3室2厅 128.0 南 北 西 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
375 2.86 新龙城 2室1厅 108.0 南 北 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
376 2.85 新龙城 2室1厅 100.0 南 北 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
388 3.01 北京像素北区 2室1厅 57.0 西南 低楼层 0 1 0 0 ... 0 0 0 0 0 0 0 1 0 0
388 3.01 北京像素北区 2室1厅 57.0 西南 低楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
388 2.78 海特花园小区 2室1厅 74.0 东 西 中楼层 0 1 0 0 ... 0 0 0 0 0 0 0 1 0 0
388 2.78 海特花园小区 2室1厅 74.0 东 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
403 2.70 北京新天地 2室1厅 89.0 西 北 中楼层 0 0 0 1 ... 0 0 0 0 0 0 1 0 0 0
415 3.76 北京新天地 2室1厅 95.0 东南 高楼层 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 1
416 3.04 北京新天地 2室1厅 91.0 高楼层 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 1
417 3.21 北京新天地 2室1厅 88.0 东 北 低楼层 0 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
418 2.90 北京新天地 3室2厅 127.0 西南 低楼层 0 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
419 2.74 北京新天地 1室1厅 68.0 东 北 低楼层 0 0 0 1 ... 0 0 0 0 0 0 0 1 0 0
420 2.98 北京新天地 1室1厅 61.0 中楼层 0 0 0 1 ... 0 0 0 0 0 0 1 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55606 3.05 北京像素南区 2室1厅 55.0 西南 中楼层 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
55607 3.06 北京像素南区 2室1厅 50.0 中楼层 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
55608 3.16 北京像素南区 2室2厅 50.0 西 低楼层 0 0 1 0 ... 0 0 0 0 0 0 0 1 0 0
55633 3.04 北京像素北区 2室1厅 56.0 南 西 高楼层 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 1
55634 2.94 北京像素南区 4室1厅 52.0 东北 低楼层 0 0 1 0 ... 0 1 0 0 0 0 0 1 0 0
55635 2.91 北京像素南区 2室1厅 55.0 南 西 低楼层 0 0 1 0 ... 0 0 0 0 0 0 0 1 0 0
55636 3.01 北京像素南区 1室1厅 51.0 低楼层 0 0 1 0 ... 0 0 0 0 0 0 0 1 0 0
55637 3.02 北京像素北区 2室1厅 58.0 西南 中楼层 0 1 0 0 ... 0 0 0 0 0 0 1 0 0 0
55638 3.27 北京像素北区 2室1厅 41.0 中楼层 0 1 0 0 ... 0 0 0 0 0 0 1 0 0 0
55639 2.81 北京像素南区 1室1厅 49.0 中楼层 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
55641 2.73 北京像素南区 4室--厅 50.0 低楼层 0 0 1 0 ... 1 0 0 0 0 0 0 1 0 0
55642 3.12 北京像素北区 1室1厅 40.0 西 北 中楼层 0 1 0 0 ... 0 0 0 0 0 0 1 0 0 0
56119 2.09 北京像素南区 1室--厅 50.0 中楼层 0 0 1 0 ... 0 0 0 0 0 0 1 0 0 0
56120 3.58 北京像素北区 2室1厅 60.0 南 北 中楼层 0 1 0 0 ... 0 0 0 0 0 0 1 0 0 0
56237 3.19 新龙城 3室2厅 128.0 南 北 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56238 2.66 东亚上北中心 1室1厅 57.0 中楼层 1 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56513 3.64 沿海赛洛城 1室2厅 70.0 东 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56514 3.79 沿海赛洛城 1室1厅 70.0 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
56515 3.16 沿海赛洛城 2室1厅 115.0 南 北 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
56518 3.60 沿海赛洛城 1室1厅 72.0 南 西 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
56519 3.23 沿海赛洛城 1室1厅 55.0 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
56520 3.59 沿海赛洛城 1室1厅 55.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
56521 3.59 沿海赛洛城 2室1厅 96.0 西南 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56700 2.59 东亚上北中心 1室--厅 37.0 西 中楼层 1 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56702 2.43 东亚上北中心 1室1厅 57.0 西 低楼层 1 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
56956 3.59 新龙城 1室1厅 56.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
56957 2.20 天通苑东一区 2室1厅 94.0 东南 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56960 3.11 新龙城 1室2厅 70.0 低楼层 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
56961 3.18 新龙城 3室2厅 111.0 南 北 高楼层 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
56962 2.83 新龙城 2室2厅 98.0 东 西 中楼层 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0

1852 rows × 42 columns

# One 0/1 indicator column per compass direction: the flag is 1 when the
# direction occurs as a whitespace-separated token of chaoxiang.
# The list order fixes the order in which the columns are appended.
direction_columns = [
    ('dong', '东'),
    ('xi', '西'),
    ('nan', '南'),
    ('bei', '北'),
    ('dongnan', '东南'),
    ('xinan', '西南'),
    ('dongbei', '东北'),
    ('xibei', '西北'),
]
for column_name, direction in direction_columns:
    # Bind the current direction as a default so the lambda does not depend
    # on the loop variable after this iteration.
    has_direction = data.chaoxiang.map(
        lambda text, wanted=direction: wanted in text.split()
    )
    data[column_name] = has_direction.astype(np.int32)
data.columns
Index(['cjdanjia', 'xiaoqu', 'huxing', 'mianji', 'chaoxiang', 'louceng',
       'xiaoqu_东亚上北中心', 'xiaoqu_北京像素北区', 'xiaoqu_北京像素南区', 'xiaoqu_北京新天地',
       'xiaoqu_天通苑东一区', 'xiaoqu_天通苑中苑', 'xiaoqu_天通西苑三区', 'xiaoqu_天通西苑二区',
       'xiaoqu_新龙城', 'xiaoqu_沿海赛洛城', 'xiaoqu_海特花园小区', 'xiaoqu_芍药居北里',
       'xiaoqu_荣丰2008', 'xiaoqu_远洋山水', 'xiaoqu_青年汇佳园', 'huxing_1室--厅',
       'huxing_1室1厅', 'huxing_1室2厅', 'huxing_2室--厅', 'huxing_2室1厅',
       'huxing_2室2厅', 'huxing_2室3厅', 'huxing_3室--厅', 'huxing_3室1厅',
       'huxing_3室2厅', 'huxing_3室3厅', 'huxing_4室--厅', 'huxing_4室1厅',
       'huxing_4室2厅', 'huxing_5室--厅', 'huxing_5室1厅', 'huxing_5室2厅',
       'louceng_中楼层', 'louceng_低楼层', 'louceng_地下室', 'louceng_高楼层', 'dong',
       'xi', 'nan', 'bei', 'dongnan', 'xinan', 'dongbei', 'xibei'],
      dtype='object')
# Remove the raw text columns now that indicator columns exist for them.
# They are dropped by name rather than by position, so the step still
# targets the right columns if the column order ever changes.
data.drop(['xiaoqu', 'huxing', 'chaoxiang', 'louceng'], axis=1, inplace=True)
data
cjdanjia mianji xiaoqu_东亚上北中心 xiaoqu_北京像素北区 xiaoqu_北京像素南区 xiaoqu_北京新天地 xiaoqu_天通苑东一区 xiaoqu_天通苑中苑 xiaoqu_天通西苑三区 xiaoqu_天通西苑二区 ... louceng_地下室 louceng_高楼层 dong xi nan bei dongnan xinan dongbei xibei
31 6.62 32.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
148 3.86 43.0 0 0 0 0 0 0 0 0 ... 1 0 0 0 1 0 0 0 0 0
260 3.81 64.0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
261 3.32 57.0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
285 3.96 105.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 1 0 0 0 0
286 3.50 51.0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
342 2.84 41.0 1 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
347 3.97 56.0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
362 2.99 95.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 1 0 0 0 0
365 2.80 103.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 1 0 0 0 0
366 3.00 101.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
367 3.45 70.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
368 2.90 97.0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
370 3.35 110.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 1 0 0 0 0
371 2.98 100.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 1 0 0 0 0
372 3.36 86.0 0 0 0 0 0 0 0 0 ... 0 0 0 1 1 0 0 0 0 0
374 3.11 128.0 0 0 0 0 0 0 0 0 ... 0 0 0 1 1 1 0 0 0 0
375 2.86 108.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 1 0 0 0 0
376 2.85 100.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 1 0 0 0 0
388 3.01 57.0 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
388 3.01 57.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
388 2.78 74.0 0 1 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
388 2.78 74.0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
403 2.70 89.0 0 0 0 1 0 0 0 0 ... 0 0 0 1 0 1 0 0 0 0
415 3.76 95.0 0 0 0 1 0 0 0 0 ... 0 1 0 0 0 0 1 0 0 0
416 3.04 91.0 0 0 0 1 0 0 0 0 ... 0 1 0 0 1 0 0 0 0 0
417 3.21 88.0 0 0 0 1 0 0 0 0 ... 0 0 1 0 0 1 0 0 0 0
418 2.90 127.0 0 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
419 2.74 68.0 0 0 0 1 0 0 0 0 ... 0 0 1 0 0 1 0 0 0 0
420 2.98 61.0 0 0 0 1 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
55606 3.05 55.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
55607 3.06 50.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
55608 3.16 50.0 0 0 1 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
55633 3.04 56.0 0 1 0 0 0 0 0 0 ... 0 1 0 1 1 0 0 0 0 0
55634 2.94 52.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
55635 2.91 55.0 0 0 1 0 0 0 0 0 ... 0 0 0 1 1 0 0 0 0 0
55636 3.01 51.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
55637 3.02 58.0 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
55638 3.27 41.0 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
55639 2.81 49.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
55641 2.73 50.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
55642 3.12 40.0 0 1 0 0 0 0 0 0 ... 0 0 0 1 0 1 0 0 0 0
56119 2.09 50.0 0 0 1 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
56120 3.58 60.0 0 1 0 0 0 0 0 0 ... 0 0 0 0 1 1 0 0 0 0
56237 3.19 128.0 0 0 0 0 0 0 0 0 ... 0 0 0 1 1 1 0 0 0 0
56238 2.66 57.0 1 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
56513 3.64 70.0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0
56514 3.79 70.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 0 0 0 0 0
56515 3.16 115.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 1 0 0 0 0
56518 3.60 72.0 0 0 0 0 0 0 0 0 ... 0 1 0 1 1 0 0 0 0 0
56519 3.23 55.0 0 0 0 0 0 0 0 0 ... 0 1 1 0 0 0 0 0 0 0
56520 3.59 55.0 0 0 0 0 0 0 0 0 ... 0 0 1 0 0 0 0 0 0 0
56521 3.59 96.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
56700 2.59 37.0 1 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
56702 2.43 57.0 1 0 0 0 0 0 0 0 ... 0 0 0 1 0 0 0 0 0 0
56956 3.59 56.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
56957 2.20 94.0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
56960 3.11 70.0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
56961 3.18 111.0 0 0 0 0 0 0 0 0 ... 0 1 0 0 1 1 0 0 0 0
56962 2.83 98.0 0 0 0 0 0 0 0 0 ... 0 0 1 1 0 0 0 0 0 0

1852 rows × 46 columns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Target: the cjdanjia column. Features: every other column.
Y = data.cjdanjia
X = data.drop('cjdanjia', axis=1)
# A fixed random_state makes the train/test partition, and therefore every
# metric computed from it, reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0)
# Fit an ordinary least-squares linear model on the training split.
model = LinearRegression()
model.fit(X_train, Y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# Predict the target for the held-out test features.
model.predict(X_test)
array([3.54665262, 1.84432528, 3.51138981, 3.98215708, 3.36580034,
       1.88201581, 3.21580288, 2.33208225, 2.09160868, 4.1281406 ,
       2.36620194, 4.21686701, 3.59944481, 2.06453045, 4.13269   ,
       1.84432576, 2.43646187, 2.88226974, 2.33208225, 3.1939061 ,
       4.03979213, 2.96406722, 1.81346511, 3.06775758, 3.05491474,
       3.1360286 , 3.99916963, 4.32998919, 3.33511616, 1.94227664,
       3.10201174, 3.21006363, 4.68719684, 2.9850422 , 2.53152937,
       2.07719992, 3.1481618 , 2.15467268, 3.08020351, 4.64958521,
       2.64958863, 2.84651008, 4.57702631, 3.4841747 , 4.16648524,
       2.84844517, 2.97104674, 2.61040881, 4.32998919, 2.03134972,
       4.01823018, 1.97383557, 2.41625926, 4.37021131, 1.77878054,
       2.41629925, 3.09124581, 2.37530854, 4.73390401, 4.30862556,
       1.80473525, 3.02924878, 2.83819801, 4.91761246, 4.40271337,
       1.98365363, 3.61851997, 2.8662761 , 4.97066841, 3.00969724,
       2.15917709, 2.79711081, 2.82625599, 5.75809283, 2.4868289 ,
       2.00149098, 4.18132244, 2.32747275, 2.76499757, 2.42745698,
       2.26339241, 3.16802316, 4.35109157, 2.49178652, 2.91376021,
       2.84356037, 1.49141912, 2.27554011, 2.05316992, 4.29595972,
       2.87189315, 4.90403619, 5.74139584, 3.09648816, 3.58979582,
       3.17806313, 1.47075586, 2.06083945, 3.08564913, 2.6397108 ,
       3.06089967, 2.96597686, 2.63565871, 3.20329316, 4.04179007,
       3.17552503, 3.8474278 , 3.12522532, 3.04294964, 5.65158196,
       4.21936542, 2.48125583, 4.50437188, 1.70229024, 2.07846376,
       2.31880324, 3.46787309, 5.21671323, 1.97186316, 3.04086916,
       3.99530084, 2.15917709, 2.91376021, 3.80513772, 2.97910429,
       3.13648847, 4.08180629, 2.71471393, 3.07686345, 2.27709597,
       5.04433487, 5.46478765, 2.73      , 5.81495598, 3.73140732,
       3.02825421, 3.2108307 , 3.42007434, 2.26268721, 2.93784626,
       4.12022006, 1.98955488, 2.4914239 , 1.73998077, 2.19861149,
       2.21302025, 3.84180461, 4.99191227, 3.06621723, 4.30862556,
       4.28137263, 1.49141912, 2.31767349, 2.75904334, 2.95716276,
       2.55429561, 3.82162929, 5.75581726, 3.57589166, 2.36620194,
       2.29128132, 3.10721108, 4.56052941, 2.87053392, 4.71962696,
       1.7939048 , 2.74208975, 1.76956268, 2.41138165, 2.04103672,
       2.85037354, 4.00545245, 3.96797116, 4.63925095, 1.63403144,
       1.78186996, 2.84171639, 2.21918606, 2.83819801, 2.48125583,
       3.50032946, 3.79765893, 4.15787802, 1.8339119 , 2.53200218,
       2.70243355, 3.02025366, 5.71257831, 3.05491474, 2.00068069,
       3.58162687, 2.94462235, 4.03751656, 2.22435034, 2.13035956,
       5.7595965 , 4.07926215, 2.6109347 , 5.14267305, 2.7644082 ,
       3.96172366, 2.17426963, 2.95286696, 1.87986081, 3.01209872,
       2.77455019, 2.95722048, 3.38375406, 2.52907712, 5.85097804,
       4.50701389, 2.27554011, 3.0551991 , 2.79978517, 2.4868289 ,
       1.98787534, 2.93035767, 3.37035682, 1.28330249, 4.02510574,
       5.06547241, 2.74208975, 1.32417066, 5.88439056, 2.77462455,
       3.64819662, 3.8994612 , 2.90551035, 2.96186412, 5.66651034,
       3.92157605, 2.77131832, 2.22295888, 3.60834399, 4.13269   ,
       2.1595355 , 1.62033836, 3.1548615 , 3.45311976, 3.41147678,
       3.0673323 , 1.11023806, 4.06216025, 3.2901411 , 3.66921306,
       2.8792539 , 1.72041791, 4.26206491, 3.37035682, 3.08561177,
       2.89592903, 3.5091269 , 1.95705219, 3.02518262, 4.27124282,
       3.80789055, 4.16719131, 3.92157605, 1.27463079, 2.84739984,
       2.96000937, 1.72893733, 2.68997097, 2.9407321 , 2.74463457,
       5.73803455, 4.90092813, 2.72620481, 2.32347635, 3.09648816,
       4.16719131, 4.14994499, 3.09390438, 4.03503639, 3.7603951 ,
       4.49211408, 2.69030036, 1.84432528, 5.7595965 , 5.75011669,
       2.59299853, 1.99806239, 3.16683734, 5.40511666, 2.33422325,
       3.98237992, 2.63716069, 3.08783297, 2.81769255, 4.81379465,
       2.25026695, 2.43873744, 3.10066662, 4.12974402, 4.47053858,
       3.24793352, 2.28662038, 2.35850716, 3.73157758, 3.16819364,
       5.77248894, 2.35406875, 3.41013891, 3.35260298, 2.09703088,
       1.90196082, 5.87752   , 2.12163871, 4.36219971, 2.19861149,
       3.3954847 , 2.36620194, 1.91987029, 2.29150473, 3.28853057,
       3.38335151, 3.54665262, 2.34046354, 1.93805355, 2.10570201,
       2.8647823 , 3.07788872, 3.31856327, 3.85633902, 2.37530854,
       3.58162687, 4.93991617, 4.80343526, 2.06247571, 5.42440304,
       3.07274133, 2.94738254, 1.91154883, 2.94738254, 2.28922917,
       2.79609067, 3.04242284, 1.5246137 , 2.97139526, 5.64210216,
       1.84751037, 3.57311149, 1.80471166, 2.68997097, 4.094869  ,
       2.33208225, 2.07586054, 3.02801407, 2.90429662, 2.94498387,
       3.36207152, 4.73201452, 2.77131832, 4.39715539, 3.69771697,
       2.5022541 , 2.76014142, 2.57145054, 3.09305673, 4.48395754,
       4.01106752, 4.56801743, 4.76138471, 2.74565984, 2.8647823 ,
       3.10892712, 2.70258073, 4.30908727, 3.62047718, 1.73998077,
       3.71646091, 3.01384545, 3.90988081, 5.77248894, 3.27533698,
       4.4754162 , 2.81673203, 2.44735236, 4.06216025, 2.53754321,
       2.34421545, 3.10595175, 2.18628583, 2.45211293, 1.94762194,
       2.74565984, 3.07940979, 2.64198491, 2.65769094, 2.84844517,
       5.63996838, 5.78932861, 4.20061103, 2.28922917, 4.6928115 ,
       2.30828472, 3.30987163, 4.17763705, 2.76829328, 3.6811981 ,
       1.73399418, 2.74208975, 5.15907818, 3.19928674, 2.86081081,
       2.15749238, 3.0693235 , 3.0529984 , 6.07172079, 3.39088905,
       4.00798884, 3.73297978, 4.37027911, 2.99536334, 2.09703088,
       3.21580346, 3.23940796, 2.12195745, 3.3954847 , 2.85612515,
       1.99806239, 3.11934427, 1.7319769 , 4.96083875, 1.60069586,
       2.01280414, 1.79910779, 3.23642435, 3.59944481, 3.11186286,
       2.54019806, 3.07274133, 4.18387977, 3.27675105, 2.90583092,
       4.25455849, 3.68171193, 4.30233474, 3.04326422, 3.54817206,
       3.1360286 , 3.0551991 , 1.69541738, 4.15505812, 2.22295888,
       3.02949001, 2.42432868, 3.55613242, 2.56410218, 4.00798884,
       2.32747275, 2.95716276, 2.42745698, 3.14624668, 3.67333406,
       2.50482301, 2.68997097, 2.49842407, 4.91045928, 2.28662038,
       4.10003098, 3.5634741 , 2.83478421])
from sklearn import metrics
# scikit-learn's signature is mean_squared_error(y_true, y_pred): ground
# truth first, predictions second. Plain MSE is symmetric, so the value is
# unchanged, but the documented order keeps the call correct if options
# such as sample_weight are added or another metric is swapped in.
metrics.mean_squared_error(Y_test, model.predict(X_test))
0.30372099459200635
# Show the coefficients learned by the fitted linear model.
model.coef_
array([-1.44087637e-02, -5.88458915e-01, -3.69944601e-01, -7.10786377e-01,
       -1.40959934e-01, -9.55124367e-01, -7.43602416e-01, -8.44907451e-01,
       -7.22158788e-01,  7.31838479e-03,  4.11108334e-01, -5.53607152e-01,
        1.27018675e+00,  2.10620937e+00,  1.06421863e+00,  7.70508539e-01,
       -1.43821100e+00, -7.65648863e-01, -7.25699895e-01, -3.85104209e-01,
       -3.34462568e-01, -3.18062598e-01, -3.43394946e-02,  2.77555756e-16,
       -1.07187887e-02,  1.28991791e-01,  6.57366427e-01, -5.82059811e-01,
        4.07375703e-01,  4.13416185e-01,  7.77156117e-16,  1.23347953e+00,
        1.75367759e+00,  5.38855506e-01,  4.30840975e-01, -1.52068518e+00,
        5.50988700e-01, -4.01476142e-02, -1.82192128e-01,  5.27502507e-02,
       -6.74488194e-02, -2.57261956e-02, -8.27544267e-03, -3.35368041e-01,
       -3.54450446e-01])