sklearn 多项式回归(github地址:https://github.com/yangjinghit/tensorfl…)

github地址 github.com/yangjinghit…

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

/anaconda3/envs/py35/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Apply matplotlib's 'ggplot' style sheet to every plot drawn afterwards.
plt.style.use('ggplot')

# Read the dataset from a CSV file located relative to the working directory.
data = pd.read_csv('Advertising.csv')

# Preview the first rows; the recorded output shows the columns
# 'Unnamed: 0', 'TV', 'radio', 'newspaper' and 'sales'.
data.head()

	Unnamed: 0	TV	radio	newspaper	sales
0	1	230.1	37.8	69.2	22.1
1	2	44.5	39.3	45.1	10.4
2	3	17.2	45.9	69.3	9.3
3	4	151.5	41.3	58.5	18.5
4	5	180.8	10.8	58.4	12.9

# Scatter plot of the 'sales' column against the 'TV' column.
plt.scatter(data['TV'], data['sales'])

<matplotlib.collections.PathCollection at 0x1a179bb908>

# Scatter plot of the 'sales' column against the 'radio' column.
plt.scatter(data['radio'], data['sales'])

<matplotlib.collections.PathCollection at 0x115dd47b8>

# Scatter plot of the 'sales' column against the 'newspaper' column.
plt.scatter(data['newspaper'], data['sales'])

<matplotlib.collections.PathCollection at 0x1a17a132e8>

# Feature matrix: the 'TV', 'radio' and 'newspaper' columns.
x = data[['TV', 'radio', 'newspaper']]

# Regression target: the 'sales' column.
y = data.sales

# Hold out part of the rows for evaluation (default test_size, i.e. 25%).
# A fixed random_state makes the shuffle -- and therefore the fitted
# coefficients and the test error -- reproducible from run to run.
# NOTE: the outputs recorded in this document came from an unseeded split,
# so re-running will print different numbers than the ones shown.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Row counts of the training features and training targets (must match).
x_train.shape[0], y_train.shape[0]

(150, 150)

# Number of held-out rows.
len(x_test)

# Ordinary least-squares linear model with scikit-learn's default settings.
model = LinearRegression()

# Fit on the training split only; the test split is kept for evaluation.
model.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

# Fitted coefficients, one per feature, in the column order of x_train.
model.coef_

array([ 0.04466416,  0.19594144, -0.00469486])

# Print each feature name together with its fitted coefficient as a tuple.
for feature_and_coef in zip(x_train.columns, model.coef_):
    print(feature_and_coef)

('TV', 0.04466415613441986)
('radio', 0.1959414384329583)
('newspaper', -0.0046948632484331895)

# Mean squared error on the held-out rows. scikit-learn's signature is
# mean_squared_error(y_true, y_pred), so the ground truth goes first.
# (MSE is symmetric, so the value is unchanged; the order matters for
# other metrics and for readers copying this call.)
mean_squared_error(y_test, model.predict(x_test))

3.927556655626268