本文已参与「新人创作礼」活动,一起开启掘金创作之路
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import zipfile
from sklearn.model_selection import KFold
import lightgbm as lgb
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
# Full dataset. The file name suggests set-point/constant/collinear columns were
# already removed and the features standardized — not verifiable from this file.
df = pd.read_excel('去除设定恒定共线性使用标准化.xlsx')
In [4]:
# Training/validation data (the file name reads "train-validation set").
df_train = pd.read_excel('训练验证集.xlsx')
In [5]:
# Held-out test data (the file name reads "test set").
df_test = pd.read_excel('测试集.xlsx')
In [6]:
# Split each frame positionally: the trailing two columns are the targets and
# every column before them is a feature. In df's displayed header those two
# columns are "PFE过滤效率" and "阻力/pa".
# Assumes df_train / df_test share df's column layout — TODO confirm.
x_train, y_train = df_train.iloc[:, :-2], df_train.iloc[:, -2:]
x_test, y_test = df_test.iloc[:, :-2], df_test.iloc[:, -2:]
x, y = df.iloc[:, :-2], df.iloc[:, -2:]
In [7]:
# Convert the train/test feature and target frames to NumPy arrays.
x_train, y_train, x_test, y_test = (
    np.array(part) for part in (x_train, y_train, x_test, y_test)
)
特征两两相关性分析
In [9]:
# Pairwise correlation of df's columns, computed once and shared by both
# heatmaps below instead of calling df.corr() for each figure.
corr_matrix = df.corr()

# Heatmap 1: default colour scaling, every cell annotated to one decimal place.
plt.figure(figsize=(10, 10))
sns.heatmap(corr_matrix, annot=True, fmt=".1f", square=True)
plt.show()

# Heatmap 2: same matrix with the colormap centred on 0 and anchored at
# vmax=0.3, thin cell borders and a shrunken colour bar.
plt.figure(figsize=(12, 12))
sns.heatmap(
    corr_matrix,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    annot=True,
    fmt=".1f",
)
plt.tight_layout()
plt.show()
# Scatter-plot matrix over every pair of columns in df.
# NOTE(review): the output below reports 50 columns, so this builds a 50x50
# grid of axes and may be very slow — consider restricting it with `vars=`.
sns.pairplot(df)
plt.show()
# Bare expression: shows the DataFrame as the notebook cell's output.
df
Out[16]:
| 挤出机1区(℃)实际 | 挤出机2区(℃)实际 | 挤出机3区(℃)实际 | 挤出机3区(℃)偏差 | 挤出机4区(℃)实际 | 挤出机5区(℃)实际 | 挤出机6区(℃)实际 | 过渡区1区℃实际 | 过渡区2区℃实际 | 过渡区3区℃实际 | ... | 收卷机参数频率实际 (Hz) | 收卷机参数电流实际 (A) | 热风机参数频率设定 (Hz) | 热风机参数频率实际 (Hz) | 热风机参数电流实际 (A) | 抽吸风机参数频率设定 (Hz) | 抽吸风机参数频率实际 (Hz) | 抽吸风机参数电流实际 (A) | PFE过滤效率 | 阻力/pa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.676828 | -0.874057 | -0.785047 | -0.099374 | -0.619479 | -0.640760 | -0.719819 | 0.711896 | 0.355214 | -0.387147 | ... | 0.227962 | 0.225464 | 0.024059 | 0.023953 | 1.212971 | 0.088767 | 0.088956 | 0.983389 | 95.788889 | 24.133333 |
| 1 | 0.002809 | -0.521390 | -0.863245 | -0.338083 | -0.652002 | -0.894566 | -0.699741 | -1.242728 | 0.539588 | 0.819483 | ... | 0.227962 | 0.225464 | 0.024059 | 0.023953 | 0.924434 | 0.088767 | 0.088956 | -0.379162 | 94.700000 | 23.633333 |
| 2 | -0.903373 | -0.815279 | -0.824146 | -0.218728 | -0.782096 | -0.621237 | -0.719819 | 0.467568 | -0.382285 | -0.588252 | ... | 0.227962 | 0.225464 | 0.024059 | 0.023953 | 0.635897 | 0.088767 | 0.088956 | 0.302113 | 95.122222 | 23.833333 |
| 3 | 0.002809 | -0.521390 | -0.902343 | -0.457438 | -0.749573 | -0.796949 | -0.860365 | 0.223240 | -0.751034 | 0.216168 | ... | 0.227962 | 0.225464 | 0.024059 | 0.023953 | 1.212971 | 0.088767 | 0.088956 | 1.664664 | 95.011111 | 24.511111 |
| 4 | -0.450282 | -0.815279 | -0.902343 | -0.457438 | -0.814620 | -0.835996 | -0.639507 | -0.998400 | 0.539588 | 0.618378 | ... | 0.227962 | 0.225464 | 0.024059 | 0.023953 | 0.347360 | 0.088767 | 0.088956 | -1.060438 | 95.811111 | 23.633333 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2011 | -0.223736 | -0.109946 | 1.150337 | -0.159051 | 0.974172 | 1.194450 | 1.227744 | -0.021088 | -0.566659 | 0.216168 | ... | 0.227962 | 0.225464 | 0.716951 | 0.717110 | -1.095323 | 0.505909 | 0.506016 | -1.741713 | 94.541111 | 22.900000 |
| 2012 | -0.450282 | -0.227502 | 1.150337 | -0.159051 | 0.974172 | 1.174926 | 1.227744 | -0.021088 | -0.474472 | 1.020588 | ... | 0.227962 | 0.225464 | 0.716951 | 0.717110 | -1.095323 | 0.505909 | 0.506016 | -1.060438 | 94.108889 | 21.300000 |
| 2013 | -0.450282 | -0.227502 | 1.111238 | -0.278406 | 0.941649 | 1.194450 | 1.167510 | -0.265416 | -0.382285 | -0.186042 | ... | 0.227962 | 0.225464 | 0.716951 | 0.717110 | -0.806787 | 0.505909 | 0.506016 | -1.060438 | 95.563333 | 22.700000 |
| 2014 | -0.450282 | -0.109946 | 1.150337 | -0.159051 | 0.974172 | 1.194450 | 1.247822 | 0.223240 | -0.474472 | 0.819483 | ... | 0.227962 | 0.225464 | 0.716951 | 0.717110 | -0.806787 | 0.505909 | 0.506016 | -1.060438 | 94.924444 | 22.377778 |
| 2015 | -0.676828 | -0.109946 | 1.130787 | -0.218728 | 0.974172 | 1.155403 | 1.187588 | -0.754072 | -1.119783 | -0.186042 | ... | 0.227962 | 0.225464 | 0.716951 | 0.717110 | -0.806787 | 0.505909 | 0.506016 | -0.379162 | 94.675556 | 21.811111 |
2016 rows × 50 columns
In [17]:
# Distribution (histogram + KDE curve) of the "挤出机1区(℃)实际" column
# (extruder zone 1, actual temperature).
# sns.distplot is deprecated and scheduled for removal; histplot on a density
# axis with a KDE overlay is the documented replacement (needs seaborn >= 0.11).
sns.histplot(
    df["挤出机1区(℃)实际"],
    kde=True,
    stat="density",
    kde_kws=dict(cut=3),
)
plt.show()
# Largest value in the "挤出机1区(℃)实际" column (extruder zone 1, actual
# temperature); as a bare expression the notebook displays the result.
df["挤出机1区(℃)实际"].max()
Out[18]:
4.533722163185907
In [21]:
# Distinct values of the "挤出机1区(℃)实际" column, in order of first appearance.
df["挤出机1区(℃)实际"].unique()
Out[21]:
array([-6.76827577e-01, 2.80934574e-03, -9.03373218e-01, -4.50281936e-01,
-2.23736295e-01, 2.29354987e-01, 4.55900627e-01, 6.82446268e-01,
9.08991909e-01, -1.35646450e+00, -1.12991886e+00, -1.58301014e+00,
-2.26264706e+00, -2.71573834e+00, -2.48919270e+00, 1.13553755e+00,
1.81517447e+00, 2.04172011e+00, 2.26826575e+00, 2.72135704e+00,
1.58862883e+00, -1.80955578e+00, -2.94228399e+00, -2.03610142e+00,
1.36208319e+00, 2.49481140e+00, 2.94790268e+00, 3.17444832e+00,
-3.39537527e+00, -3.84846655e+00, -4.07501219e+00, -4.52810347e+00,
-3.62192091e+00, -4.30155783e+00, -4.98119475e+00, -5.43428604e+00,
-5.66083168e+00, -5.20774039e+00, -4.75464911e+00, -3.16882963e+00,
3.40099396e+00, 3.62753960e+00, 4.08063088e+00, 4.30717652e+00,
4.53372216e+00, 3.85408524e+00])
In [23]:
# Number of rows per distinct value of "挤出机1区(℃)实际", most frequent first.
df["挤出机1区(℃)实际"].value_counts()
Out[23]:
0.002809 340
0.229355 339
-0.223736 270
0.455901 220
-0.450282 193
-0.676828 118
0.682446 107
-0.903373 75
0.908992 48
-1.129919 28
1.135538 28
-1.356464 26
1.362083 25
1.588629 20
-1.583010 20
1.815174 17
2.947903 15
-2.262647 13
2.268266 12
-2.715738 10
2.041720 9
-2.489193 8
-2.036101 7
2.494811 7
-1.809556 6
3.174448 6
2.721357 5
-4.075012 5
4.307177 5
-3.848467 4
3.627540 4
-2.942284 4
-5.434286 3
-3.395375 3
-5.207740 2
-4.981195 2
-4.301558 2
3.854085 2
-3.621921 1
-5.660832 1
4.080631 1
-4.754649 1
-3.168830 1
4.533722 1
3.400994 1
-4.528103 1
Name: 挤出机1区(℃)实际, dtype: int64
In [24]:
# Bar chart of how many rows share each distinct "PFE过滤效率" value, coloured
# with the "bwr" palette.
# NOTE(review): the PFE values shown in the table above look continuous
# (e.g. 95.788889), so this may draw one bar per unique value — confirm a
# histogram is not what was intended.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` — verify against the installed version.
sns.countplot(x="PFE过滤效率",data=df,palette="bwr")