使用 seaborn(sns)绘图

210 阅读2分钟

本文已参与「新人创作礼」活动，一起开启掘金创作之路。

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import zipfile
from sklearn.model_selection import KFold
import lightgbm as lgb
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:

# Load the full dataset from the working directory.
# NOTE(review): the file name suggests standardized features with constant /
# collinear columns already removed — confirm against the preprocessing step.
df = pd.read_excel('去除设定恒定共线性使用标准化.xlsx')

In [4]:

# Load the train/validation split (file name: "训练验证集") from the working directory.
df_train = pd.read_excel('训练验证集.xlsx')

In [5]:

# Load the held-out test split (file name: "测试集") from the working directory.
df_test = pd.read_excel('测试集.xlsx')

In [6]:

# Split each frame by position: the last two columns become y_*, all earlier
# columns become x_*.
x_train, y_train = df_train.iloc[:, :-2], df_train.iloc[:, -2:]
x_test, y_test = df_test.iloc[:, :-2], df_test.iloc[:, -2:]
x, y = df.iloc[:, :-2], df.iloc[:, -2:]

In [7]:

# Convert the train/test features and targets to NumPy arrays.
# The names are rebound, so from here on they refer to ndarrays, not DataFrames.
x_train, y_train = np.array(x_train), np.array(y_train)
x_test, y_test = np.array(x_test), np.array(y_test)

特征两两相关性分析

In [9]:

# Pairwise correlation of every column in df, drawn as an annotated heatmap
# with square cells and one decimal place per annotation.
plt.figure(figsize=(10, 10))
sns.heatmap(
    data=df.corr(),
    square=True,
    annot=True,
    fmt=".1f",
)
plt.show()

image.png

# Same correlation matrix on a larger canvas: the colour scale is centred on 0
# and capped at 0.3, cells are separated by thin lines, and the colour bar is
# shrunk to half size.
plt.figure(figsize=(12, 12))
sns.heatmap(
    data=df.corr(),
    center=0,
    vmax=0.3,
    annot=True,
    fmt=".1f",
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.5},
)
plt.tight_layout()
plt.show()

image.png

# Scatter-plot matrix over the columns of df.
# NOTE(review): the frame summary reports 50 columns, so this grid is very large
# and may be slow to render — confirm that plotting every column is intended.
sns.pairplot(data=df)
plt.show()

image.png

# Display the frame as the cell output; the rendered repr is truncated to the
# leading/trailing rows and columns.
df

Out[16]:

挤出机1区(℃)实际挤出机2区(℃)实际挤出机3区(℃)实际挤出机3区(℃)偏差挤出机4区(℃)实际挤出机5区(℃)实际挤出机6区(℃)实际过渡区1区℃实际过渡区2区℃实际过渡区3区℃实际...收卷机参数频率实际 (Hz)收卷机参数电流实际 (A)热风机参数频率设定 (Hz)热风机参数频率实际 (Hz)热风机参数电流实际 (A)抽吸风机参数频率设定 (Hz)抽吸风机参数频率实际 (Hz)抽吸风机参数电流实际 (A)PFE过滤效率阻力/pa
0-0.676828-0.874057-0.785047-0.099374-0.619479-0.640760-0.7198190.7118960.355214-0.387147...0.2279620.2254640.0240590.0239531.2129710.0887670.0889560.98338995.78888924.133333
10.002809-0.521390-0.863245-0.338083-0.652002-0.894566-0.699741-1.2427280.5395880.819483...0.2279620.2254640.0240590.0239530.9244340.0887670.088956-0.37916294.70000023.633333
2-0.903373-0.815279-0.824146-0.218728-0.782096-0.621237-0.7198190.467568-0.382285-0.588252...0.2279620.2254640.0240590.0239530.6358970.0887670.0889560.30211395.12222223.833333
30.002809-0.521390-0.902343-0.457438-0.749573-0.796949-0.8603650.223240-0.7510340.216168...0.2279620.2254640.0240590.0239531.2129710.0887670.0889561.66466495.01111124.511111
4-0.450282-0.815279-0.902343-0.457438-0.814620-0.835996-0.639507-0.9984000.5395880.618378...0.2279620.2254640.0240590.0239530.3473600.0887670.088956-1.06043895.81111123.633333
..................................................................
2011-0.223736-0.1099461.150337-0.1590510.9741721.1944501.227744-0.021088-0.5666590.216168...0.2279620.2254640.7169510.717110-1.0953230.5059090.506016-1.74171394.54111122.900000
2012-0.450282-0.2275021.150337-0.1590510.9741721.1749261.227744-0.021088-0.4744721.020588...0.2279620.2254640.7169510.717110-1.0953230.5059090.506016-1.06043894.10888921.300000
2013-0.450282-0.2275021.111238-0.2784060.9416491.1944501.167510-0.265416-0.382285-0.186042...0.2279620.2254640.7169510.717110-0.8067870.5059090.506016-1.06043895.56333322.700000
2014-0.450282-0.1099461.150337-0.1590510.9741721.1944501.2478220.223240-0.4744720.819483...0.2279620.2254640.7169510.717110-0.8067870.5059090.506016-1.06043894.92444422.377778
2015-0.676828-0.1099461.130787-0.2187280.9741721.1554031.187588-0.754072-1.119783-0.186042...0.2279620.2254640.7169510.717110-0.8067870.5059090.506016-0.37916294.67555621.811111

2016 rows × 50 columns

In [17]:

# Distribution of the "挤出机1区(℃)实际" column: histogram plus KDE curve.
# sns.distplot is deprecated since seaborn 0.11 and scheduled for removal; the
# call below is the documented replacement. stat="density" and cut=3 keep the
# y-axis scale and KDE tails that distplot used to draw.
sns.histplot(
    df["挤出机1区(℃)实际"],
    kde=True,
    stat="density",
    kde_kws=dict(cut=3),
)
plt.show()

image.png

# Largest value in the "挤出机1区(℃)实际" column.
df.loc[:, "挤出机1区(℃)实际"].max()

Out[18]:

4.533722163185907

In [21]:

# Distinct values of the "挤出机1区(℃)实际" column, in order of first appearance.
df.loc[:, "挤出机1区(℃)实际"].unique()

Out[21]:

array([-6.76827577e-01,  2.80934574e-03, -9.03373218e-01, -4.50281936e-01,
       -2.23736295e-01,  2.29354987e-01,  4.55900627e-01,  6.82446268e-01,
        9.08991909e-01, -1.35646450e+00, -1.12991886e+00, -1.58301014e+00,
       -2.26264706e+00, -2.71573834e+00, -2.48919270e+00,  1.13553755e+00,
        1.81517447e+00,  2.04172011e+00,  2.26826575e+00,  2.72135704e+00,
        1.58862883e+00, -1.80955578e+00, -2.94228399e+00, -2.03610142e+00,
        1.36208319e+00,  2.49481140e+00,  2.94790268e+00,  3.17444832e+00,
       -3.39537527e+00, -3.84846655e+00, -4.07501219e+00, -4.52810347e+00,
       -3.62192091e+00, -4.30155783e+00, -4.98119475e+00, -5.43428604e+00,
       -5.66083168e+00, -5.20774039e+00, -4.75464911e+00, -3.16882963e+00,
        3.40099396e+00,  3.62753960e+00,  4.08063088e+00,  4.30717652e+00,
        4.53372216e+00,  3.85408524e+00])

In [23]:

# Frequency of each distinct value in "挤出机1区(℃)实际", most common first.
df.loc[:, "挤出机1区(℃)实际"].value_counts()

Out[23]:

 0.002809    340
 0.229355    339
-0.223736    270
 0.455901    220
-0.450282    193
-0.676828    118
 0.682446    107
-0.903373     75
 0.908992     48
-1.129919     28
 1.135538     28
-1.356464     26
 1.362083     25
 1.588629     20
-1.583010     20
 1.815174     17
 2.947903     15
-2.262647     13
 2.268266     12
-2.715738     10
 2.041720      9
-2.489193      8
-2.036101      7
 2.494811      7
-1.809556      6
 3.174448      6
 2.721357      5
-4.075012      5
 4.307177      5
-3.848467      4
 3.627540      4
-2.942284      4
-5.434286      3
-3.395375      3
-5.207740      2
-4.981195      2
-4.301558      2
 3.854085      2
-3.621921      1
-5.660832      1
 4.080631      1
-4.754649      1
-3.168830      1
 4.533722      1
 3.400994      1
-4.528103      1
Name: 挤出机1区(℃)实际, dtype: int64

In [24]:

# One bar per distinct value of "PFE过滤效率", coloured with the "bwr" palette.
# NOTE(review): the displayed rows show non-integer values in this column, so
# this likely draws one bar per unique measurement — confirm a count plot
# (rather than a histogram) is what is wanted.
sns.countplot(data=df, x="PFE过滤效率", palette="bwr")
# Render explicitly, like every other plotting cell here; this also keeps the
# Axes repr out of the cell output.
plt.show()

image.png