第五届工业大数据创新竞赛代码

148 阅读4分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
import zipfile
import matplotlib.pyplot as plt
from datetime import date
import holidays
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from prophet import Prophet
import pystan
import seaborn as sns # 为了更好看的图
import datetime
import math

In [2]:

# Load the raw training data; the preview below shows the columns
# matrl_id, sale_nums and sale_time.
sales = pd.read_csv("train_matrl_id_info.csv")
sales  # notebook preview

Out[2]:

        matrl_id  sale_nums  sale_time
0            794        1.0   20180104
1            794        2.0   20180106
2            794        2.0   20180119
3            794        2.0   20180124
4            794        4.0   20180126
...          ...        ...        ...
132574       655        5.0   20200626
132575       655        1.0   20200627
132576       655        5.0   20200628
132577       655        3.0   20200629
132578       655        2.0   20200630

132579 rows × 3 columns

In [3]:

# Cast sale_time to str so the YYYYMMDD values can be parsed as dates.
sales["sale_time"] = sales["sale_time"].astype(str)

In [4]:

# Parse the YYYYMMDD strings into datetime64 values with a single vectorized
# call instead of running datetime.strptime row by row through apply().
sales["sale_time"] = pd.to_datetime(sales["sale_time"], format="%Y%m%d")
# Check the resulting dtypes. DataFrame.info() prints its own report and
# returns None, which is why print() adds a trailing "None" to the output.
print(sales.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132579 entries, 0 to 132578
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   matrl_id   132579 non-null  int64         
 1   sale_nums  132579 non-null  float64       
 2   sale_time  132579 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 3.0 MB
None

In [5]:

# Reorder the columns and rename them to the ds / y names that the
# forecasting cell selects later (source column -> new name).
column_map = {"matrl_id": "matrl_id", "sale_time": "ds", "sale_nums": "y"}
sales = sales.loc[:, list(column_map)]
sales.columns = list(column_map.values())

In [6]:

# Preview the frame after the columns were reordered and renamed.
sales

Out[6]:

        matrl_id         ds    y
0            794 2018-01-04  1.0
1            794 2018-01-06  2.0
2            794 2018-01-19  2.0
3            794 2018-01-24  2.0
4            794 2018-01-26  4.0
...          ...        ...  ...
132574       655 2020-06-26  5.0
132575       655 2020-06-27  1.0
132576       655 2020-06-28  5.0
132577       655 2020-06-29  3.0
132578       655 2020-06-30  2.0

132579 rows × 3 columns

In [7]:

# Split the DataFrame into several smaller per-material frames

In [8]:

# Distinct material ids found in the data; .shape gives how many there are.
classinformation = sales.matrl_id.unique()
classinformation.shape

Out[8]:

(1200,)

In [9]:

# Display the array of distinct material ids.
classinformation

Out[9]:

array([794,  77, 769, ...,  85, 294, 655], dtype=int64)

In [10]:

# Publish one module-level variable per material, named "sales<matrl_id>"
# (for example sales794), holding only that material's rows of `sales`.
# Later cells look these variables up by name, so the naming scheme matters.
for matrl_id in classinformation:
    rows_for_material = sales["matrl_id"].isin([matrl_id])
    globals()["sales%s" % matrl_id] = sales[rows_for_material]

In [11]:

# Prophet (fbprophet) model

In [12]:

# Start from an empty result table with the three output columns.
result_columns = ["matrl_id", "month", "nums"]
res = pd.DataFrame(columns=result_columns)
res

Out[12]:

matrl_id  month  nums

In [13]:

# Fit one Prophet model per material and sum its daily forecast (yhat) over
# July, August and September 2020. Two result tables are produced:
#   res - monthly totals rounded up to whole units with math.ceil
#   qes - the same totals as raw floats
#
# Changes from the previous version of this cell:
#   * rows are collected in lists and turned into DataFrames once, because
#     DataFrame.append was removed in pandas 2.0 and copies the whole frame
#     on every call;
#   * each material's history is selected straight from `sales` instead of
#     eval()-ing dynamically created "sales<id>" variables, and the ids come
#     from the data rather than a hard-coded range(1, 1201);
#   * the forecast horizon is extended when a material's history ends early,
#     so all of September is always covered.
#
# NOTE(review): yhat is not constrained to be non-negative, and the recorded
# output contains a negative monthly total (material 1200, 202008). The values
# are left as predicted here - decide separately whether to clip them.

# Target months as (label, first day, last day); both bounds are inclusive.
forecast_months = (
    (202007, "2020-07-01", "2020-07-31"),
    (202008, "2020-08-01", "2020-08-31"),
    (202009, "2020-09-01", "2020-09-30"),
)
horizon_end = pd.to_datetime(forecast_months[-1][2])

res_rows = []
qes_rows = []
for i in sorted(int(v) for v in sales["matrl_id"].unique()):
    # History of this material in the two-column (ds, y) layout.
    u = sales.loc[sales["matrl_id"] == i, ["ds", "y"]]
    # Create the Prophet object.
    m = Prophet(
        holidays_prior_scale=10,  # flexibility of holiday effects (no holidays are passed in this cell)
        changepoint_prior_scale=0.05,
        seasonality_mode='multiplicative',
        seasonality_prior_scale=10,  # flexibility of the seasonality
    )
    # Fit the model on this material's history.
    m.fit(u)
    # The future frame starts the day after the last observed date. Keep the
    # original 130 days as a minimum, but extend it when the history stops so
    # early that 130 days would not reach the end of September.
    periods = max(130, (horizon_end - u["ds"].max()).days)
    future = m.make_future_dataframe(periods=periods, freq='D')
    forecast = m.predict(future)
    for month, first_day, last_day in forecast_months:
        in_month = (forecast["ds"] >= first_day) & (forecast["ds"] <= last_day)
        total = forecast.loc[in_month, "yhat"].sum()
        res_rows.append({'matrl_id': i, 'month': month, 'nums': math.ceil(total)})
        qes_rows.append({'matrl_id': i, 'month': month, 'nums': total})

res = pd.DataFrame(res_rows, columns=['matrl_id', 'month', 'nums'])
qes = pd.DataFrame(qes_rows, columns=['matrl_id', 'month', 'nums'])
res

Out[15]:

      matrl_id   month   nums
0            1  202007      8
1            1  202008     20
2            1  202009      6
3            2  202007    181
4            2  202008    182
...        ...     ...    ...
3595      1199  202008  12648
3596      1199  202009   1242
3597      1200  202007    791
3598      1200  202008   -818
3599      1200  202009   2034

3600 rows × 3 columns

In [16]:

# Display the unrounded monthly forecast totals.
qes

Out[16]:

      matrl_id   month          nums
0            1  202007      7.131226
1            1  202008     19.440604
2            1  202009      5.973726
3            2  202007    180.470566
4            2  202008    181.888941
...        ...     ...           ...
3595      1199  202008  12647.735691
3596      1199  202009   1241.694597
3597      1200  202007    790.090384
3598      1200  202008   -818.394460
3599      1200  202009   2033.712995

3600 rows × 3 columns

In [17]:

# Write both result tables to CSV: the rounded totals and the raw floats.
for frame, csv_path in ((res, "prophet8整数.csv"), (qes, "prophet8小数.csv")):
    frame.to_csv(csv_path)