第五届工业大数据创新竞赛代码：使用 Prophet（fbprophet）时间序列算法，求解“重型装备配件需求预测”赛题。

本文已参与「新人创作礼」活动，一起开启掘金创作之路

import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
import zipfile
import matplotlib.pyplot as plt
from datetime import date
import holidays
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from prophet import Prophet
import pystan
import seaborn as sns # 为了更好看的图
import datetime
import math

In [2]:

# Load the raw training table (read relative to the working directory) and
# echo it so the notebook shows a preview.
sales = pd.read_csv(filepath_or_buffer='train_matrl_id_info.csv')
sales

Out[2]:

	matrl_id	sale_nums	sale_time
0	794	1.0	20180104
1	794	2.0	20180106
2	794	2.0	20180119
3	794	2.0	20180124
4	794	4.0	20180126
...	...	...	...
132574	655	5.0	20200626
132575	655	1.0	20200627
132576	655	5.0	20200628
132577	655	3.0	20200629
132578	655	2.0	20200630

132579 rows × 3 columns

In [3]:

# Cast sale_time to strings so the values can be parsed as dates afterwards.
sales["sale_time"] = sales["sale_time"].astype(str)

In [4]:

# Parse the YYYYMMDD values into real timestamps. pd.to_datetime with an
# explicit format converts the whole column in one vectorised call instead of
# running datetime.strptime once per row in Python.
sales["sale_time"] = pd.to_datetime(sales["sale_time"], format="%Y%m%d")
# Sanity check of the resulting dtypes. DataFrame.info() prints its report
# itself and returns None, which is why the print wrapper also emits "None".
print(sales.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132579 entries, 0 to 132578
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   matrl_id   132579 non-null  int64         
 1   sale_nums  132579 non-null  float64       
 2   sale_time  132579 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 3.0 MB
None

In [5]:

# Put the columns in (id, date, value) order and give the date/value columns
# the names `ds` and `y`, which is what the Prophet fit below is fed.
sales = sales[["matrl_id", "sale_time", "sale_nums"]].rename(
    columns={"sale_time": "ds", "sale_nums": "y"}
)

In [6]:

# Display the reformatted frame (columns: matrl_id, ds, y).
sales

Out[6]:

	matrl_id	ds	y
0	794	2018-01-04	1.0
1	794	2018-01-06	2.0
2	794	2018-01-19	2.0
3	794	2018-01-24	2.0
4	794	2018-01-26	4.0
...	...	...	...
132574	655	2020-06-26	5.0
132575	655	2020-06-27	1.0
132576	655	2020-06-28	5.0
132577	655	2020-06-29	3.0
132578	655	2020-06-30	2.0

132579 rows × 3 columns

In [7]:

# Split the DataFrame into one sub-frame per material ID

In [8]:

# Distinct material ids, in order of first appearance; the shape shows how
# many separate series there are.
classinformation = pd.unique(sales["matrl_id"])
classinformation.shape

Out[8]:

(1200,)

In [9]:

# Display the array of distinct material ids.
classinformation

Out[9]:

array([794,  77, 769, ...,  85, 294, 655], dtype=int64)

In [10]:

# Publish one module-level variable per material, named `sales<ID>` (e.g.
# `sales794`), holding that material's rows with their original index.
#
# A single groupby pass replaces the original per-id boolean filter, which
# rescanned the whole frame once for every id, and an explicit globals()
# assignment replaces exec() on string-built source code. sort=False keeps the
# ids in order of first appearance.
for matrl_id, material_rows in sales.groupby("matrl_id", sort=False):
    globals()[f"sales{matrl_id}"] = material_rows

In [11]:

# Prophet (formerly fbprophet) model

In [12]:

# Empty result table for the integer forecasts: one row per
# (material id, month) with the predicted quantity in `nums`.
result_columns = ['matrl_id', 'month', 'nums']
res = pd.DataFrame(columns=result_columns)
res

Out[12]:

	matrl_id	month	nums

In [13]:

# Fit one Prophet model per material and total its daily forecast over each
# target month. Two tables are produced with columns (matrl_id, month, nums):
#   res - monthly totals rounded up to an integer with math.ceil
#   qes - the unrounded monthly totals

# Target months as (label, first day, last day); both bounds are inclusive.
forecast_months = (
    (202007, "2020-07-01", "2020-07-31"),
    (202008, "2020-08-01", "2020-08-31"),
    (202009, "2020-09-01", "2020-09-30"),
)

# Predict exactly the days that are summed below. The previous
# make_future_dataframe(periods=130) counted 130 days from the last
# observation of each series, so a material whose history stops before
# 2020-06-30 could have a partially or completely empty target window, and the
# missing days were silently summed as 0.
future = pd.DataFrame(
    {"ds": pd.date_range(forecast_months[0][1], forecast_months[-1][2], freq="D")}
)

# Rows are collected in plain lists and turned into frames once at the end:
# DataFrame.append copied the whole frame on every call and no longer exists
# in pandas 2.x.
rounded_rows = []
exact_rows = []

# Iterate over the ids that are actually present (ascending), instead of
# assuming they are exactly 1..1200 and looking up variables by name via eval.
for matrl_id in sorted(sales["matrl_id"].unique().tolist()):
    # Prophet is fitted on a frame with only the `ds` / `y` columns.
    history = sales.loc[sales["matrl_id"] == matrl_id, ["ds", "y"]]

    m = Prophet(
        holidays_prior_scale=10,  # flexibility of holiday effects (no holidays are supplied in this call)
        changepoint_prior_scale=0.05,
        seasonality_mode='multiplicative',
        seasonality_prior_scale=10,  # flexibility of the seasonality
    )
    m.fit(history)
    forecast = m.predict(future)

    # NOTE(review): yhat is not constrained to be non-negative, so a monthly
    # total can come out negative (the recorded output contains such rows).
    # If `nums` is meant to be a demand quantity, consider clipping at 0 -
    # confirm against the competition's scoring rules before changing it.
    for month, first_day, last_day in forecast_months:
        in_month = (forecast["ds"] >= first_day) & (forecast["ds"] <= last_day)
        month_total = forecast.loc[in_month, "yhat"].sum()
        rounded_rows.append(
            {'matrl_id': matrl_id, 'month': month, 'nums': math.ceil(month_total)}
        )
        exact_rows.append(
            {'matrl_id': matrl_id, 'month': month, 'nums': month_total}
        )

res = pd.DataFrame(rounded_rows, columns=['matrl_id', 'month', 'nums'])
qes = pd.DataFrame(exact_rows, columns=['matrl_id', 'month', 'nums'])

res

Out[15]:

	matrl_id	month	nums
0	1	202007	8
1	1	202008	20
2	1	202009	6
3	2	202007	181
4	2	202008	182
...	...	...	...
3595	1199	202008	12648
3596	1199	202009	1242
3597	1200	202007	791
3598	1200	202008	-818
3599	1200	202009	2034

3600 rows × 3 columns

In [16]:

# Display the unrounded monthly totals.
qes

Out[16]:

	matrl_id	month	nums
0	1	202007	7.131226
1	1	202008	19.440604
2	1	202009	5.973726
3	2	202007	180.470566
4	2	202008	181.888941
...	...	...	...
3595	1199	202008	12647.735691
3596	1199	202009	1241.694597
3597	1200	202007	790.090384
3598	1200	202008	-818.394460
3599	1200	202009	2033.712995

3600 rows × 3 columns

In [17]:

# Write both result tables to CSV in the working directory: the rounded totals
# and the unrounded totals. to_csv is called with its defaults, so the row
# index is written as the first column of each file.
for frame, csv_name in ((res, "prophet8整数.csv"), (qes, "prophet8小数.csv")):
    frame.to_csv(csv_name)