本文已参与「新人创作礼」活动,一起开启掘金创作之路
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb
import lightgbm as lgb
import zipfile
import matplotlib.pyplot as plt
from datetime import date
import holidays
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from prophet import Prophet
import pystan
import seaborn as sns # 为了更好看的图
import datetime
import math
In [2]:
# Load the training CSV into `sales` and echo the frame as the cell output.
sales = pd.read_csv(filepath_or_buffer='train_matrl_id_info.csv')
sales
Out[2]:
| | matrl_id | sale_nums | sale_time |
|---|---|---|---|
| 0 | 794 | 1.0 | 20180104 |
| 1 | 794 | 2.0 | 20180106 |
| 2 | 794 | 2.0 | 20180119 |
| 3 | 794 | 2.0 | 20180124 |
| 4 | 794 | 4.0 | 20180126 |
| ... | ... | ... | ... |
| 132574 | 655 | 5.0 | 20200626 |
| 132575 | 655 | 1.0 | 20200627 |
| 132576 | 655 | 5.0 | 20200628 |
| 132577 | 655 | 3.0 | 20200629 |
| 132578 | 655 | 2.0 | 20200630 |
132579 rows × 3 columns
In [3]:
# Convert sale_time (yyyymmdd values such as 20180104) to strings so the
# next step can parse it as a date.
sales["sale_time"] = sales["sale_time"].astype(str)
In [4]:
# Parse the yyyymmdd strings into datetime64 values with one vectorized call
# instead of running datetime.strptime row by row through apply().
sales["sale_time"] = pd.to_datetime(sales["sale_time"], format="%Y%m%d")
# Check the resulting dtypes. info() writes the summary itself and returns
# None, which is why print() adds a trailing "None" line to the output.
print(sales.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132579 entries, 0 to 132578
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 matrl_id 132579 non-null int64
1 sale_nums 132579 non-null float64
2 sale_time 132579 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1)
memory usage: 3.0 MB
None
In [5]:
# Reorder the columns and rename the date and quantity columns to "ds" and
# "y", the names the Prophet fitting step later in this notebook selects.
sales = sales[["matrl_id", "sale_time", "sale_nums"]].rename(
    columns={"sale_time": "ds", "sale_nums": "y"}
)
In [6]:
sales
Out[6]:
| | matrl_id | ds | y |
|---|---|---|---|
| 0 | 794 | 2018-01-04 | 1.0 |
| 1 | 794 | 2018-01-06 | 2.0 |
| 2 | 794 | 2018-01-19 | 2.0 |
| 3 | 794 | 2018-01-24 | 2.0 |
| 4 | 794 | 2018-01-26 | 4.0 |
| ... | ... | ... | ... |
| 132574 | 655 | 2020-06-26 | 5.0 |
| 132575 | 655 | 2020-06-27 | 1.0 |
| 132576 | 655 | 2020-06-28 | 5.0 |
| 132577 | 655 | 2020-06-29 | 3.0 |
| 132578 | 655 | 2020-06-30 | 2.0 |
132579 rows × 3 columns
In [7]:
# dataframe分为多个小文件
In [8]:
# Distinct material ids in order of first appearance; the shape shows how
# many different materials the data contains.
classinformation = sales.loc[:, "matrl_id"].unique()
classinformation.shape
Out[8]:
(1200,)
In [9]:
classinformation
Out[9]:
array([794, 77, 769, ..., 85, 294, 655], dtype=int64)
In [10]:
# Split `sales` into one DataFrame per material and publish each one as a
# notebook-level variable named sales<matrl_id> (for example sales794).
# The variables are registered through globals() instead of exec() on a
# formatted string, so no source code is assembled from data values.
for temp_classinformation in classinformation:
    temp_data = sales[sales["matrl_id"] == temp_classinformation]
    globals()["sales%s" % temp_classinformation] = temp_data
In [11]:
# fbprophet模型
In [12]:
# Start from an empty result table that only carries the three output columns.
res = pd.DataFrame(columns=['matrl_id', 'month', 'nums'])
res
Out[12]:
| matrl_id | month | nums |
|---|---|---|
In [13]:
# Fit one Prophet model per material and total the predicted daily sales
# (yhat) for July, August and September 2020.
#   res: monthly totals rounded up to whole units
#   qes: the same totals, unrounded
forecast_months = (
    (202007, "2020-07-01", "2020-07-31"),
    (202008, "2020-08-01", "2020-08-31"),
    (202009, "2020-09-01", "2020-09-30"),
)
horizon_end = pd.Timestamp(forecast_months[-1][2])

# Rows are collected in plain lists and turned into DataFrames once at the
# end: DataFrame.append copied the whole frame on every call and no longer
# exists in pandas 2.x.
res_rows = []
qes_rows = []

# groupby visits the materials in ascending matrl_id order and only the ids
# that actually occur in `sales`, so no id range has to be hard-coded and no
# per-material variable has to be looked up by name.
for matrl_id, u in sales.groupby("matrl_id"):
    # Keep only the two columns the model is fitted on.
    u = u.loc[:, ["ds", "y"]]

    # Create the Prophet object.
    m = Prophet(
        holidays_prior_scale=10,  # controls how flexibly holiday effects are fitted
        changepoint_prior_scale=0.05,
        seasonality_mode='multiplicative',
        seasonality_prior_scale=10,  # controls how flexible the seasonality is
    )
    # Fit the model to this material's history.
    m.fit(u)

    # The future frame is counted from this material's own last observed
    # day. Keep the original 130-day horizon, but extend it when the history
    # stops so early that 130 days would not reach the end of September.
    periods = max(130, (horizon_end - u["ds"].max()).days)
    future = m.make_future_dataframe(periods=periods, freq='D')
    forecast = m.predict(future)

    for month, first_day, last_day in forecast_months:
        in_month = (forecast["ds"] >= first_day) & (forecast["ds"] <= last_day)
        total = forecast.loc[in_month, "yhat"].sum()
        # NOTE(review): totals can come out negative (the recorded output has
        # -818 for matrl_id 1200 in 202008); consider clipping yhat at 0
        # before summing if negative sales are not acceptable.
        res_rows.append(
            {'matrl_id': int(matrl_id), 'month': month, 'nums': math.ceil(total)}
        )
        qes_rows.append(
            {'matrl_id': int(matrl_id), 'month': month, 'nums': total}
        )

res = pd.DataFrame(res_rows, columns=['matrl_id', 'month', 'nums'])
qes = pd.DataFrame(qes_rows, columns=['matrl_id', 'month', 'nums'])
res
Out[15]:
| | matrl_id | month | nums |
|---|---|---|---|
| 0 | 1 | 202007 | 8 |
| 1 | 1 | 202008 | 20 |
| 2 | 1 | 202009 | 6 |
| 3 | 2 | 202007 | 181 |
| 4 | 2 | 202008 | 182 |
| ... | ... | ... | ... |
| 3595 | 1199 | 202008 | 12648 |
| 3596 | 1199 | 202009 | 1242 |
| 3597 | 1200 | 202007 | 791 |
| 3598 | 1200 | 202008 | -818 |
| 3599 | 1200 | 202009 | 2034 |
3600 rows × 3 columns
In [16]:
qes
Out[16]:
| | matrl_id | month | nums |
|---|---|---|---|
| 0 | 1 | 202007 | 7.131226 |
| 1 | 1 | 202008 | 19.440604 |
| 2 | 1 | 202009 | 5.973726 |
| 3 | 2 | 202007 | 180.470566 |
| 4 | 2 | 202008 | 181.888941 |
| ... | ... | ... | ... |
| 3595 | 1199 | 202008 | 12647.735691 |
| 3596 | 1199 | 202009 | 1241.694597 |
| 3597 | 1200 | 202007 | 790.090384 |
| 3598 | 1200 | 202008 | -818.394460 |
| 3599 | 1200 | 202009 | 2033.712995 |
3600 rows × 3 columns
In [17]:
# Write both result tables to CSV: the rounded totals first, then the raw
# ones. `index` is left at its default, so the row index is written as the
# first, unnamed column of each file.
res.to_csv(path_or_buf="prophet8整数.csv")
qes.to_csv(path_or_buf="prophet8小数.csv")