这一节学习机器学习算法的对比,第一章学习如何将数据多维展示。
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from sklearn.neighbors import KNeighborsClassifier
plt.style.use('seaborn')
#设置需要预测的指数
indexcode = '000016.SH'
#设置历史区间
startdate = '20140101'
enddate = '20190123'
#获取历史区间的交易列表
tradelist = list(get_trade_days(startdate, enddate, count=None).strftime('%Y%m%d'))
#设置数据分类标签
label = ['money rate %','net up rate % ','mean of updown %']#资金流向、涨跌比、平均涨幅
In [142]:
date = '20190123'
nextdate = '20190124'
stock = get_index_stocks(indexcode,date)
df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'].T.fillna(0)
label3 = round(df.mean()[0],3)
label2 = (len(list(df[df[date]>0][date]))-len(list(df[df[date]<0][date])))/len(list(df[date]))
moneydf = get_money_flow_step(stock,date,date,'1d',['net_flow_rate'],None,is_panel=1)['net_flow_rate'].T.fillna(0)
label1 = round(moneydf.mean()[0],3)
value = get_price(indexcode, date, nextdate, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate']
dt = pd.DataFrame([label1,label2,label3],index =label,columns=[date]).T
dt['now up']=list(value)[0]
dt['now label'] = dt['now up'].apply(lambda x:1 if x>0 else -1)
dt['next up']=list(value)[1]
dt['label']=dt['next up'].apply(lambda x:1 if x>0 else -1)
dt
Out[142]:
| money rate % | net up rate % | mean of updown % | now up | now label | next up | label | |
|---|---|---|---|---|---|---|---|
| 20190123 | -4.998 | -0.02 | -0.088 | -0.1742 | -1 | 0.6054 | 1 |
In [143]:
dt = pd.DataFrame(columns = label)
for date in tradelist:
stock = get_index_stocks(indexcode,date)
df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'].T.fillna(0)
label3 = round(df.mean()[0],3)
label2 = (len(list(df[df[date]>0][date]))-len(list(df[df[date]<0][date])))/len(list(df[date]))
moneydf = get_money_flow_step(stock,date,date,'1d',['net_flow_rate'],None,is_panel=1)['net_flow_rate'].T.fillna(0)
label1 = round(moneydf.mean()[0],3)
dt.loc[date] = [label1,label2,label3]
value = list(get_price(indexcode, startdate, enddate, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'])
dt['now up']=value
dt['now label'] = dt['now up'].apply(lambda x:1 if x>0 else -1)
dt['next up']=list(dt['now up'])[1:]+[0]
dt['label']=dt['next up'].apply(lambda x:1 if x>0 else -1)
dt
Out[143]:
| money rate % | net up rate % | mean of updown % | now up | now label | next up | label | |
|---|---|---|---|---|---|---|---|
| 20140102 | -6.977 | -0.44 | -0.455 | -0.8688 | -1 | -1.6035 | -1 |
| 20140103 | -14.628 | -0.58 | -1.197 | -1.6035 | -1 | -1.5062 | -1 |
| 20140106 | -10.977 | -0.62 | -2.111 | -1.5062 | -1 | -0.1504 | -1 |
| 20140107 | -2.380 | -0.32 | 0.090 | -0.1504 | -1 | 0.3097 | 1 |
| 20140108 | -4.171 | -0.04 | -0.175 | 0.3097 | 1 | -0.7557 | -1 |
| 20140109 | -4.399 | -0.62 | -0.917 | -0.7557 | -1 | -0.3732 | -1 |
| 20140110 | -8.866 | -0.22 | -0.622 | -0.3732 | -1 | -0.2439 | -1 |
| 20140113 | -11.486 | -0.02 | -0.250 | -0.2439 | -1 | 0.3126 | 1 |
| 20140114 | -7.618 | 0.32 | 0.199 | 0.3126 | 1 | -0.7187 | -1 |
| 20140115 | -12.046 | -0.52 | -0.620 | -0.7187 | -1 | 0.3080 | 1 |
| 20140116 | -0.555 | 0.04 | 0.262 | 0.3080 | 1 | -1.1669 | -1 |
| 20140117 | -11.065 | -0.64 | -1.425 | -1.1669 | -1 | -0.7338 | -1 |
| 20140120 | -10.974 | -0.64 | -0.878 | -0.7338 | -1 | 0.9586 | 1 |
| 20140121 | -3.121 | 0.80 | 0.927 | 0.9586 | 1 | 2.3827 | 1 |
| 20140122 | 10.358 | 1.00 | 2.710 | 2.3827 | 1 | -0.9966 | -1 |
| 20140123 | -10.280 | -0.72 | -0.755 | -0.9966 | -1 | 0.2024 | 1 |
| 20140124 | 0.982 | 0.48 | 0.592 | 0.2024 | 1 | -1.5445 | -1 |
| 20140127 | -12.541 | -0.72 | -1.411 | -1.5445 | -1 | 0.6044 | 1 |
| 20140128 | -5.032 | 0.34 | 0.366 | 0.6044 | 1 | 0.4495 | 1 |
| 20140129 | -7.350 | 0.08 | 0.095 | 0.4495 | 1 | -1.1472 | -1 |
| 20140130 | -13.810 | -0.76 | -1.215 | -1.1472 | -1 | -0.1355 | -1 |
| 20140207 | -9.088 | -0.16 | 0.115 | -0.1355 | -1 | 2.0667 | 1 |
| 20140210 | 7.470 | 0.96 | 2.693 | 2.0667 | 1 | 1.4934 | 1 |
| 20140211 | 5.662 | 0.72 | 1.398 | 1.4934 | 1 | -0.0864 | -1 |
| 20140212 | -5.996 | -0.24 | -0.027 | -0.0864 | -1 | 0.0664 | 1 |
| 20140213 | -4.087 | -0.30 | -0.516 | 0.0664 | 1 | 0.4073 | 1 |
| 20140214 | -7.007 | 0.36 | 0.796 | 0.4073 | 1 | 0.2520 | 1 |
| 20140217 | -8.357 | 0.38 | 0.597 | 0.2520 | 1 | -1.9221 | -1 |
| 20140218 | -22.680 | -0.84 | -1.890 | -1.9221 | -1 | 1.6476 | 1 |
| 20140219 | 1.759 | 0.78 | 1.418 | 1.6476 | 1 | -0.8646 | -1 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 20181211 | -5.442 | 0.30 | 0.410 | 0.2894 | 1 | 0.3204 | 1 |
| 20181212 | -2.328 | 0.60 | 0.376 | 0.3204 | 1 | 1.4272 | 1 |
| 20181213 | 4.618 | 0.88 | 1.545 | 1.4272 | 1 | -1.4046 | -1 |
| 20181214 | -9.521 | -0.94 | -1.470 | -1.4046 | -1 | 0.1058 | 1 |
| 20181217 | -5.410 | 0.46 | 0.399 | 0.1058 | 1 | -1.1574 | -1 |
| 20181218 | -9.453 | -0.72 | -1.188 | -1.1574 | -1 | -1.1810 | -1 |
| 20181219 | -12.110 | -0.70 | -1.012 | -1.1810 | -1 | -1.4613 | -1 |
| 20181220 | -8.132 | -0.64 | -1.128 | -1.4613 | -1 | -1.2324 | -1 |
| 20181221 | -7.126 | -0.60 | -1.204 | -1.2324 | -1 | 0.1339 | 1 |
| 20181224 | 0.890 | 0.16 | 0.319 | 0.1339 | 1 | -0.5153 | -1 |
| 20181225 | -1.902 | -0.46 | -0.801 | -0.5153 | -1 | -0.6867 | -1 |
| 20181226 | -2.723 | -0.44 | -0.569 | -0.6867 | -1 | -0.2411 | -1 |
| 20181227 | -4.277 | 0.02 | -0.454 | -0.2411 | -1 | 0.7487 | 1 |
| 20181228 | 1.770 | 0.48 | 0.564 | 0.7487 | 1 | -1.3217 | -1 |
| 20190102 | -5.086 | -0.64 | -1.368 | -1.3217 | -1 | 0.2851 | 1 |
| 20190103 | 0.290 | 0.18 | 0.151 | 0.2851 | 1 | 2.0008 | 1 |
| 20190104 | 7.130 | 0.90 | 2.067 | 2.0008 | 1 | -0.0141 | -1 |
| 20190107 | -4.997 | 0.08 | 0.212 | -0.0141 | -1 | -0.3953 | -1 |
| 20190108 | -2.920 | -0.34 | -0.415 | -0.3953 | -1 | 1.1951 | 1 |
| 20190109 | 5.368 | 0.70 | 1.068 | 1.1951 | 1 | -0.0372 | -1 |
| 20190110 | -4.994 | -0.48 | -0.376 | -0.0372 | -1 | 0.9712 | 1 |
| 20190111 | 0.116 | 0.74 | 0.777 | 0.9712 | 1 | -0.9923 | -1 |
| 20190114 | -6.029 | -0.62 | -0.862 | -0.9923 | -1 | 2.0262 | 1 |
| 20190115 | 6.696 | 0.90 | 1.631 | 2.0262 | 1 | 0.1198 | 1 |
| 20190116 | -3.404 | 0.16 | -0.150 | 0.1198 | 1 | -0.4145 | -1 |
| 20190117 | -2.421 | -0.38 | -0.432 | -0.4145 | -1 | 1.9405 | 1 |
| 20190118 | 4.543 | 0.86 | 1.558 | 1.9405 | 1 | 0.6256 | 1 |
| 20190121 | -0.123 | 0.32 | 0.640 | 0.6256 | 1 | -1.2849 | -1 |
| 20190122 | -10.820 | -0.82 | -1.257 | -1.2849 | -1 | -0.1742 | -1 |
| 20190123 | -4.998 | -0.02 | -0.088 | -0.1742 | -1 | 0.0000 | -1 |
1236 rows × 7 columns
In [206]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
updt = dt[dt['label']==1]
downdt = dt[dt['label']==-1]
for l in label:
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 0.618])
x1_list=list(updt[l])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.scatter(x,y,c='tomato')
x1_list=list(downdt[l])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.scatter(x,y,c='g')
axes.set_ylabel('value',fontsize=15)
axes.set_title(l,fontsize=20)
from mpl_toolkits.mplot3d import Axes3D
xsup1 = updt[label[0]]
xsup2 = updt[label[1]]
xsup3 = updt[label[2]]
xsdown1 = downdt[label[0]]
xsdown2 = downdt[label[1]]
xsdown3 = downdt[label[2]]
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(xsup1,xsup2,xsup3,c='tomato')
ax.scatter(xsdown1,xsdown2,xsdown3,c='g')
ax.set_xlabel(label[0],fontsize=12)
ax.set_ylabel(label[1],fontsize=12)
ax.set_zlabel(label[2],fontsize=12)
ax.set_title('Data space 3D',fontsize=20)
plt.show()
In [166]:
label = ['money rate %','net up rate % ','mean of updown %']
label1 = 'money rate %'
label2 = 'net up rate % '
label3 = 'mean of updown %'
#保留近一年的数据,用于测试,之前数据用于训练
train = dt[:-250]
test = dt[-250:]
X=train[label]
Y=train['label']
X_test=test[label]
Y_test=test['label']
from sklearn.neighbors import KNeighborsClassifier
model=KNeighborsClassifier(n_neighbors=30)
model.fit(X, Y)
print('训练时,预测成功率 {}'.format(round(np.mean(model.predict(X)==Y),2)))
print('测试时,预测成功率 {}'.format(round(np.mean(model.predict(X_test)==Y_test),2)))
训练时,预测成功率 0.56
测试时,预测成功率 0.55
In [207]:
#净值
test['Forecast'] = list(model.predict(X_test))
test['ref'] = test['next up'].loc[test['Forecast']==1]
test = test.fillna(0)
test['ref'] = test['ref'].apply(lambda x:1+x/100)
from operator import mul
from functools import reduce
test['date'] = test.index
test['net value'] = test['date'].apply(lambda x:reduce(mul,list(test['ref'])[:list(test['date']).index(x)+1]))
#基准净值
test['benchmark'] = test['now up'].apply(lambda x:1+x/100)
test['benchmark value'] = test['date'].apply(lambda x:reduce(mul,list(test['benchmark'])[:list(test['date']).index(x)+1]))
#风控净值
# model.predict_proba(X_test)[3][1]
test['risk ref'] = test['next up'].loc[test['Forecast']==1]
test = test.fillna(0)
test['rate'] = [model.predict_proba(X_test)[s][1] for s in range(0,len(model.predict_proba(X_test)))]
test['risk ref'] = (test['risk ref']/100)*test['rate']+1
test['net value (risk)'] = test['date'].apply(lambda x:reduce(mul,list(test['risk ref'])[:list(test['date']).index(x)+1]))
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 0.618]) #插入面板
color = ['tomato','green','darkorchid','b','y']
x1_list=list(test['net value'])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.plot(x, y, 'tomato')
x1_list=list(test['benchmark value'])
y1=np.array(x1_list)
x1=np.array(range(0,len(x1_list)))
axes.plot(x1, y1, 'darkorchid')
axes.set_xlabel('Time',fontsize=15)
axes.set_ylabel('net value',fontsize=15)
axes.set_title('KNN return',fontsize=20)
axes.legend(['net value','benchmark'])
#设置X轴
mtradelist = list(test['date'])
numlist=[]
for s in list(range(0,len(mtradelist),60)):
numlist.append(mtradelist[s])
axes.set_xticks(list(range(0,len(mtradelist),60)))
axes.set_xticklabels(numlist, fontsize=10)
#风控
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 0.618]) #插入面板
color = ['tomato','green','darkorchid','b','y']
x1_list=list(test['net value'])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.plot(x, y, 'tomato')
x1_list=list(test['benchmark value'])
y1=np.array(x1_list)
x1=np.array(range(0,len(x1_list)))
axes.plot(x1, y1, 'darkorchid')
x1_list=list(test['net value (risk)'])
y2=np.array(x1_list)
x2=np.array(range(0,len(x1_list)))
axes.plot(x2, y2, 'b')
axes.set_xlabel('Time',fontsize=15)
axes.set_ylabel('net value',fontsize=15)
axes.set_title('KNN return (risk)',fontsize=20)
axes.legend(['net value','benchmark','net value (risk)'])
#设置X轴
mtradelist = list(test['date'])
numlist=[]
for s in list(range(0,len(mtradelist),60)):
numlist.append(mtradelist[s])
axes.set_xticks(list(range(0,len(mtradelist),60)))
axes.set_xticklabels(numlist, fontsize=10)
Out[207]:
[<matplotlib.text.Text at 0x7f52b225dd30>,
<matplotlib.text.Text at 0x7f52b225dda0>,
<matplotlib.text.Text at 0x7f52b221cac8>,
<matplotlib.text.Text at 0x7f52b2220048>,
<matplotlib.text.Text at 0x7f52b2220b00>]
查看以上策略详情请到supermind量化交易官网查看:同花顺Supermind量化交易 机器学习算法对比--多维数据展示 附源代码