同花顺Supermind量化交易 机器学习算法对比--多维数据展示 附源代码

172 阅读7分钟

这一节学习机器学习算法的对比,第一章学习如何将数据多维展示。

import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from sklearn.neighbors import KNeighborsClassifier
plt.style.use('seaborn')

#设置需要预测的指数
indexcode = '000016.SH'
#设置历史区间
startdate = '20140101'
enddate = '20190123'
#获取历史区间的交易列表
tradelist = list(get_trade_days(startdate, enddate, count=None).strftime('%Y%m%d'))
#设置数据分类标签
label = ['money rate %','net up rate % ','mean of updown %']#资金流向、涨跌比、平均涨幅

In [142]:

date = '20190123'
nextdate = '20190124'
stock = get_index_stocks(indexcode,date)
df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'].T.fillna(0)
label3 = round(df.mean()[0],3)
label2 = (len(list(df[df[date]>0][date]))-len(list(df[df[date]<0][date])))/len(list(df[date]))
moneydf = get_money_flow_step(stock,date,date,'1d',['net_flow_rate'],None,is_panel=1)['net_flow_rate'].T.fillna(0)
label1 = round(moneydf.mean()[0],3)
value = get_price(indexcode, date, nextdate, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate']
dt = pd.DataFrame([label1,label2,label3],index =label,columns=[date]).T
dt['now up']=list(value)[0]
dt['now label'] = dt['now up'].apply(lambda x:1 if x>0 else -1)
dt['next up']=list(value)[1]
dt['label']=dt['next up'].apply(lambda x:1 if x>0 else -1)
dt

Out[142]:

money rate %net up rate %mean of updown %now upnow labelnext uplabel
20190123-4.998-0.02-0.088-0.1742-10.60541

In [143]:

dt = pd.DataFrame(columns = label)
for date in tradelist:
    stock = get_index_stocks(indexcode,date)
    df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'].T.fillna(0)
    label3 = round(df.mean()[0],3)
    label2 = (len(list(df[df[date]>0][date]))-len(list(df[df[date]<0][date])))/len(list(df[date]))
    moneydf = get_money_flow_step(stock,date,date,'1d',['net_flow_rate'],None,is_panel=1)['net_flow_rate'].T.fillna(0)
    label1 = round(moneydf.mean()[0],3)
    dt.loc[date] = [label1,label2,label3]
value = list(get_price(indexcode, startdate, enddate, '1d', ['quote_rate'], skip_paused = False, fq = 'pre', bar_count = 0, is_panel = 1)['quote_rate'])
dt['now up']=value
dt['now label'] = dt['now up'].apply(lambda x:1 if x>0 else -1)
dt['next up']=list(dt['now up'])[1:]+[0]
dt['label']=dt['next up'].apply(lambda x:1 if x>0 else -1)
dt

Out[143]:

money rate %net up rate %mean of updown %now upnow labelnext uplabel
20140102-6.977-0.44-0.455-0.8688-1-1.6035-1
20140103-14.628-0.58-1.197-1.6035-1-1.5062-1
20140106-10.977-0.62-2.111-1.5062-1-0.1504-1
20140107-2.380-0.320.090-0.1504-10.30971
20140108-4.171-0.04-0.1750.30971-0.7557-1
20140109-4.399-0.62-0.917-0.7557-1-0.3732-1
20140110-8.866-0.22-0.622-0.3732-1-0.2439-1
20140113-11.486-0.02-0.250-0.2439-10.31261
20140114-7.6180.320.1990.31261-0.7187-1
20140115-12.046-0.52-0.620-0.7187-10.30801
20140116-0.5550.040.2620.30801-1.1669-1
20140117-11.065-0.64-1.425-1.1669-1-0.7338-1
20140120-10.974-0.64-0.878-0.7338-10.95861
20140121-3.1210.800.9270.958612.38271
2014012210.3581.002.7102.38271-0.9966-1
20140123-10.280-0.72-0.755-0.9966-10.20241
201401240.9820.480.5920.20241-1.5445-1
20140127-12.541-0.72-1.411-1.5445-10.60441
20140128-5.0320.340.3660.604410.44951
20140129-7.3500.080.0950.44951-1.1472-1
20140130-13.810-0.76-1.215-1.1472-1-0.1355-1
20140207-9.088-0.160.115-0.1355-12.06671
201402107.4700.962.6932.066711.49341
201402115.6620.721.3981.49341-0.0864-1
20140212-5.996-0.24-0.027-0.0864-10.06641
20140213-4.087-0.30-0.5160.066410.40731
20140214-7.0070.360.7960.407310.25201
20140217-8.3570.380.5970.25201-1.9221-1
20140218-22.680-0.84-1.890-1.9221-11.64761
201402191.7590.781.4181.64761-0.8646-1
........................
20181211-5.4420.300.4100.289410.32041
20181212-2.3280.600.3760.320411.42721
201812134.6180.881.5451.42721-1.4046-1
20181214-9.521-0.94-1.470-1.4046-10.10581
20181217-5.4100.460.3990.10581-1.1574-1
20181218-9.453-0.72-1.188-1.1574-1-1.1810-1
20181219-12.110-0.70-1.012-1.1810-1-1.4613-1
20181220-8.132-0.64-1.128-1.4613-1-1.2324-1
20181221-7.126-0.60-1.204-1.2324-10.13391
201812240.8900.160.3190.13391-0.5153-1
20181225-1.902-0.46-0.801-0.5153-1-0.6867-1
20181226-2.723-0.44-0.569-0.6867-1-0.2411-1
20181227-4.2770.02-0.454-0.2411-10.74871
201812281.7700.480.5640.74871-1.3217-1
20190102-5.086-0.64-1.368-1.3217-10.28511
201901030.2900.180.1510.285112.00081
201901047.1300.902.0672.00081-0.0141-1
20190107-4.9970.080.212-0.0141-1-0.3953-1
20190108-2.920-0.34-0.415-0.3953-11.19511
201901095.3680.701.0681.19511-0.0372-1
20190110-4.994-0.48-0.376-0.0372-10.97121
201901110.1160.740.7770.97121-0.9923-1
20190114-6.029-0.62-0.862-0.9923-12.02621
201901156.6960.901.6312.026210.11981
20190116-3.4040.16-0.1500.11981-0.4145-1
20190117-2.421-0.38-0.432-0.4145-11.94051
201901184.5430.861.5581.940510.62561
20190121-0.1230.320.6400.62561-1.2849-1
20190122-10.820-0.82-1.257-1.2849-1-0.1742-1
20190123-4.998-0.02-0.088-0.1742-10.0000-1

1236 rows × 7 columns

In [206]:

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
updt = dt[dt['label']==1]
downdt = dt[dt['label']==-1]

for l in label:
    fig = plt.figure()
    axes = fig.add_axes([0.1, 0.1, 1, 0.618]) 
    x1_list=list(updt[l])
    y=np.array(x1_list)
    x=np.array(range(0,len(x1_list)))
    axes.scatter(x,y,c='tomato')

    x1_list=list(downdt[l])
    y=np.array(x1_list)
    x=np.array(range(0,len(x1_list)))
    axes.scatter(x,y,c='g')
    axes.set_ylabel('value',fontsize=15)
    axes.set_title(l,fontsize=20)
    
from mpl_toolkits.mplot3d import Axes3D
xsup1 = updt[label[0]]
xsup2 = updt[label[1]]
xsup3 = updt[label[2]]
xsdown1 = downdt[label[0]]
xsdown2 = downdt[label[1]]
xsdown3 = downdt[label[2]]
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(xsup1,xsup2,xsup3,c='tomato')
ax.scatter(xsdown1,xsdown2,xsdown3,c='g')
ax.set_xlabel(label[0],fontsize=12)
ax.set_ylabel(label[1],fontsize=12)
ax.set_zlabel(label[2],fontsize=12)
ax.set_title('Data space 3D',fontsize=20)
plt.show()

转存失败,建议直接上传图片文件

转存失败,建议直接上传图片文件

转存失败,建议直接上传图片文件

转存失败,建议直接上传图片文件

In [166]:

label = ['money rate %','net up rate % ','mean of updown %']
label1 = 'money rate %'
label2 = 'net up rate % '
label3 = 'mean of updown %'

#保留近一年的数据,用于测试,之前数据用于训练
train = dt[:-250]
test = dt[-250:]
X=train[label]
Y=train['label']
X_test=test[label]
Y_test=test['label']

from sklearn.neighbors import KNeighborsClassifier
model=KNeighborsClassifier(n_neighbors=30)

model.fit(X, Y)
print('训练时,预测成功率 {}'.format(round(np.mean(model.predict(X)==Y),2)))
print('测试时,预测成功率 {}'.format(round(np.mean(model.predict(X_test)==Y_test),2)))
训练时,预测成功率 0.56
测试时,预测成功率 0.55

In [207]:

#净值
test['Forecast'] = list(model.predict(X_test))
test['ref'] = test['next up'].loc[test['Forecast']==1]
test = test.fillna(0)
test['ref'] = test['ref'].apply(lambda x:1+x/100)
from operator import mul
from functools import reduce
test['date'] = test.index
test['net value'] = test['date'].apply(lambda x:reduce(mul,list(test['ref'])[:list(test['date']).index(x)+1]))

#基准净值
test['benchmark'] = test['now up'].apply(lambda x:1+x/100)
test['benchmark value'] = test['date'].apply(lambda x:reduce(mul,list(test['benchmark'])[:list(test['date']).index(x)+1]))

#风控净值
# model.predict_proba(X_test)[3][1]
test['risk ref'] = test['next up'].loc[test['Forecast']==1]
test = test.fillna(0)
test['rate'] = [model.predict_proba(X_test)[s][1] for s in range(0,len(model.predict_proba(X_test)))]
test['risk ref'] = (test['risk ref']/100)*test['rate']+1
test['net value (risk)'] = test['date'].apply(lambda x:reduce(mul,list(test['risk ref'])[:list(test['date']).index(x)+1]))

fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 0.618]) #插入面板
color = ['tomato','green','darkorchid','b','y']
x1_list=list(test['net value'])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.plot(x, y, 'tomato')

x1_list=list(test['benchmark value'])
y1=np.array(x1_list)
x1=np.array(range(0,len(x1_list)))
axes.plot(x1, y1, 'darkorchid')

axes.set_xlabel('Time',fontsize=15)
axes.set_ylabel('net value',fontsize=15)
axes.set_title('KNN return',fontsize=20)
axes.legend(['net value','benchmark'])
#设置X轴
mtradelist = list(test['date'])
numlist=[]
for s in list(range(0,len(mtradelist),60)):
    numlist.append(mtradelist[s])
axes.set_xticks(list(range(0,len(mtradelist),60)))
axes.set_xticklabels(numlist, fontsize=10)




#风控
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 0.618]) #插入面板
color = ['tomato','green','darkorchid','b','y']
x1_list=list(test['net value'])
y=np.array(x1_list)
x=np.array(range(0,len(x1_list)))
axes.plot(x, y, 'tomato')

x1_list=list(test['benchmark value'])
y1=np.array(x1_list)
x1=np.array(range(0,len(x1_list)))
axes.plot(x1, y1, 'darkorchid')

x1_list=list(test['net value (risk)'])
y2=np.array(x1_list)
x2=np.array(range(0,len(x1_list)))
axes.plot(x2, y2, 'b')

axes.set_xlabel('Time',fontsize=15)
axes.set_ylabel('net value',fontsize=15)
axes.set_title('KNN return (risk)',fontsize=20)
axes.legend(['net value','benchmark','net value (risk)'])
#设置X轴
mtradelist = list(test['date'])
numlist=[]
for s in list(range(0,len(mtradelist),60)):
    numlist.append(mtradelist[s])
axes.set_xticks(list(range(0,len(mtradelist),60)))
axes.set_xticklabels(numlist, fontsize=10)

Out[207]:

[<matplotlib.text.Text at 0x7f52b225dd30>,
 <matplotlib.text.Text at 0x7f52b225dda0>,
 <matplotlib.text.Text at 0x7f52b221cac8>,
 <matplotlib.text.Text at 0x7f52b2220048>,
 <matplotlib.text.Text at 0x7f52b2220b00>]

查看以上策略详情请到supermind量化交易官网查看:同花顺Supermind量化交易 机器学习算法对比--多维数据展示 附源代码