同花顺Supermind量化交易 机器算法运用--预测指数涨跌的算法对比 附源代码

106 阅读6分钟

这一节学习机器学习算法的对比:本章将用九种不同的机器学习算法,对预测指数涨跌的效果进行对比。

# Build the daily factor table: for each trade date collect three features
# (aggregate money-flow rate, net advancing ratio, mean constituent return),
# plus the index's own return and the next-day up/down label.
dt = pd.DataFrame(columns=label)
for date in tradelist:
    # Constituent stocks of the index on that date (platform API).
    stock = get_index_stocks(indexcode, date)
    # Daily percent change of every constituent; transposed so rows = stocks.
    df = get_price(stock, date, date, '1d', ['quote_rate'], skip_paused=False,
                   fq='pre', bar_count=0, is_panel=1)['quote_rate'].T.fillna(0)
    # Mean constituent return. Use .iloc[0]: positional access via plain []
    # on a Series is deprecated in modern pandas (the index here is stock
    # codes, so [0] silently fell back to positional lookup).
    label3 = round(df.mean().iloc[0], 3)
    # Net advancing ratio: (#advancers - #decliners) / #constituents,
    # via vectorized boolean sums instead of len(list(...)) round trips.
    col = df[date]
    label2 = ((col > 0).sum() - (col < 0).sum()) / len(col)
    # Aggregate money-flow rate of the constituents (platform API).
    moneydf = get_money_flow_step(stock, date, date, '1d', ['net_flow_rate'],
                                  None, is_panel=1)['net_flow_rate'].T.fillna(0)
    label1 = round(moneydf.mean().iloc[0], 3)
    dt.loc[date] = [label1, label2, label3]
# The index's own daily percent change over the full window.
value = list(get_price(indexcode, startdate, enddate, '1d', ['quote_rate'],
                       skip_paused=False, fq='pre', bar_count=0,
                       is_panel=1)['quote_rate'])
dt['now up'] = value
dt['now label'] = dt['now up'].apply(lambda x: 1 if x > 0 else -1)
# Next-day return: shift everything up one row; the last day (unknown
# future) is filled with 0, which the labeling below maps to -1.
dt['next up'] = list(dt['now up'])[1:] + [0]
dt['label'] = dt['next up'].apply(lambda x: 1 if x > 0 else -1)
dt

Out[208]:

money rate %net up rate %mean of updown %now upnow labelnext uplabel
20140102-6.977-0.44-0.455-0.8688-1-1.6035-1
20140103-14.628-0.58-1.197-1.6035-1-1.5062-1
20140106-10.977-0.62-2.111-1.5062-1-0.1504-1
20140107-2.380-0.320.090-0.1504-10.30971
20140108-4.171-0.04-0.1750.30971-0.7557-1
20140109-4.399-0.62-0.917-0.7557-1-0.3732-1
20140110-8.866-0.22-0.622-0.3732-1-0.2439-1
20140113-11.486-0.02-0.250-0.2439-10.31261
20140114-7.6180.320.1990.31261-0.7187-1
20140115-12.046-0.52-0.620-0.7187-10.30801
20140116-0.5550.040.2620.30801-1.1669-1
20140117-11.065-0.64-1.425-1.1669-1-0.7338-1
20140120-10.974-0.64-0.878-0.7338-10.95861
20140121-3.1210.800.9270.958612.38271
2014012210.3581.002.7102.38271-0.9966-1
20140123-10.280-0.72-0.755-0.9966-10.20241
201401240.9820.480.5920.20241-1.5445-1
20140127-12.541-0.72-1.411-1.5445-10.60441
20140128-5.0320.340.3660.604410.44951
20140129-7.3500.080.0950.44951-1.1472-1
20140130-13.810-0.76-1.215-1.1472-1-0.1355-1
20140207-9.088-0.160.115-0.1355-12.06671
201402107.4700.962.6932.066711.49341
201402115.6620.721.3981.49341-0.0864-1
20140212-5.996-0.24-0.027-0.0864-10.06641
20140213-4.087-0.30-0.5160.066410.40731
20140214-7.0070.360.7960.407310.25201
20140217-8.3570.380.5970.25201-1.9221-1
20140218-22.680-0.84-1.890-1.9221-11.64761
201402191.7590.781.4181.64761-0.8646-1
........................
20181211-5.4420.300.4100.289410.32041
20181212-2.3280.600.3760.320411.42721
201812134.6180.881.5451.42721-1.4046-1
20181214-9.521-0.94-1.470-1.4046-10.10581
20181217-5.4100.460.3990.10581-1.1574-1
20181218-9.453-0.72-1.188-1.1574-1-1.1810-1
20181219-12.110-0.70-1.012-1.1810-1-1.4613-1
20181220-8.132-0.64-1.128-1.4613-1-1.2324-1
20181221-7.126-0.60-1.204-1.2324-10.13391
201812240.8900.160.3190.13391-0.5153-1
20181225-1.902-0.46-0.801-0.5153-1-0.6867-1
20181226-2.723-0.44-0.569-0.6867-1-0.2411-1
20181227-4.2770.02-0.454-0.2411-10.74871
201812281.7700.480.5640.74871-1.3217-1
20190102-5.086-0.64-1.368-1.3217-10.28511
201901030.2900.180.1510.285112.00081
201901047.1300.902.0672.00081-0.0141-1
20190107-4.9970.080.212-0.0141-1-0.3953-1
20190108-2.920-0.34-0.415-0.3953-11.19511
201901095.3680.701.0681.19511-0.0372-1
20190110-4.994-0.48-0.376-0.0372-10.97121
201901110.1160.740.7770.97121-0.9923-1
20190114-6.029-0.62-0.862-0.9923-12.02621
201901156.6960.901.6312.026210.11981
20190116-3.4040.16-0.1500.11981-0.4145-1
20190117-2.421-0.38-0.432-0.4145-11.94051
201901184.5430.861.5581.940510.62561
20190121-0.1230.320.6400.62561-1.2849-1
20190122-10.820-0.82-1.257-1.2849-1-0.1742-1
20190123-4.998-0.02-0.088-0.1742-10.0000-1

1236 rows × 7 columns

KNN

In [256]:

# Feature column names. NOTE: 'net up rate % ' keeps its trailing space so
# it matches the column name used when dt was built.
label = ['money rate %', 'net up rate % ', 'mean of updown %']
label1, label2, label3 = label

# Hold out the most recent 250 trading days (roughly one year) as the
# test set; everything before that is used for training.
train, test = dt.iloc[:-250], dt.iloc[-250:]
X, Y = train[label], train['label']
X_test, Y_test = test[label], test['label']

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

# Work on an explicit copy so the per-model columns added below do not
# trigger pandas' SettingWithCopy warning (test is a slice of dt).
test = test.copy()

# One constructor per model name; a dispatch dict replaces the if/elif chain.
# NOTE(review): KMeans is an unsupervised clusterer — fit() ignores Y and
# predict() returns cluster ids (0..7 by default), never the ±1 labels, so
# its "accuracy" here is meaningless (hence the ~0.1 score in the output).
# Kept only for parity with the original comparison.
models = {
    'tree': tree.DecisionTreeClassifier,
    'KNN': lambda: KNeighborsClassifier(n_neighbors=30),
    'SVM': svm.SVC,
    'XGBClass': XGBClassifier,
    'GBM': GradientBoostingClassifier,
    'Random Forest': RandomForestClassifier,
    'KMeans': KMeans,
    'GaussianNB': GaussianNB,
    'Logistic': LogisticRegression,
}

dr = pd.DataFrame()
for s in ['tree', 'KNN', 'SVM', 'XGBClass', 'GBM', 'Random Forest',
          'KMeans', 'GaussianNB', 'Logistic']:
    model = models[s]()
    model.fit(X, Y)
    # In-sample and out-of-sample hit rates.
    print('训练时,预测成功率 {}'.format(round(np.mean(model.predict(X) == Y), 2)))
    print('测试时,预测成功率 {}'.format(round(np.mean(model.predict(X_test) == Y_test), 2)))

    name = str(s) + ' net value'
    # Net-value curve: hold the index on days the model predicts "up" (+1),
    # stay flat otherwise, compounding the next-day return.
    test['Forecast'] = list(model.predict(X_test))
    test['ref'] = test['next up'].loc[test['Forecast'] == 1]
    test = test.fillna(0)          # flat days get return 0 -> factor 1.0
    test['ref'] = test['ref'].apply(lambda x: 1 + x / 100)
    test['date'] = test.index
    # cumprod() is O(n); the original reduce-inside-apply recomputed the
    # whole running product for every row (O(n^2)).
    test[name] = test['ref'].cumprod()
    dr[name] = test[name]
dr
训练时,预测成功率 1.0
测试时,预测成功率 0.53
训练时,预测成功率 0.56
测试时,预测成功率 0.55
训练时,预测成功率 0.61
测试时,预测成功率 0.58
训练时,预测成功率 0.72
测试时,预测成功率 0.5
训练时,预测成功率 0.76
测试时,预测成功率 0.51
训练时,预测成功率 0.97
测试时,预测成功率 0.55
训练时,预测成功率 0.1
测试时,预测成功率 0.09
训练时,预测成功率 0.51
测试时,预测成功率 0.5
训练时,预测成功率 0.53
测试时,预测成功率 0.52

Out[256]:

tree net valueKNN net valueSVM net valueXGBClass net valueGBM net valueRandom Forest net valueKMeans net valueGaussianNB net valueLogistic net value
201801151.0049371.0000001.0049371.0000001.0000001.0000001.0000001.0049371.004937
201801161.0089781.0040211.0089781.0040211.0040211.0040211.0000001.0049371.008978
201801171.0189391.0040211.0189391.0139341.0139341.0139341.0098731.0049371.008978
201801181.0226351.0040211.0189391.0176111.0176111.0139341.0098731.0049371.012637
201801191.0267381.0040211.0189391.0216941.0216941.0139341.0098731.0049371.016700
201801221.0418831.0188311.0189391.0367651.0367651.0288901.0098731.0197611.031697
201801231.0431391.0188311.0189391.0367651.0367651.0288901.0098731.0197611.031697
201801241.0356371.0115041.0116111.0293081.0293081.0214901.0098731.0197611.024277
201801251.0406621.0115041.0165201.0293081.0293081.0214901.0098731.0247091.029247
201801261.0235780.9948991.0165201.0124111.0124111.0214901.0098731.0247091.029247
201801291.0099410.9816441.0165200.9989230.9989231.0078811.0098731.0110571.015535
201801301.0223120.9816441.0289711.0111591.0111591.0078811.0098731.0234411.027974
201801311.0303130.9893271.0370251.0111591.0111591.0157701.0098731.0234411.036020
201802011.0330720.9919771.0398021.0138671.0138671.0184901.0125771.0234411.036020
201802021.0330721.0019601.0502661.0240701.0138671.0184901.0125771.0337411.046446
201802051.0330720.9818511.0291881.0035170.9935180.9980490.9922551.0337411.025444
201802061.0039470.9541691.0001720.9752250.9655080.9699110.9642801.0045970.996534
201802071.0039470.9541691.0001720.9482640.9388160.9699110.9642800.9768240.968984
201802080.9576600.9541691.0001720.9045440.8955320.9699110.9642800.9317870.924309
201802090.9560190.9541691.0001720.9045440.8955320.9699110.9642800.9301900.922724
201802120.9722090.9703281.0001720.9045440.9106970.9863360.9642800.9459430.938351
201802130.9787080.9768151.0068580.9045440.9106970.9929300.9707270.9459430.938351
201802140.9787080.9768151.0068580.9045440.9106970.9929300.9707270.9459430.957987
201802220.9871500.9768151.0155430.9123470.9185531.0014950.9791000.9459430.957987
201802230.9926970.9768151.0212490.9123470.9185531.0071220.9846020.9459430.957987
201802260.9926970.9768151.0212490.9123470.9185531.0071220.9846020.9459430.957987
201802270.9926970.9768151.0043530.8972520.9033560.9904600.9846020.9302920.942137
201802280.9970530.9811011.0087600.9011890.9073200.9948060.9846020.9343740.946271
201803010.9970530.9811011.0087600.9011890.9073200.9854760.9846020.9256120.937397
201803020.9969860.9811011.0087600.9011290.9072590.9854760.9846020.9255500.937334
..............................
201812110.9652271.0989931.1910510.9039000.9127690.9865890.9872980.8824770.913479
201812120.9652271.1146781.2080500.9039000.9257961.0006690.9872980.8824770.913479
201812130.9652271.1146781.2080500.8912030.9257961.0006690.9734310.8824770.913479
201812140.9652271.1158571.2093280.8921460.9267761.0017280.9734310.8834110.914446
201812170.9652271.1158571.2093280.8818210.9160490.9901340.9734310.8834110.914446
201812180.9652271.1026791.1950460.8714060.9052310.9901340.9734310.8729780.903646
201812190.9511221.0865651.1775830.8714060.8920030.9901340.9734310.8602210.890441
201812200.9511221.0865651.1630700.8714060.8920030.9901340.9734310.8496200.879468
201812210.9523951.0865651.1630700.8714060.8920030.9901340.9734310.8507570.880645
201812240.9474881.0865651.1630700.8669160.8874060.9901340.9734310.8507570.876107
201812250.9409811.0865651.1550830.8669160.8813120.9901340.9734310.8449150.870091
201812260.9387131.0839461.1522990.8669160.8813120.9877470.9734310.8428780.867993
201812270.9387131.0839461.1522990.8669160.8813120.9877470.9734310.8491890.874492
201812280.9387131.0696191.1370690.8669160.8813120.9877470.9605650.8491890.874492
201901020.9387131.0726691.1403100.8669160.8813120.9877470.9605650.8516100.876985
201901030.9574941.0726691.1403100.8842610.8989461.0075100.9605650.8516100.894532
201901040.9573591.0725171.1401500.8841370.8988191.0073680.9605650.8516100.894532
201901070.9573591.0725171.1401500.8841370.8988191.0073680.9605650.8482430.890996
201901080.9688011.0853351.1537760.8947030.9095611.0194070.9605650.8583810.901644
201901090.9688011.0853351.1537760.8947030.9095611.0194070.9602080.8583810.901644
201901100.9782101.0958761.1537760.9033920.9095611.0194070.9602080.8667170.910401
201901110.9782101.0958761.1537760.9033920.9095611.0092910.9602080.8667170.910401
201901140.9980301.1180811.1537760.9216970.9279901.0297410.9602080.8842790.928847
201901150.9992261.1194201.1551580.9228010.9291021.0297410.9602080.8842790.928847
201901160.9950841.1147801.1503700.9189760.9252511.0297410.9602080.8806130.924997
201901171.0143941.1364121.1726930.9368090.9432051.0497230.9602080.8977020.942947
201901181.0143941.1364121.1726930.9426690.9432051.0497230.9662150.8977020.942947
201901211.0013601.1364121.1726930.9305570.9310861.0362360.9662150.8977020.930831
201901220.9996151.1344331.1706500.9305570.9294641.0344300.9662150.8961380.929209
201901230.9996151.1344331.1706500.9305570.9294641.0344300.9662150.8961380.929209

250 rows × 9 columns

In [257]:

# Plot all nine strategies' net-value curves on a single panel.
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 1, 1.382])  # inset panel
color = ['tomato', 'green', 'darkorchid', 'lightskyblue', 'y', 'gold',
         'deeppink', 'lightgoldenrodyellow', 'red']
t = list(dr.columns)
# enumerate avoids the O(n) t.index(s) lookup; it also removes the bug where
# the inner tick loop reused (shadowed) the outer loop variable `s`.
for g, s in enumerate(t):
    y = np.array(dr[s])
    x = np.arange(len(y))
    axes.plot(x, y, color=color[g])

# Axis decoration only needs to happen once, after all curves are drawn,
# not once per curve as in the original loop.
axes.set_xlabel('Time', fontsize=15)
axes.set_ylabel('net value', fontsize=15)
axes.set_title('AI return ', fontsize=20)
axes.legend(t)
# X axis: one date label every 60 trading days.
mtradelist = list(test['date'])
tickpos = list(range(0, len(mtradelist), 60))
axes.set_xticks(tickpos)
axes.set_xticklabels([mtradelist[i] for i in tickpos], fontsize=10)

查看以上策略详情请到supermind量化交易官网查看:同花顺Supermind量化交易 机器算法运用--预测指数涨跌的算法对比 附源代码