import os.path
import pandas as pd
from tqdm import tqdm
import glob
from paddlets.datasets import TSDataset
from paddlets.transform import StandardScaler
from paddlets.models.forecasting import MLPRegressor, NHiTSModel, RNNBlockRegressor
from paddlets.ensemble import WeightingEnsembleForecaster
import numpy as np
import ray
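
# Pipeline overview:
#   1. Rebuild the fitting/validation TSDatasets from per-stock daily CSVs and
#      re-fit the StandardScaler (the ensemble itself is only loaded, not trained).
#   2. Fan prediction out over all CSVs with Ray, one task per stock.
#   3. Sanity-check the loaded ensemble with plain per-group predict() calls.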
result_center = "forecasting_all_result_center"
model_center = "model_forecasting_center_2048_a_b"
csv_paths = glob.glob(os.path.join("./tu_share_data_day", "*.csv"))

# Raw numeric columns read from the CSVs, and the engineered columns the
# forecaster treats as targets. Defined once so the passes below stay in sync.
numeric_cols = ['open', 'high', 'low', 'close', 'pre_close', 'change', 'pct_chg', 'vol', 'amount']
target_cols = ['open', 'high', 'low', 'close', 'pre_close', 'change', 'pct_chg', 'MA5', 'MA10', 'MA20',
               'EMA12', 'EMA26', 'Volatility_5', 'Volatility_10', 'Volume_MA5', 'Volume_Change_Rate',
               'RSI14', 'Momentum_3', 'Momentum_7', 'Middle_Band', 'Upper_Band', 'Lower_Band']

sum_dam_data = []
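
# Clean a frame: map ±inf to NaN, forward-fill gaps, then zero-fill whatever
# remains (e.g. leading NaNs with no prior value to carry forward).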
def handle_nan_and_inf(data):
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.ffill()
    data = data.fillna(0)
    return data
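
# Derive the technical-indicator feature set used as forecasting targets:
# moving averages, EMAs, rolling volatility, volume features, RSI(14),
# momentum, and 20-day Bollinger Bands.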
def calculate_features(data):
    data['MA5'] = data['close'].rolling(window=5).mean()
    data['MA10'] = data['close'].rolling(window=10).mean()
    data['MA20'] = data['close'].rolling(window=20).mean()
    data['EMA12'] = data['close'].ewm(span=12, adjust=False).mean()
    data['EMA26'] = data['close'].ewm(span=26, adjust=False).mean()
    data['Volatility_5'] = data['close'].rolling(window=5).std()
    data['Volatility_10'] = data['close'].rolling(window=10).std()
    data['Volume_MA5'] = data['vol'].rolling(window=5).mean()
    data['Volume_Change_Rate'] = data['vol'].pct_change()
    # RSI(14): ratio of average gain to average loss over a 14-day window.
    delta = data['close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    average_gain = gain.rolling(window=14).mean()
    average_loss = loss.rolling(window=14).mean()
    rs = average_gain / average_loss  # can be inf when average_loss == 0; cleaned below
    data['RSI14'] = 100 - (100 / (1 + rs))
    data['Momentum_3'] = data['close'].diff(3)
    data['Momentum_7'] = data['close'].diff(7)
    # 20-day Bollinger Bands around the middle (MA20) band.
    data['Middle_Band'] = data['close'].rolling(window=20).mean()
    data['Upper_Band'] = data['Middle_Band'] + 2 * data['close'].rolling(window=20).std()
    data['Lower_Band'] = data['Middle_Band'] - 2 * data['close'].rolling(window=20).std()
    return handle_nan_and_inf(data)
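
# Shared training hyperparameters for the three ensemble members; the ensemble
# is reconstructed with these before its saved weights are loaded.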
nhits_params = {
    'sampling_stride': 8,
    'eval_metrics': ["mse", "mae"],
    'batch_size': 32,
    'max_epochs': 100,
    'patience': 10,
}
rnn_params = {
    'sampling_stride': 8,
    'eval_metrics': ["mse", "mae"],
    'batch_size': 32,
    'max_epochs': 100,
    'patience': 10,
}
mlp_params = {
    'sampling_stride': 8,
    'eval_metrics': ["mse", "mae"],
    'batch_size': 32,
    'max_epochs': 100,
    'patience': 10,
    'use_bn': True,
}
# Rebuild the ensemble with the same architecture, then load the saved weights.
reg = WeightingEnsembleForecaster(
    in_chunk_len=64,
    out_chunk_len=1,
    skip_chunk_len=0,
    estimators=[(NHiTSModel, nhits_params), (RNNBlockRegressor, rnn_params), (MLPRegressor, mlp_params)],
)
reg = reg.load(os.path.join(model_center, "low_high"))
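
# Pass 1: build the fitting dataset. For every stock with enough history, take
# the older slice (rows -4096 to -128 after restoring chronological order) so
# the most recent 128 rows stay out of the scaler's fit.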
for csv_path in tqdm(csv_paths):
    new_data = pd.read_csv(csv_path)
    new_data[numeric_cols] = new_data[numeric_cols].apply(pd.to_numeric, errors='coerce')
    if len(new_data) < 4096:
        continue
    new_data = new_data.iloc[::-1]  # CSVs are newest-first; restore chronological order
    new_data = new_data[-4096:-128]
    new_data['index_new'] = range(1, len(new_data) + 1)
    new_data = calculate_features(new_data)
    sum_dam_data.append(new_data)
dam_data = pd.concat(sum_dam_data)
dam_data.reset_index(drop=True, inplace=True)
dataset = TSDataset.load_from_dataframe(
    dam_data,
    group_id='ts_code',
    time_col="index_new",
    target_cols=target_cols,
)
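
# Pass 2: build the validation dataset from the most recent 128 rows of each stock.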
sum_dam_data = []
for csv_path in tqdm(csv_paths):
    new_data = pd.read_csv(csv_path)
    new_data[numeric_cols] = new_data[numeric_cols].apply(pd.to_numeric, errors='coerce')
    if len(new_data) < 2048:
        continue
    new_data = new_data.iloc[::-1]
    new_data = new_data[-128:]
    new_data['index_new'] = range(1, len(new_data) + 1)
    new_data = calculate_features(new_data)
    sum_dam_data.append(new_data)
valid_dam_data = pd.concat(sum_dam_data)
valid_dam_data.reset_index(drop=True, inplace=True)
valid_tsdataset = TSDataset.load_from_dataframe(
    valid_dam_data,
    group_id='ts_code',
    time_col="index_new",
    target_cols=target_cols,
)
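
# Fit the scaler on the (older) fitting dataset only, then apply the same
# transform to both datasets so validation data never leaks into the fit.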
scaler = StandardScaler()
scaler = scaler.fit(dataset)
dataset = scaler.transform(dataset)
valid_tsdataset = scaler.transform(valid_tsdataset)
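
# Fan prediction out over all CSVs with Ray, one task per stock: each worker
# rebuilds and loads the ensemble locally rather than sharing the driver-side
# Paddle model, and forecasts the next 3 steps for its stock.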
@ray.remote
def csv_predict(csv_path):
    # The hyperparameter dicts, `scaler`, `model_center`, and the column lists
    # are captured from the driver and shipped to the worker by Ray.
    reg = WeightingEnsembleForecaster(
        in_chunk_len=64,
        out_chunk_len=1,
        skip_chunk_len=0,
        estimators=[(NHiTSModel, nhits_params), (RNNBlockRegressor, rnn_params), (MLPRegressor, mlp_params)],
    )
    reg = reg.load(os.path.join(model_center, "low_high"))
    new_data = pd.read_csv(csv_path)
    new_data[numeric_cols] = new_data[numeric_cols].apply(pd.to_numeric, errors='coerce')
    if len(new_data) < 2048:
        return {}
    new_data = new_data.iloc[::-1]
    new_data = new_data[-128:]
    new_data['index_new'] = range(1, len(new_data) + 1)
    new_data = calculate_features(new_data)
    valid_tsdataset = TSDataset.load_from_dataframe(
        new_data,
        time_col="index_new",
        target_cols=target_cols,
    )
    valid_tsdataset = scaler.transform(valid_tsdataset)
    # Forecast 3 steps ahead, undo the scaling, and summarise the predicted range.
    predicted = reg.recursive_predict(valid_tsdataset, 3)
    predicted = scaler.inverse_transform(predicted)
    predicted = predicted.to_dataframe()
    high_value = predicted['high'].max()
    low_value = predicted['low'].min()
    # Predicted high-low swing in per-mille (rounded to 3 decimals before scaling).
    round_value = round((high_value - low_value) / low_value, 3) * 1000
    # Offsets of the extreme predictions relative to the last observed row.
    high_index = predicted[predicted['high'] == high_value].index.values[0] - len(new_data)
    low_index = predicted[predicted['low'] == low_value].index.values[0] - len(new_data)
    return {
        "round_value": round_value,
        "stock_name": os.path.split(csv_path)[-1],
        "low_index": low_index,
        "low_value": round(low_value, 2),
        "high_index": high_index,
        "high_value": round(high_value, 2),
    }
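
# Driver side: start Ray, submit one prediction task per CSV, and gather results.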
ray.shutdown()  # tolerate a stale session from a previous run
ray.init()
futures = [csv_predict.remote(csv_path) for csv_path in tqdm(csv_paths)]
result = ray.get(futures)
ray.shutdown()
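
# A minimal sketch of persisting the gathered predictions (assumption: results
# belong under the otherwise-unused `result_center` directory; the filename
# "predictions.json" and the sort order are hypothetical choices, not from the
# original script).
import json
os.makedirs(result_center, exist_ok=True)
kept = [r for r in result if r]  # drop the empty dicts returned for short series
kept.sort(key=lambda r: r["round_value"], reverse=True)  # biggest predicted swing first
with open(os.path.join(result_center, "predictions.json"), "w", encoding="utf-8") as f:
    # default=int converts any leftover NumPy integer indices to plain ints
    json.dump(kept, f, ensure_ascii=False, indent=2, default=int)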
# Sanity check: run a plain one-step predict over each per-stock validation
# series with the driver-side model, printing any failures.
for valid_tsdata in valid_tsdataset:
    try:
        predicted = reg.predict(valid_tsdata)
        print(predicted)
    except Exception as e:
        print(e)