import pandas as pd
import statsmodels.api as sm
# Align the two frames on their index labels. ``how='inner'`` is the pandas
# default, spelled out here so the join type is visible to the reader.
merged_df = pd.merge(
    left=df1,
    right=df2,
    how='inner',
    left_index=True,
    right_index=True,
)

# Response vector: the transaction amount column as a 1-D array.
y = merged_df['TRAN_AMT_HKE'].values

# Design matrix: the sentiment score as a single column, with the intercept
# column added by ``sm.add_constant``.
X = sm.add_constant(merged_df['News Sentiment'].values.reshape(-1, 1))

# Fit ordinary least squares of amount on sentiment and print the report.
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())
import pandas as pd
def calculate_holding_time(df: pd.DataFrame, *, amount_tolerance: float = 1e-6) -> pd.DataFrame:
    """Annotate sell rows with the number of days since their matching buy.

    Rows are processed in ascending ``TRAN_DT`` order. Every row whose
    ``BUY_SELL_IND`` is ``'B'`` is pushed onto a stack of open buys. Every
    ``'S'`` row is compared with the most recent open buy only: when the two
    ``TRAN_AMT_HKE`` values differ by less than ``amount_tolerance`` the buy
    is consumed and the sell row's ``holding_time`` becomes the whole-day
    difference between the two ``TRAN_DT`` values. Otherwise the buy stays
    open and the sell row keeps ``holding_time == 0``. Rows with any other
    indicator are ignored.

    The frame is modified in place: it is sorted by ``TRAN_DT`` and a
    ``holding_time`` column is added (or overwritten).

    Args:
        df: Frame with ``TRAN_DT``, ``BUY_SELL_IND`` and ``TRAN_AMT_HKE``
            columns. ``TRAN_DT`` values must subtract to an object exposing
            ``.days`` (e.g. pandas timestamps).
        amount_tolerance: Absolute difference below which a buy amount and a
            sell amount are treated as equal.

    Returns:
        The same ``df`` object, sorted and with ``holding_time`` filled in.
    """
    # A stable sort keeps rows that share a date in their original relative
    # order, so same-day buy/sell pairs are matched deterministically.
    df.sort_values(by='TRAN_DT', kind='mergesort', inplace=True)
    df['holding_time'] = 0

    # Open buys as (date, amount) pairs; the most recent one is at the end.
    open_buys = []

    # NOTE(review): results are written by index label via ``df.at``, which
    # presumably expects unique labels -- confirm against the callers' data.
    rows = zip(df.index, df['BUY_SELL_IND'], df['TRAN_DT'], df['TRAN_AMT_HKE'])
    for label, side, traded_on, amount in rows:
        if side == 'B':
            open_buys.append((traded_on, amount))
            continue
        if side != 'S' or not open_buys:
            continue

        # Peek at the latest buy; it is only consumed when the amounts match.
        bought_on, bought_amount = open_buys[-1]
        if abs(amount - bought_amount) < amount_tolerance:
            open_buys.pop()
            df.at[label, 'holding_time'] = (traded_on - bought_on).days

    return df
# Add the ``holding_time`` column to the transactions. The function sorts and
# annotates ``df`` in place and returns that same object, so both names refer
# to the same frame afterwards.
df_with_holding_time = calculate_holding_time(df)