# 去极值 (remove extreme values)
def depolarize(
    data: pd.DataFrame,
    column: str,
    lower: float = 0.025,
    upper: float = 0.975,
) -> pd.DataFrame:
    """Return the rows of *data* whose *column* value lies between two quantiles.

    The cut-offs are the ``lower`` and ``upper`` quantiles of ``data[column]``
    and both bounds are inclusive.  With the defaults the bottom 2.5% and the
    top 2.5% of the values are discarded.

    Args:
        data: Frame to filter; it is not modified.
        column: Name of the column whose extremes are removed.
        lower: Lower quantile level, in ``[0, 1]``.
        upper: Upper quantile level, in ``[0, 1]``; must not be below ``lower``.

    Returns:
        The subset of *data* (original index preserved) whose *column* value
        is within ``[quantile(lower), quantile(upper)]``.

    Raises:
        ValueError: If ``lower`` is greater than ``upper``.
    """
    if lower > upper:
        raise ValueError(
            f"lower quantile ({lower}) must not exceed upper quantile ({upper})"
        )

    values = data[column]
    lower_bound = values.quantile(lower)
    upper_bound = values.quantile(upper)
    # ``between`` is inclusive on both ends by default, matching ``>=``/``<=``.
    return data[values.between(lower_bound, upper_bound)]
# 画频率分布图 (plot the frequency distribution histogram)
def histogram(data: pd.Series) -> None:
    """Plot a histogram of *data* with a fitted normal curve and show it.

    The left axis carries a 50-bin histogram of the values.  A twin right
    axis carries the normal probability density evaluated with the mean and
    standard deviation of *data*, plus vertical markers for the mean and the
    median.  The figure is displayed with ``plt.show()``; nothing is returned.

    Args:
        data: Values to plot; ``data.name`` is used as the x-axis label.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy import stats

    mu = data.mean()
    std = data.std()
    median = data.median()

    fig, ax1 = plt.subplots()
    ax1.hist(data, bins=50, edgecolor='black', alpha=0.7, color='#ffae4c', label='Frequency Distribution')

    # The density lives on its own y-axis because its scale differs from the
    # histogram's counts.
    ax2 = ax1.twinx()
    x_values = np.linspace(data.min(), data.max(), 1000)
    curve = stats.norm.pdf(x_values, mu, std)
    # Raw f-string: ``\mu`` and ``\sigma`` are mathtext commands, not Python
    # escape sequences, so the backslashes must reach matplotlib untouched.
    ax2.plot(x_values, curve, 'k--', label=rf'Normal Distribution ($\mu={mu:.2f}, \sigma={std:.2f})$')
    ax2.axvline(mu, color='red', linestyle='--', label=f'Mean: {mu:.2f}')
    ax2.axvline(median, color='green', linestyle='-', label=f'Median: {median:.2f}')

    ax1.set_xlabel(data.name)
    ax2.set_ylabel('Probability Density', color='black')
    ax1.legend(loc='upper left', frameon=False, fontsize='small', handlelength=1, handletextpad=0.5)
    ax2.legend(loc='upper right', frameon=False, fontsize='small', handlelength=1, handletextpad=0.5)

    plt.tight_layout()
    plt.show()