import pandas as pd
# Demo frame: the integers 1..10 (column A) next to their squares (B)
# and cubes (C); also report the sum of the squares.
numbers = range(1, 11)
df = pd.DataFrame(
    {
        'A': numbers,
        'B': [n ** 2 for n in numbers],
        'C': [n ** 3 for n in numbers],
    }
)
b_sum = df.loc[:, 'B'].sum()
print(df)
print(b_sum)
import matplotlib.pyplot as plt
# --- Bookstore orders: load, derive the amount paid, summarise, export ---
# Rows are indexed by the order-ID column of the CSV.
data = pd.read_csv('书店订单数据.csv', encoding='utf-8', index_col='订单ID')
print(data)

# Amount actually paid = list price * discount rate * quantity purchased.
paid_amount = data['图书定价 (元)'] * data['折扣率 (0.7-1.0)'] * data['购买数量']
data['实际支付金额'] = paid_amount
print(data)

# Per book category: total quantity purchased and total amount paid.
book = data.groupby('图书类别').agg(
    dict.fromkeys(['购买数量', '实际支付金额'], 'sum')
)
print(book)

# Parse the payment timestamp, then keep its hour of day in its own column.
data['支付时间'] = pd.to_datetime(data['支付时间'])
data['小时数'] = data['支付时间'].dt.hour
print(data)

# Total quantity purchased in each hour of the day, drawn as a line chart.
hour_counts = data.groupby('小时数')['购买数量'].sum()
print(hour_counts)
plt.plot(hour_counts.index, hour_counts)
plt.show()

# Mean, median and standard deviation of the amount paid per order.
summary_funcs = ['mean', 'median', 'std']
states = data['实际支付金额'].agg(summary_funcs)
print(states)

# Re-order by purchase quantity (largest first) and export the first three.
data = data.sort_values(by='购买数量', ascending=False)
top3 = data.head(3)
top3.to_csv('大额订单.csv')
# --- Exam scores: totals, per-class statistics, and charts ---
df = pd.read_csv("考试成绩数据.csv")
print(df)

# Per-student total and average over the four subject columns.
subjects = ['语文', '数学', '英语', '理综/文综']
subject_scores = df[subjects]
df['总分'] = subject_scores.sum(axis=1)
df['平均分'] = subject_scores.mean(axis=1)
print(df)

# Mean / max / min of every subject within each class.
class_states = df.groupby('班级')[subjects].agg(['mean', 'max', 'min'])
print(class_states)

# Switch matplotlib's sans-serif font to SimHei for the charts below
# (presumably so the Chinese class labels render -- confirm the font is
# installed on the target machine).
plt.rcParams['font.sans-serif'] = ['SimHei']

# Students whose total is at least 600, counted per class and ordered by
# class label, shown as a bar chart.
is_high = df['总分'] >= 600
high_score_student = df.loc[is_high]
print(high_score_student)
count = high_score_student['班级'].value_counts().sort_index()
print(count)
plt.bar(count.index, count)
plt.show()

# Correlation (pandas' default method) between the maths column and the
# science/humanities-composite column.
corr = df['数学'].corr(df['理综/文综'])
print(corr)

# Average total score per exam month, drawn as a line chart.
df['考试时间'] = pd.to_datetime(df['考试时间'])
df['月份'] = df['考试时间'].dt.month
mean_total = df.groupby('月份')['总分'].mean()
print(mean_total)
plt.plot(mean_total.index, mean_total)
plt.show()
# --- Teaching evaluations: per-teacher / per-subject summaries ---
data = pd.read_csv('教学评价数据.csv')
print(data)

# Parse the evaluation date so its date parts can be used further down.
data['评价日期'] = pd.to_datetime(data['评价日期'])

# Total score of a row = student rating * number of evaluators.
data['总评分'] = data['学生评分 (1-5分)'] * data['评价人数']

# Plain (unweighted) mean of the rating column for each teacher ID.
mean_data = data.groupby('教师ID')['学生评分 (1-5分)'].mean()
print(data)
print(mean_data)

# Per subject: average rating and average number of evaluators.
counts = data.groupby('学科').agg(
    dict.fromkeys(['学生评分 (1-5分)', '评价人数'], 'mean')
)
print(counts)

# Day of week numbered 1 (Monday) .. 7 (Sunday); total evaluators per day
# shown as a bar chart.
data['星期'] = data['评价日期'].dt.dayofweek + 1
counts_weekday = data.groupby('星期')['评价人数'].sum()
plt.bar(counts_weekday.index, counts_weekday)
plt.show()

# Rank teachers from the highest mean rating to the lowest.
mean_data = mean_data.sort_values(ascending=False)
print(mean_data)
# Pie chart for the first three entries of mean_data, which is sorted in
# descending order just above, so these are the three highest mean ratings.
top3 = mean_data.iloc[:3]
# Each teacher's mean rating is expressed as a share of the sum of all
# teachers' mean ratings.
total = mean_data.sum()
top3_rate = top3 / total
# autopct is a printf-style format applied to each wedge's percentage:
# "%1.1f%%" renders one decimal place followed by a literal percent sign,
# e.g. "33.3%". The earlier "%1.1f4%%" appended a stray "4" to every label.
plt.pie(top3_rate, labels=top3_rate.index, autopct="%1.1f%%")
plt.show()




