Series 和 DataFrame 这两个核心数据结构
练习题
对于下表的数据，请使用 Pandas 中的 DataFrame 进行创建，并对数据进行清洗。同时新增一列“总和”计算每个人的三科成绩之和。
import pandas as pd
from pandas import DataFrame
# Separator line printed between the successive DataFrame dumps.
dash_line = "=" * 40

# Raw score table: a name followed by language, english and math scores.
# The first record carries no math score and the last record is repeated,
# which gives the cleaning steps below something to do.
data = [
    ["ZhangFei", 66, 65],
    ["GuanYv", 95, 85, 98],
    ["ZhaoYun", 95, 92, 96],
    ["HuangZhong", 90, 88, 77],
    ["DianWei", 80, 90, 90],
    ["DianWei", 80, 90, 90],
]
df = pd.DataFrame(
    data=data,
    columns=["name", "language", "english", "math"],
)
print(df)

# Cleaning: drop exact duplicate rows, then replace missing scores with 0.
dfuni = df.drop_duplicates().fillna(0)
print(dash_line)
print(dfuni)
def plus(df):
    """Return a copy of one score row with a ``'sum'`` entry added.

    Despite the parameter name, ``df`` is a single row (a ``pandas.Series``)
    as handed over by ``DataFrame.apply(plus, axis=1)``.

    Args:
        df: Row whose first entry is a label (the name) and whose remaining
            entries are numeric scores. An existing ``'sum'`` entry is
            ignored when totalling and then overwritten.

    Returns:
        A new Series holding every entry of ``df`` plus ``'sum'``, the total
        of all entries after the first one.
    """
    # Work on a copy: pandas does not support UDFs that mutate the object
    # passed in by ``apply``.
    row = df.copy()
    # Skip position 0 (the name) and filter 'sum' out by label rather than
    # assuming it is the last entry of the row.
    row['sum'] = sum(
        value for label, value in df.iloc[1:].items() if label != 'sum'
    )
    return row
# Run `plus` once per row (axis="columns" hands the function one row at a
# time) to build the table with the per-person total, then print it below
# a separator line.
dfsum = dfuni.apply(plus, axis="columns")
print(dash_line, dfsum, sep="\n")
name language english math
0 ZhangFei 66 65 NaN
1 GuanYv 95 85 98.0
2 ZhaoYun 95 92 96.0
3 HuangZhong 90 88 77.0
4 DianWei 80 90 90.0
5 DianWei 80 90 90.0
========================================
name language english math
0 ZhangFei 66 65 0.0
1 GuanYv 95 85 98.0
2 ZhaoYun 95 92 96.0
3 HuangZhong 90 88 77.0
4 DianWei 80 90 90.0
========================================
name language english math sum
0 ZhangFei 66 65 0.0 131.0
1 GuanYv 95 85 98.0 278.0
2 ZhaoYun 95 92 96.0 283.0
3 HuangZhong 90 88 77.0 255.0
4 DianWei 80 90 90.0 260.0