Python科学计算:Pandas

39 阅读1分钟

Series 和 DataFrame 这两个核心数据结构

练习题

对于下表的数据,请使用 Pandas 中的 DataFrame 进行创建,并对数据进行清洗。同时新增一列“总和”,计算每个人的三科成绩之和。

(原文此处为数据表图片 image.png;表中的数据即下方代码里的 data 列表。)

import pandas as pd
from pandas import DataFrame

# Horizontal rule printed between the successive DataFrame dumps.
dash_line = '=' * 40

# Raw score table. The first row carries only two scores (the third column
# comes out as NaN) and the last two rows are identical, so the frame needs
# cleaning before any totals are computed.
data = [
    ['ZhangFei', 66, 65],
    ['GuanYv', 95, 85, 98],
    ['ZhaoYun', 95, 92, 96],
    ['HuangZhong', 90, 88, 77],
    ['DianWei', 80, 90, 90],
    ['DianWei', 80, 90, 90],
]

df = DataFrame(data, columns=['name', 'language', 'english', 'math'])

print(df)

# Cleaning: drop the repeated row, then replace the missing score with 0.
dfuni = df.drop_duplicates().fillna(0)
print(dash_line, dfuni, sep='\n')

def plus(df):
    """Return a copy of a score row with a trailing ``sum`` entry.

    Despite the parameter name, ``df`` is a single row (a ``Series``) when
    this function is used with ``DataFrame.apply(..., axis=1)``. The first
    entry is treated as a label and skipped; every remaining entry is added
    into the total, which is appended under the key ``'sum'``.

    The row is copied before it is extended because pandas does not support
    functions passed to ``apply`` mutating the object they receive.
    """
    row = df.copy()
    # The built-in sum starts from int 0 and adds entry by entry, so a NaN
    # score still propagates into the total instead of being skipped.
    row['sum'] = sum(row.iloc[1:])
    return row

# Run plus once per row (axis='columns' is the spelled-out form of axis=1)
# to obtain the cleaned table extended with each person's total.
dfsum = dfuni.apply(plus, axis='columns')

print(dash_line, dfsum, sep='\n')
         name  language  english  math
0    ZhangFei        66       65   NaN
1      GuanYv        95       85  98.0
2     ZhaoYun        95       92  96.0
3  HuangZhong        90       88  77.0
4     DianWei        80       90  90.0
5     DianWei        80       90  90.0
========================================
         name  language  english  math
0    ZhangFei        66       65   0.0
1      GuanYv        95       85  98.0
2     ZhaoYun        95       92  96.0
3  HuangZhong        90       88  77.0
4     DianWei        80       90  90.0
========================================
         name  language  english  math    sum
0    ZhangFei        66       65   0.0  131.0
1      GuanYv        95       85  98.0  278.0
2     ZhaoYun        95       92  96.0  283.0
3  HuangZhong        90       88  77.0  255.0
4     DianWei        80       90  90.0  260.0