# 文章目录 (Table of contents)
# DataFrame基本列操作 (Basic DataFrame column operations)
import pandas as pd
import numpy as np
# Sample data: a text column "a" (with one missing value), two numeric
# columns "b" and "c", and an all-zero column "d".
l = [["p", 12, 12, 0], [np.nan, 12.3, 33.0, 0], ["q", 12.3, 0, 0], ["r", 1, 1, 0]]
df = pd.DataFrame(l, columns=["a", "b", "c", "d"])
print(df)
# Expected output:
#      a     b     c  d
# 0    p  12.0  12.0  0
# 1  NaN  12.3  33.0  0
# 2    q  12.3   0.0  0
# 3    r   1.0   1.0  0

# Scale a numeric column by a constant. Run this exactly once: re-running
# the statement multiplies the already-scaled values by 8 again.
df["c"] = df["c"] * 8
print(df)
# Expected output:
#      a     b      c  d
# 0    p  12.0   96.0  0
# 1  NaN  12.3  264.0  0
# 2    q  12.3    0.0  0
# 3    r   1.0    8.0  0

# Element-wise product of two columns stored as a new column.
df["b*c"] = df["b"] * df["c"]
print(df)
# Expected output:
#      a     b      c  d     b*c
# 0    p  12.0   96.0  0  1152.0
# 1  NaN  12.3  264.0  0  3247.2
# 2    q  12.3    0.0  0     0.0
# 3    r   1.0    8.0  0     8.0

# Multiplying a text column repeats each string; the missing value stays NaN.
df["a*8"] = df["a"] * 8
print(df)
# Expected output:
#      a     b      c  d     b*c       a*8
# 0    p  12.0   96.0  0  1152.0  pppppppp
# 1  NaN  12.3  264.0  0  3247.2       NaN
# 2    q  12.3    0.0  0     0.0  qqqqqqqq
# 3    r   1.0    8.0  0     8.0  rrrrrrrr

# The calls below return new Series and leave df untouched, so the results
# are printed explicitly (a bare expression shows nothing in a script).

# Shift values down by one row; the first row has no predecessor.
print(df["b"].shift(1))
# Expected output:
# 0     NaN
# 1    12.0
# 2    12.3
# 3    12.3
# Name: b, dtype: float64

# Shift values up by one row; the last row has no successor.
print(df["b"].shift(-1))
# Expected output:
# 0    12.3
# 1    12.3
# 2     1.0
# 3     NaN
# Name: b, dtype: float64

# Difference between each row and the previous one.
print(df["b"].diff(1))
# Expected output:
# 0     NaN
# 1     0.3
# 2     0.0
# 3   -11.3
# Name: b, dtype: float64

# Fractional change relative to the previous row.
print(df["b"].pct_change())
# Expected output:
# 0         NaN
# 1    0.025000
# 2    0.000000
# 3   -0.918699
# Name: b, dtype: float64

# Ascending rank of each value within the column (1 = smallest).
print(df["c"].rank())
# Expected output:
# 0    3.0
# 1    4.0
# 2    1.0
# 3    2.0
# Name: c, dtype: float64
# DataFrame常用列处理 (Common DataFrame column processing)
# Start this section from a fresh copy of the sample data so that the results
# below do not depend on columns added or rescaled earlier in the script.
# NOTE(review): this rebinds `df`; the documented outputs of this section
# (columns a-d, unscaled "c") only hold for a fresh frame.
df = pd.DataFrame(
    [["p", 12, 12, 0], [np.nan, 12.3, 33.0, 0], ["q", 12.3, 0, 0], ["r", 1, 1, 0]],
    columns=["a", "b", "c", "d"],
)

# Keep only the columns that contain at least one non-zero value, i.e. drop
# columns that are zero in every row ("d" here).
df1 = df.loc[:, (df != 0).any()]
print(df1)
# Expected output:
#      a     b     c
# 0    p  12.0  12.0
# 1  NaN  12.3  33.0
# 2    q  12.3   0.0
# 3    r   1.0   1.0

# Treat zeros in "c" as missing, then fill them with the mean of the
# remaining values. Each result is assigned back to the column: calling
# fillna(..., inplace=True) on a selection such as df.loc[:, "c"] operates on
# a temporary object and does not reliably update df.
df["c"] = df["c"].mask(df["c"] == 0)
df["c"] = df["c"].fillna(df["c"].mean())
print(df)
# Expected output:
#      a     b          c  d
# 0    p  12.0  12.000000  0
# 1  NaN  12.3  33.000000  0
# 2    q  12.3  15.333333  0
# 3    r   1.0   1.000000  0