pandas使用笔记(二)列操作

174 阅读2分钟

文章目录

DataFrame基本列操作

import pandas as pd
import numpy as np

# Build the sample data: a string column with one missing value (a),
# two float columns (b, c) and an all-zero integer column (d).
l = [["p", 12, 12, 0], [np.nan, 12.3, 33., 0], ["q", 12.3, 0, 0], ["r", 1, 1, 0]]
df = pd.DataFrame(l, columns=["a", "b", "c", "d"])

print(df)
"""
     a     b     c  d
0    p  12.0  12.0  0
1  NaN  12.3  33.0  0
2    q  12.3   0.0  0
3    r   1.0   1.0  0
"""


# A purely numeric column supports arithmetic directly (element-wise).
df['c'] = df['c'] * 8
print(df)
"""
     a     b      c  d
0    p  12.0   96.0  0
1  NaN  12.3  264.0  0
2    q  12.3    0.0  0
3    r   1.0    8.0  0
"""

# Two columns can be combined element-wise as well (mind the dtypes).
df['b*c'] = df['b'] * df['c']
print(df)

"""
     a     b      c  d     b*c
0    p  12.0   96.0  0  1152.0
1  NaN  12.3  264.0  0  3247.2
2    q  12.3    0.0  0     0.0
3    r   1.0    8.0  0     8.0
"""

# A string column behaves similarly: multiplying repeats each string,
# while the missing value stays NaN.
df['a*8'] = df['a'] * 8
print(df)

"""
     a     b      c  d     b*c       a*8
0    p  12.0   96.0  0  1152.0  pppppppp
1  NaN  12.3  264.0  0  3247.2       NaN
2    q  12.3    0.0  0     0.0  qqqqqqqq
3    r   1.0    8.0  0     8.0  rrrrrrrr
"""

# Shift the values of a column down or up.
# The results are bound to names and printed: a bare expression only shows
# its value in a REPL/notebook, not when this runs as a script.
b_shift_down = df['b'].shift(1)  # move everything down one row (1 is the default)
print(b_shift_down)
"""
0     NaN
1    12.0
2    12.3
3    12.3
Name: b, dtype: float64
"""
b_shift_up = df['b'].shift(-1)  # move everything up one row
print(b_shift_up)
"""
0    12.3
1    12.3
2     1.0
3     NaN
Name: b, dtype: float64
"""

# Row-by-row difference: each value minus the one in the row above.
# As with shift(), a negative argument compares with the row below instead.
b_diff = df['b'].diff(1)
print(b_diff)
"""
0     NaN
1     0.3
2     0.0
3   -11.3
Name: b, dtype: float64
"""

# Relative change with respect to the previous row.
b_pct_change = df['b'].pct_change()
print(b_pct_change)
"""
0         NaN
1    0.025000
2    0.000000
3   -0.918699
Name: b, dtype: float64
"""


# Ranking: position of each value within the column (1 = smallest).
c_rank = df['c'].rank()
print(c_rank)
"""
0    3.0
1    4.0
2    1.0
3    2.0
Name: c, dtype: float64
"""

DataFrame常用列处理

# Start again from the sample data of the previous section. The results
# recorded below are for the pristine frame, whereas the `df` left behind by
# that section has a scaled `c` column and two extra derived columns.
df = pd.DataFrame(
    [["p", 12, 12, 0], [np.nan, 12.3, 33., 0], ["q", 12.3, 0, 0], ["r", 1, 1, 0]],
    columns=["a", "b", "c", "d"],
)

# Drop the columns that contain nothing but zeros: a column is kept as soon
# as at least one of its values differs from 0.
df1 = df.loc[:, (df != 0).any()]
# Alternative: df1 = df.drop(columns=df.columns[(df == 0).all()])
print(df1)
"""
     a     b     c
0    p  12.0  12.0
1  NaN  12.3  33.0
2    q  12.3   0.0
3    r   1.0   1.0
"""

# Replace the zeros of a column by the mean of its non-zero values.
# Step 1: turn the zeros into NaN so that mean(), which skips NaN, ignores them.
c_without_zeros = df["c"].replace(0, np.nan)
# Step 2: fill the gaps with that mean and assign the result back to the
# frame. Calling fillna(inplace=True) on `df.loc[:, 'c']` acts on a temporary
# Series; with copy-on-write enabled it leaves `df` untouched.
df["c"] = c_without_zeros.fillna(c_without_zeros.mean())

print(df)
"""
     a     b          c  d
0    p  12.0  12.000000  0
1  NaN  12.3  33.000000  0
2    q  12.3  15.333333  0
3    r   1.0   1.000000  0
"""