import numpy as np
import pandas as pd
pd.__version__
'1.1.5'
文件的读取和写入
文件读取
- 使用pandas读取csv,excel,txt文件
df_csv = pd.read_csv('data/my_csv.csv')
df_csv
|
col1 |
col2 |
col3 |
col4 |
col5 |
0 |
2 |
a |
1.4 |
apple |
2020/1/1 |
1 |
3 |
b |
3.4 |
banana |
2020/1/2 |
2 |
6 |
c |
2.5 |
orange |
2020/1/5 |
3 |
5 |
d |
3.2 |
lemon |
2020/1/7 |
df_txt = pd.read_table('data/my_table.txt')
df_txt
|
col1 |
col2 |
col3 |
col4 |
0 |
2 |
a |
1.4 |
apple 2020/1/1 |
1 |
3 |
b |
3.4 |
banana 2020/1/2 |
2 |
6 |
c |
2.5 |
orange 2020/1/5 |
3 |
5 |
d |
3.2 |
lemon 2020/1/7 |
- 关于xlrd打不开xlsx文件问题
- xlrd更新到了2.0.1版本,只支持.xls文件。
- 可以用openpyxl代替xlrd打开.xlsx文件
!pip3 install openpyxl
df_excel = pd.read_excel('data/my_excel.xlsx',engine='openpyxl')
df_excel
|
col1 |
col2 |
col3 |
col4 |
col5 |
0 |
2 |
a |
1.4 |
apple |
2020/1/1 |
1 |
3 |
b |
3.4 |
banana |
2020/1/2 |
2 |
6 |
c |
2.5 |
orange |
2020/1/5 |
3 |
5 |
d |
3.2 |
lemon |
2020/1/7 |
pd.read_table('data/my_table.txt',header=None)
|
0 |
1 |
2 |
3 |
0 |
col1 |
col2 |
col3 |
col4 |
1 |
2 |
a |
1.4 |
apple 2020/1/1 |
2 |
3 |
b |
3.4 |
banana 2020/1/2 |
3 |
6 |
c |
2.5 |
orange 2020/1/5 |
4 |
5 |
d |
3.2 |
lemon 2020/1/7 |
pd.read_csv('data/my_csv.csv',index_col=['col1','col2'])
|
|
col3 |
col4 |
col5 |
col1 |
col2 |
|
|
|
2 |
a |
1.4 |
apple |
2020/1/1 |
3 |
b |
3.4 |
banana |
2020/1/2 |
6 |
c |
2.5 |
orange |
2020/1/5 |
5 |
d |
3.2 |
lemon |
2020/1/7 |
pd.read_table('data/my_table.txt',usecols=['col1','col2'])
|
col1 |
col2 |
0 |
2 |
a |
1 |
3 |
b |
2 |
6 |
c |
3 |
5 |
d |
pd.read_csv('data/my_csv.csv',parse_dates=['col5'])
|
col1 |
col2 |
col3 |
col4 |
col5 |
0 |
2 |
a |
1.4 |
apple |
2020-01-01 |
1 |
3 |
b |
3.4 |
banana |
2020-01-02 |
2 |
6 |
c |
2.5 |
orange |
2020-01-05 |
3 |
5 |
d |
3.2 |
lemon |
2020-01-07 |
pd.read_csv('data/my_csv.csv',nrows=2)
|
col1 |
col2 |
col3 |
col4 |
col5 |
0 |
2 |
a |
1.4 |
apple |
2020/1/1 |
1 |
3 |
b |
3.4 |
banana |
2020/1/2 |
pd.read_table('data/my_table_special_sep.txt')
|
col1 |||| col2 |
0 |
TS |||| This is an apple. |
1 |
GQ |||| My name is Bob. |
2 |
WT |||| Well done! |
3 |
PT |||| May I help you? |
# The separator is a regular expression matching four literal pipe characters.
# A raw string keeps the backslashes intact and avoids invalid-escape warnings.
pd.read_table('data/my_table_special_sep.txt', sep=r'\|\|\|\|', engine='python')
|
col1 |
col2 |
0 |
TS |
This is an apple. |
1 |
GQ |
My name is Bob. |
2 |
WT |
Well done! |
3 |
PT |
May I help you? |
数据写入
df_csv.to_csv('data/my_csv_saved.csv',index=False)
df_csv = pd.read_csv('data/my_csv_saved.csv')
df_csv
df_excel.to_excel('data/my_excel_saved.xlsx',index=False)
df_txt.to_csv('data/my_txt_saved.txt',sep='\t',index=False)
!pip3 install tabulate
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting tabulate
Downloading https://pypi.tuna.tsinghua.edu.cn/packages/c4/f4/770ae9385990f5a19a91431163d262182d3203662ea2b5739d0fcfc080f1/tabulate-0.8.7-py3-none-any.whl (24 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.8.7
print(df_csv.to_markdown())
| | col1 | col2 | col3 | col4 | col5 |
|---:|-------:|:-------|-------:|:-------|:---------|
| 0 | 2 | a | 1.4 | apple | 2020/1/1 |
| 1 | 3 | b | 3.4 | banana | 2020/1/2 |
| 2 | 6 | c | 2.5 | orange | 2020/1/5 |
| 3 | 5 | d | 3.2 | lemon | 2020/1/7 |
print(df_csv.to_latex())
\begin{tabular}{lrlrll}
\toprule
{} & col1 & col2 & col3 & col4 & col5 \\
\midrule
0 & 2 & a & 1.4 & apple & 2020/1/1 \\
1 & 3 & b & 3.4 & banana & 2020/1/2 \\
2 & 6 & c & 2.5 & orange & 2020/1/5 \\
3 & 5 & d & 3.2 & lemon & 2020/1/7 \\
\bottomrule
\end{tabular}
基本数据结构
Series
,存储一维values
DataFrame
,存储二维的values
Series
data
,序列的值
index
,索引
dtype
,存储类型
name
,序列的名字
s = pd.Series(data = [100,'a',{'dic1':5}],
index = pd.Index(['id1',20,'third'], name='my_idx'),
dtype = 'object',
name = 'my_name')
s
my_idx
id1 100
20 a
third {'dic1': 5}
Name: my_name, dtype: object
-
object,混合类型
object
代表了一种混合类型,正如上面的例子中存储了整数、字符串以及Python的字典数据结构。
-
此外,目前 pandas把纯字符串序列也默认认为是一种object
类型的序列,但它也可以用string
类型存储。
s.values
array([100, 'a', {'dic1': 5}], dtype=object)
s.index
Index(['id1', 20, 'third'], dtype='object', name='my_idx')
s.dtype
dtype('O')
s.name
'my_name'
s.shape
(3,)
s['third']
{'dic1': 5}
DataFrame
DataFrame
在Series
基础上增加了列索引,一个数据框可以由二维的data
与行列索引
来构造
data = [[1,'a',1.2],[2,'b',2.2],[3,'c',3.2]]
df = pd.DataFrame(data = data,
index = ['row_%d'%i for i in range(3)],
columns=['col_0','col_1','col_2'])
df
|
col_0 |
col_1 |
col_2 |
row_0 |
1 |
a |
1.2 |
row_1 |
2 |
b |
2.2 |
row_2 |
3 |
c |
3.2 |
df = pd.DataFrame(data = {'col_0':[1,2,3],'col_1':list('abc'),'col_2':[1.2,2.2,3.2]},
index = ['row_%d'%i for i in range(3)])
df
|
col_0 |
col_1 |
col_2 |
row_0 |
1 |
a |
1.2 |
row_1 |
2 |
b |
2.2 |
row_2 |
3 |
c |
3.2 |
df['col_0']
row_0 1
row_1 2
row_2 3
Name: col_0, dtype: int64
df[['col_1','col_2']]
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
|
col_1 |
col_2 |
row_0 |
a |
1.2 |
row_1 |
b |
2.2 |
row_2 |
c |
3.2 |
df.values
array([[1, 'a', 1.2],
[2, 'b', 2.2],
[3, 'c', 3.2]], dtype=object)
df.index
Index(['row_0', 'row_1', 'row_2'], dtype='object')
df.columns
Index(['col_0', 'col_1', 'col_2'], dtype='object')
df.shape
(3, 3)
df.dtypes
col_0 int64
col_1 object
col_2 float64
dtype: object
df.T
|
row_0 |
row_1 |
row_2 |
col_0 |
1 |
2 |
3 |
col_1 |
a |
b |
c |
col_2 |
1.2 |
2.2 |
3.2 |
常用基本函数
df = pd.read_csv('data/learn_pandas.csv')
df.columns
Index(['School', 'Grade', 'Name', 'Gender', 'Height', 'Weight', 'Transfer', 'Test_Number', 'Test_Date', 'Time_Record'],
dtype='object')
df = df[df.columns[:7]]
df
|
School |
Grade |
Name |
Gender |
Height |
Weight |
Transfer |
0 |
Shanghai Jiao Tong University |
Freshman |
Gaopeng Yang |
Female |
158.9 |
46.0 |
N |
1 |
Peking University |
Freshman |
Changqiang You |
Male |
166.5 |
70.0 |
N |
2 |
Shanghai Jiao Tong University |
Senior |
Mei Sun |
Male |
188.9 |
89.0 |
N |
3 |
Fudan University |
Sophomore |
Xiaojuan Sun |
Female |
NaN |
41.0 |
N |
4 |
Fudan University |
Sophomore |
Gaojuan You |
Male |
174.0 |
74.0 |
N |
... |
... |
... |
... |
... |
... |
... |
... |
195 |
Fudan University |
Junior |
Xiaojuan Sun |
Female |
153.9 |
46.0 |
N |
196 |
Tsinghua University |
Senior |
Li Zhao |
Female |
160.9 |
50.0 |
N |
197 |
Shanghai Jiao Tong University |
Senior |
Chengqiang Chu |
Female |
153.9 |
45.0 |
N |
198 |
Shanghai Jiao Tong University |
Senior |
Chengmei Shen |
Male |
175.3 |
71.0 |
N |
199 |
Tsinghua University |
Sophomore |
Chunpeng Lv |
Male |
155.7 |
51.0 |
N |
200 rows × 7 columns
汇总函数
- head,tail 返回表/序列的前/后n行,n默认为5
- info,信息概况
- describe,数值列对应的主要统计量
df.head(2)
|
School |
Grade |
Name |
Gender |
Height |
Weight |
Transfer |
0 |
Shanghai Jiao Tong University |
Freshman |
Gaopeng Yang |
Female |
158.9 |
46.0 |
N |
1 |
Peking University |
Freshman |
Changqiang You |
Male |
166.5 |
70.0 |
N |
df.tail(3)
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
|
School |
Grade |
Name |
Gender |
Height |
Weight |
Transfer |
197 |
Shanghai Jiao Tong University |
Senior |
Chengqiang Chu |
Female |
153.9 |
45.0 |
N |
198 |
Shanghai Jiao Tong University |
Senior |
Chengmei Shen |
Male |
175.3 |
71.0 |
N |
199 |
Tsinghua University |
Sophomore |
Chunpeng Lv |
Male |
155.7 |
51.0 |
N |
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 School 200 non-null object
1 Grade 200 non-null object
2 Name 200 non-null object
3 Gender 200 non-null object
4 Height 183 non-null float64
5 Weight 189 non-null float64
6 Transfer 188 non-null object
dtypes: float64(2), object(5)
memory usage: 11.1+ KB
df.describe()
|
Height |
Weight |
count |
183.000000 |
189.000000 |
mean |
163.218033 |
55.015873 |
std |
8.608879 |
12.824294 |
min |
145.400000 |
34.000000 |
25% |
157.150000 |
46.000000 |
50% |
161.900000 |
51.000000 |
75% |
167.500000 |
65.000000 |
max |
193.900000 |
89.000000 |
特征统计函数
- 常见函数
sum
:求和, mean
:平均值 , median
:中位数, var
:方差, std
:标准差, max
:最大值, min
:最小值
quantile
, 返回分位数
count
, 非缺失值个数
idxmax
,最大值对应的索引
df_demo = df[['Height','Weight']]
df_demo.mean()
Height 163.218033
Weight 55.015873
dtype: float64
df_demo.max()
Height 193.9
Weight 89.0
dtype: float64
df_demo.var()
Height 74.112805
Weight 164.462513
dtype: float64
df_demo.quantile()
Height 161.9
Weight 51.0
Name: 0.5, dtype: float64
df_demo.count()
Height 183
Weight 189
dtype: int64
df_demo.idxmax()
Height 193
Weight 2
dtype: int64
- 上面这些所有的函数,由于操作后返回的是标量,所以又称为聚合函数,它们有一个公共参数
axis
,默认为0
代表逐列聚合
,如果设置为1
则表示逐行聚合
df_demo.mean(axis=1).head()
0 102.45
1 118.25
2 138.95
3 41.00
4 124.00
dtype: float64
唯一值函数
- 观察某列组合的唯一值
unique
,唯一值组成的列表
nunique
,唯一值的个数
value_counts
,可以得到唯一值和其对应出现的频数
- 观察多列组合的唯一值
drop_duplicates
,其中的关键参数是 keep
- 默认值
first
表示每个组合保留第一次出现的所在行
last
表示保留最后一次出现的所在行,
False
表示把所有重复组合所在的行剔除
df['School'].unique()
array(['Shanghai Jiao Tong University', 'Peking University', 'Fudan University', 'Tsinghua University'], dtype=object)
df['School'].nunique()
4
df['School'].value_counts()
Tsinghua University 69
Shanghai Jiao Tong University 57
Fudan University 40
Peking University 34
Name: School, dtype: int64
df_demo = df[['Gender','Transfer','Name']]
df_demo.drop_duplicates(['Gender','Transfer'])
|
Gender |
Transfer |
Name |
0 |
Female |
N |
Gaopeng Yang |
1 |
Male |
N |
Changqiang You |
12 |
Female |
NaN |
Peng You |
21 |
Male |
NaN |
Xiaopeng Shen |
36 |
Male |
Y |
Xiaojuan Qin |
43 |
Female |
Y |
Gaoli Feng |
df_demo.drop_duplicates(['Gender','Transfer'],keep='last')
|
Gender |
Transfer |
Name |
147 |
Male |
NaN |
Juan You |
150 |
Male |
Y |
Chengpeng You |
169 |
Female |
Y |
Chengquan Qin |
194 |
Female |
NaN |
Yanmei Qian |
197 |
Female |
N |
Chengqiang Chu |
199 |
Male |
N |
Chunpeng Lv |
df_demo.drop_duplicates(['Name','Gender'],keep=False).head()
|
Gender |
Transfer |
Name |
0 |
Female |
N |
Gaopeng Yang |
1 |
Male |
N |
Changqiang You |
2 |
Male |
N |
Mei Sun |
4 |
Male |
N |
Gaojuan You |
5 |
Female |
N |
Xiaoli Qian |
df['School'].drop_duplicates()
0 Shanghai Jiao Tong University
1 Peking University
3 Fudan University
5 Tsinghua University
Name: School, dtype: object
替换函数
- 一般,替换操作都是替换某一列
- 替换函数
- 映射替换
replace
方法,支持特殊的方向替换
method
:参数为ffill
使用前1个值进行替换
bfill
:使用后1个值进行替换。
str.replace
cat.codes
- 逻辑替换
where
,传入条件为False
进行替换
mask
,传入条件为True
进行替换
- 数值替换
round
,按照给定精度四舍五入
abs
,取绝对值
clip
,截断
df['Gender'].replace({'Female':0, 'Male':1}).head()
0 0
1 1
2 1
3 0
4 1
Name: Gender, dtype: int64
df['Gender'].replace([0, 1],['Female', 'Male']).head()
0 Female
1 Male
2 Male
3 Female
4 Male
Name: Gender, dtype: object
s = pd.Series(['a',1,'b',2,1,1,'a'])
s
0 a
1 1
2 b
3 2
4 1
5 1
6 a
dtype: object
s.replace([1,2],method='ffill')
0 a
1 a
2 b
3 b
4 b
5 b
6 a
dtype: object
s.replace([1,2],method='bfill')
0 a
1 b
2 b
3 a
4 a
5 a
6 a
dtype: object
s = pd.Series([-1,1.2345,100,-50])
s
0 -1.0000
1 1.2345
2 100.0000
3 -50.0000
dtype: float64
s.where(s<0,100)
0 -1.0
1 100.0
2 100.0
3 -50.0
dtype: float64
s.mask(s<0,99)
0 99.0000
1 1.2345
2 100.0000
3 99.0000
dtype: float64
s_condition= pd.Series([True,False,False,True],index=s.index)
s.mask(s_condition, 99)
0 99.0000
1 1.2345
2 100.0000
3 99.0000
dtype: float64
s.round(2)
0 -1.00
1 1.23
2 100.00
3 -50.00
dtype: float64
s.abs()
0 1.0000
1 1.2345
2 100.0000
3 50.0000
dtype: float64
s.clip(0,2)
0 0.0000
1 1.2345
2 2.0000
3 0.0000
dtype: float64
排序函数
- 排序共有2种方式
- 值排序,
sort_values
- 索引排序,
sort_index
df_demo = df[['Grade','Name','Height','Weight']].set_index(['Grade','Name'])
df_demo
|
|
Height |
Weight |
Grade |
Name |
|
|
Freshman |
Gaopeng Yang |
158.9 |
46.0 |
Changqiang You |
166.5 |
70.0 |
Senior |
Mei Sun |
188.9 |
89.0 |
Sophomore |
Xiaojuan Sun |
NaN |
41.0 |
Gaojuan You |
174.0 |
74.0 |
... |
... |
... |
... |
Junior |
Xiaojuan Sun |
153.9 |
46.0 |
Senior |
Li Zhao |
160.9 |
50.0 |
Chengqiang Chu |
153.9 |
45.0 |
Chengmei Shen |
175.3 |
71.0 |
Sophomore |
Chunpeng Lv |
155.7 |
51.0 |
200 rows × 2 columns
df_demo.sort_values('Height').head()
|
|
Height |
Weight |
Grade |
Name |
|
|
Junior |
Xiaoli Chu |
145.4 |
34.0 |
Senior |
Gaomei Lv |
147.3 |
34.0 |
Sophomore |
Peng Han |
147.8 |
34.0 |
Senior |
Changli Lv |
148.7 |
41.0 |
Sophomore |
Changjuan You |
150.5 |
40.0 |
df_demo.sort_values('Height',ascending=False).head()
|
|
Height |
Weight |
Grade |
Name |
|
|
Senior |
Xiaoqiang Qin |
193.9 |
79.0 |
Mei Sun |
188.9 |
89.0 |
Gaoli Zhao |
186.5 |
83.0 |
Freshman |
Qiang Han |
185.3 |
87.0 |
Senior |
Qiang Zheng |
183.9 |
87.0 |
df_demo.sort_values(['Weight','Height'],ascending=[True,False]).head()
|
|
Height |
Weight |
Grade |
Name |
|
|
Sophomore |
Peng Han |
147.8 |
34.0 |
Senior |
Gaomei Lv |
147.3 |
34.0 |
Junior |
Xiaoli Chu |
145.4 |
34.0 |
Sophomore |
Qiang Zhou |
150.5 |
36.0 |
Freshman |
Yanqiang Xu |
152.4 |
38.0 |
df_demo.sort_index(level=['Grade','Name'],ascending=[False,True]).head()
|
|
Height |
Weight |
Grade |
Name |
|
|
Sophomore |
Changjuan You |
150.5 |
40.0 |
Changmei Xu |
151.6 |
43.0 |
Changqiang Qian |
167.6 |
64.0 |
Chengli You |
164.1 |
57.0 |
Chengqiang Lv |
166.8 |
53.0 |
apply方法
- apply方法常用于DataFrame的行或者列迭代
- 是一个以序列为输入的函数
df_demo = df[['Height','Weight']]
def my_mean(x):
    """Return the mean of ``x`` by calling its ``mean()`` method.

    Intended to be passed to ``DataFrame.apply``.
    """
    return x.mean()
df_demo.apply(my_mean)
Height 163.218033
Weight 55.015873
dtype: float64
df_demo.apply(lambda x:x.mean())
Height 163.218033
Weight 55.015873
dtype: float64
df_demo.apply(lambda x:x.mean(),axis=1).head()
0 102.45
1 118.25
2 138.95
3 41.00
4 124.00
dtype: float64
- mad函数返回的是一个序列中偏离该序列均值的绝对值大小的均值.
- 例如序列1,3,7,10中,均值为5.25,每一个元素偏离的绝对值为4.25,2.25,1.75,4.75,这个偏离序列的均值为3.25。
df_demo.apply(lambda x:(x-x.mean()).abs().mean())
Height 6.707229
Weight 10.391870
dtype: float64
df_demo.mad()
Height 6.707229
Weight 10.391870
dtype: float64
窗口对象
- 滑动窗口rolling
- 扩张窗口expanding
- 加权窗口ewm
滑窗对象
- 对序列使用.rolling得到滑窗对象,其最重要的参数为窗口大小window
- window
- window = 数值型int --> 计算统计量的观测值的数量,即从每个位置开始(含该位置自身),向前数window个元素,然后再使用后面定义的函数
- window = offset时间偏移量 --> 表示时间窗的大小
s = pd.Series(np.arange(1,6,1))
roller = s.rolling(window = 3)
roller
Rolling [window=3,center=False,axis=0]
roller.mean()
0 NaN
1 NaN
2 2.0
3 3.0
4 4.0
dtype: float64
解释:
s = pd.Series([1,2,3,4,5]) --> [1,2,3,4,5]
经过:s.rolling(window=3) -->
从第0个下标,往前数3个,[空,空 ,1]
从第1个下标,往前数3个,[空,1 , 2]
从第2个下标,往前数3个,[1, 2, 3]
从第3个下标,往前数3个,[2, 3, 4]
从第4个下标,往前数3个,[3, 4, 5]
然后将每个窗口内的元素,再经过统计函数mean()进行计算。
第n个元素的值将是n,n-1和n-2元素的平均值
便形成了:
0 NaN
1 NaN
2 2.0
3 3.0
4 4.0
roller.sum()
0 NaN
1 NaN
2 6.0
3 9.0
4 12.0
dtype: float64
s1 = pd.Series([1,4,18,29])
roller.cov(s1)
0 NaN
1 NaN
2 8.5
3 12.5
4 NaN
dtype: float64
roller.corr(s1)
0 NaN
1 NaN
2 0.936766
3 0.997609
4 NaN
dtype: float64
roller.apply(lambda x:x.mean())
0 NaN
1 NaN
2 2.0
3 3.0
4 4.0
dtype: float64
- 类滑窗函数
shift
,diff
,pct_change
- 公共参数为
periods=n
,默认为1
shift
,取向前第n个元素的值
diff
,与向前第n个数做差
pct_change
,与向前第n个元素比计算增长率
- 其中,n可以为负,表示反向类似操作
s
0 1
1 2
2 3
3 4
4 5
dtype: int64
s.shift(2)
0 NaN
1 NaN
2 1.0
3 2.0
4 3.0
dtype: float64
s.diff(2)
0 NaN
1 NaN
2 2.0
3 2.0
4 2.0
dtype: float64
s.pct_change(1)
0 NaN
1 1.000000
2 0.500000
3 0.333333
4 0.250000
dtype: float64
s.shift(-1)
0 2.0
1 3.0
2 4.0
3 5.0
4 NaN
dtype: float64
s.diff(-2)
0 -2.0
1 -2.0
2 -2.0
3 NaN
4 NaN
dtype: float64
- 类滑窗函数可以用窗口大小为n+1的rolling方法等价代替
s.rolling(3).apply(lambda x:list(x)[0])
0 NaN
1 NaN
2 1.0
3 2.0
4 3.0
dtype: float64
s.rolling(3).apply(lambda x:list(x)[-1]-list(x)[0])
0 NaN
1 NaN
2 2.0
3 2.0
4 2.0
dtype: float64
def my_pct(x):
    """Return the relative change from the first to the last element of ``x``.

    The result is ``last / first - 1``.
    """
    # Materialize to a list so that indexing below is positional.
    values = list(x)
    return values[-1] / values[0] - 1
s.rolling(2).apply(my_pct)
0 NaN
1 1.000000
2 0.500000
3 0.333333
4 0.250000
dtype: float64
扩张窗口
扩张窗口又称累计窗口,可以理解为一个动态长度的窗口,其窗口的大小就是从序列开始处到具体操作的对应位置,其使用的聚合函数会作用于这些逐步扩张的窗口上。具体地说,设序列为a1, a2, a3, a4,则其每个位置对应的窗口即[a1]、[a1, a2]、[a1, a2, a3]、[a1, a2, a3, a4]。
s.expanding().mean()
0 1.0
1 1.5
2 2.0
3 2.5
4 3.0
dtype: float64
s.expanding().max()
0 1.0
1 2.0
2 3.0
3 4.0
4 5.0
dtype: float64
s.expanding().sum()
0 1.0
1 3.0
2 6.0
3 10.0
4 15.0
dtype: float64
s.expanding().apply(lambda x:x.prod())
0 1.0
1 2.0
2 6.0
3 24.0
4 120.0
dtype: float64
参考