| F | 123 | 138 | 55 |
| J | 100 | 70 | 63 |
cond = (df.Python > 70) & (df.Math > 70)
df[cond]
cond = df.index.isin(['C','E','H','K'])
df[cond]
| Python | Math | En |
|---|
| C | 9 | 14 | 71 |
| E | 1 | 71 | 71 |
| H | 59 | 17 | 140 |
| K | 79 | 37 | 72 |
4.5 赋值操作
df['Python']['A'] = 150
df
| Python | Math | En |
|---|
| A | 150 | 52 | 48 |
| B | 78 | 62 | 94 |
| C | 9 | 14 | 71 |
| D | 86 | 15 | 21 |
| E | 1 | 71 | 71 |
| F | 123 | 138 | 55 |
| H | 59 | 17 | 140 |
| I | 68 | 8 | 58 |
| J | 100 | 70 | 63 |
| K | 79 | 37 | 72 |
df['Java'] = np.random.randint(0,151,size = 10)
df
| Python | Math | En | Java |
|---|
| A | 150 | 52 | 48 | 65 |
| B | 78 | 62 | 94 | 25 |
| C | 9 | 14 | 71 | 82 |
| D | 86 | 15 | 21 | 139 |
| E | 1 | 71 | 71 | 67 |
| F | 123 | 138 | 55 | 145 |
| H | 59 | 17 | 140 | 53 |
| I | 68 | 8 | 58 | 141 |
| J | 100 | 70 | 63 | 11 |
| K | 79 | 37 | 72 | 127 |
df.loc[['C','D','E'],'Math'] = 147
df
| Python | Math | En | Java |
|---|
| A | 150 | 52 | 48 | 65 |
| B | 78 | 62 | 94 | 25 |
| C | 9 | 147 | 71 | 82 |
| D | 86 | 147 | 21 | 139 |
| E | 1 | 147 | 71 | 67 |
| F | 123 | 138 | 55 | 145 |
| H | 59 | 17 | 140 | 53 |
| I | 68 | 8 | 58 | 141 |
| J | 100 | 70 | 63 | 11 |
| K | 79 | 37 | 72 | 127 |
cond = df < 60
df[cond] = 60
df
| Python | Math | En | Java |
|---|
| A | 150 | 60 | 60 | 65 |
| B | 78 | 62 | 94 | 60 |
| C | 60 | 147 | 71 | 82 |
| D | 86 | 147 | 60 | 139 |
| E | 60 | 147 | 71 | 67 |
| F | 123 | 138 | 60 | 145 |
| H | 60 | 60 | 140 | 60 |
| I | 68 | 60 | 60 | 141 |
| J | 100 | 70 | 63 | 60 |
| K | 79 | 60 | 72 | 127 |
df.iloc[3::3,[0,2]] += 100
df
| Python | Math | En | Java |
|---|
| A | 150 | 60 | 60 | 65 |
| B | 78 | 62 | 94 | 60 |
| C | 60 | 147 | 71 | 82 |
| D | 186 | 147 | 160 | 139 |
| E | 60 | 147 | 71 | 67 |
| F | 123 | 138 | 60 | 145 |
| H | 160 | 60 | 240 | 60 |
| I | 68 | 60 | 60 | 141 |
| J | 100 | 70 | 63 | 60 |
| K | 179 | 60 | 172 | 127 |
第五部分:数据集成
5.1 concat数据串联
df1 = pd.DataFrame(np.random.randint(0,151,size = (10,3)),
columns=['Python','Math','En'],
index = list('ABCDEFHIJK'))
df2 = pd.DataFrame(np.random.randint(0,151,size = (10,3)),
columns = ['Python','Math','En'],
index = list('QWRTUYOPLM'))
df3 = pd.DataFrame(np.random.randint(0,151,size = (10,2)),
columns=['Java','Chinese'],index = list('ABCDEFHIJK'))
pd.concat([df1,df2],axis = 0)
| Python | Math | En |
|---|
| A | 108 | 74 | 53 |
| B | 98 | 16 | 47 |
| C | 71 | 77 | 128 |
| D | 9 | 123 | 131 |
| E | 25 | 90 | 132 |
| F | 105 | 106 | 86 |
| H | 146 | 42 | 81 |
| I | 83 | 4 | 36 |
| J | 102 | 79 | 8 |
| K | 92 | 11 | 47 |
| Q | 119 | 59 | 43 |
| W | 20 | 62 | 106 |
| R | 77 | 82 | 128 |
| T | 44 | 119 | 15 |
| U | 49 | 149 | 62 |
| Y | 94 | 90 | 88 |
| O | 105 | 72 | 133 |
| P | 87 | 109 | 123 |
| L | 125 | 140 | 149 |
| M | 148 | 22 | 102 |
pd.concat([df1,df3],axis = 1)
| Python | Math | En | Java | Chinese |
|---|
| A | 108 | 74 | 53 | 61 | 81 |
| B | 98 | 16 | 47 | 117 | 117 |
| C | 71 | 77 | 128 | 48 | 4 |
| D | 9 | 123 | 131 | 149 | 115 |
| E | 25 | 90 | 132 | 113 | 73 |
| F | 105 | 106 | 86 | 140 | 26 |
| H | 146 | 42 | 81 | 117 | 118 |
| I | 83 | 4 | 36 | 103 | 91 |
| J | 102 | 79 | 8 | 43 | 20 |
| K | 92 | 11 | 47 | 93 | 72 |
df1.append(df2) # append追加,在行后面直接进行追加
| Python | C++ | Math | En |
|---|
| A | 108 | 59.0 | 74 | 53 |
| B | 98 | 4.0 | 16 | 47 |
| C | 71 | 27.0 | 77 | 128 |
| D | 9 | 17.0 | 123 | 131 |
| E | 25 | 60.0 | 90 | 132 |
| F | 105 | 136.0 | 106 | 86 |
| H | 146 | 112.0 | 42 | 81 |
| I | 83 | 120.0 | 4 | 36 |
| J | 102 | 28.0 | 79 | 8 |
| K | 92 | 53.0 | 11 | 47 |
| Q | 119 | NaN | 59 | 43 |
| W | 20 | NaN | 62 | 106 |
| R | 77 | NaN | 82 | 128 |
| T | 44 | NaN | 119 | 15 |
| U | 49 | NaN | 149 | 62 |
| Y | 94 | NaN | 90 | 88 |
| O | 105 | NaN | 72 | 133 |
| P | 87 | NaN | 109 | 123 |
| L | 125 | NaN | 140 | 149 |
| M | 148 | NaN | 22 | 102 |
df1.append(df3)
| Python | C++ | Math | En | Java | Chinese |
|---|
| A | 108.0 | 59.0 | 74.0 | 53.0 | NaN | NaN |
| B | 98.0 | 4.0 | 16.0 | 47.0 | NaN | NaN |
| C | 71.0 | 27.0 | 77.0 | 128.0 | NaN | NaN |
| D | 9.0 | 17.0 | 123.0 | 131.0 | NaN | NaN |
| E | 25.0 | 60.0 | 90.0 | 132.0 | NaN | NaN |
| F | 105.0 | 136.0 | 106.0 | 86.0 | NaN | NaN |
| H | 146.0 | 112.0 | 42.0 | 81.0 | NaN | NaN |
| I | 83.0 | 120.0 | 4.0 | 36.0 | NaN | NaN |
| J | 102.0 | 28.0 | 79.0 | 8.0 | NaN | NaN |
| K | 92.0 | 53.0 | 11.0 | 47.0 | NaN | NaN |
| A | NaN | NaN | NaN | NaN | 61.0 | 81.0 |
| B | NaN | NaN | NaN | NaN | 117.0 | 117.0 |
| C | NaN | NaN | NaN | NaN | 48.0 | 4.0 |
| D | NaN | NaN | NaN | NaN | 149.0 | 115.0 |
| E | NaN | NaN | NaN | NaN | 113.0 | 73.0 |
| F | NaN | NaN | NaN | NaN | 140.0 | 26.0 |
| H | NaN | NaN | NaN | NaN | 117.0 | 118.0 |
| I | NaN | NaN | NaN | NaN | 103.0 | 91.0 |
| J | NaN | NaN | NaN | NaN | 43.0 | 20.0 |
| K | NaN | NaN | NaN | NaN | 93.0 | 72.0 |
pd.concat([df1,df3],axis = 0)
| Python | C++ | Math | En | Java | Chinese |
|---|
| A | 108.0 | 59.0 | 74.0 | 53.0 | NaN | NaN |
| B | 98.0 | 4.0 | 16.0 | 47.0 | NaN | NaN |
| C | 71.0 | 27.0 | 77.0 | 128.0 | NaN | NaN |
| D | 9.0 | 17.0 | 123.0 | 131.0 | NaN | NaN |
| E | 25.0 | 60.0 | 90.0 | 132.0 | NaN | NaN |
| F | 105.0 | 136.0 | 106.0 | 86.0 | NaN | NaN |
| H | 146.0 | 112.0 | 42.0 | 81.0 | NaN | NaN |
| I | 83.0 | 120.0 | 4.0 | 36.0 | NaN | NaN |
| J | 102.0 | 28.0 | 79.0 | 8.0 | NaN | NaN |
| K | 92.0 | 53.0 | 11.0 | 47.0 | NaN | NaN |
| A | NaN | NaN | NaN | NaN | 61.0 | 81.0 |
| B | NaN | NaN | NaN | NaN | 117.0 | 117.0 |
| C | NaN | NaN | NaN | NaN | 48.0 | 4.0 |
| D | NaN | NaN | NaN | NaN | 149.0 | 115.0 |
| E | NaN | NaN | NaN | NaN | 113.0 | 73.0 |
| F | NaN | NaN | NaN | NaN | 140.0 | 26.0 |
| H | NaN | NaN | NaN | NaN | 117.0 | 118.0 |
| I | NaN | NaN | NaN | NaN | 103.0 | 91.0 |
| J | NaN | NaN | NaN | NaN | 43.0 | 20.0 |
| K | NaN | NaN | NaN | NaN | 93.0 | 72.0 |
5.2 数据插入
df1
| Python | Math | En |
|---|
| A | 108 | 74 | 53 |
| B | 98 | 16 | 47 |
| C | 71 | 77 | 128 |
| D | 9 | 123 | 131 |
| E | 25 | 90 | 132 |
| F | 105 | 106 | 86 |
| H | 146 | 42 | 81 |
| I | 83 | 4 | 36 |
| J | 102 | 79 | 8 |
| K | 92 | 11 | 47 |
df1.insert(loc = 1,
column='C++',
value = np.random.randint(0,151,size = 10))
df1
| Python | C++ | Math | En |
|---|
| A | 108 | 59 | 74 | 53 |
| B | 98 | 4 | 16 | 47 |
| C | 71 | 27 | 77 | 128 |
| D | 9 | 17 | 123 | 131 |
| E | 25 | 60 | 90 | 132 |
| F | 105 | 136 | 106 | 86 |
| H | 146 | 112 | 42 | 81 |
| I | 83 | 120 | 4 | 36 |
| J | 102 | 28 | 79 | 8 |
| K | 92 | 53 | 11 | 47 |
5.3 Join SQL风格合并
df1 = pd.DataFrame(data = {'name':['softpo','Brandon','Ella','Daniel','张三'],
'height':[175,180,169,177,168]})
df2 = pd.DataFrame(data = {'name':['softpo','Brandon','Ella','Daniel','李四'],
'weight':[70,65,74,63,88]})
df3 = pd.DataFrame(data = {'名字':['softpo','Brandon','Ella','Daniel','张三'],
'salary':np.random.randint(20,100,size = 5)})
display(df1,df2,df3)
| name | height |
|---|
| 0 | softpo | 175 |
| 1 | Brandon | 180 |
| 2 | Ella | 169 |
| 3 | Daniel | 177 |
| 4 | 张三 | 168 |
| name | weight |
|---|
| 0 | softpo | 70 |
| 1 | Brandon | 65 |
| 2 | Ella | 74 |
| 3 | Daniel | 63 |
| 4 | 李四 | 88 |
| 名字 | salary |
|---|
| 0 | softpo | 64 |
| 1 | Brandon | 48 |
| 2 | Ella | 25 |
| 3 | Daniel | 26 |
| 4 | 张三 | 96 |
pd.concat([df1,df2],axis = 1)
| name | height | name | weight |
|---|
| 0 | softpo | 175 | softpo | 70 |
| 1 | Brandon | 180 | Brandon | 65 |
| 2 | Ella | 169 | Ella | 74 |
| 3 | Daniel | 177 | Daniel | 63 |
| 4 | 张三 | 168 | 李四 | 88 |
pd.merge(df1,df2,how = 'inner')
| name | height | weight |
|---|
| 0 | softpo | 175 | 70 |
| 1 | Brandon | 180 | 65 |
| 2 | Ella | 169 | 74 |
| 3 | Daniel | 177 | 63 |
pd.merge(df1,df2,how = 'outer')
| name | height | weight |
|---|
| 0 | softpo | 175.0 | 70.0 |
| 1 | Brandon | 180.0 | 65.0 |
| 2 | Ella | 169.0 | 74.0 |
| 3 | Daniel | 177.0 | 63.0 |
| 4 | 张三 | 168.0 | NaN |
| 5 | 李四 | NaN | 88.0 |
pd.merge(df1,df2,how = 'left')
| name | height | weight |
|---|
| 0 | softpo | 175 | 70.0 |
| 1 | Brandon | 180 | 65.0 |
| 2 | Ella | 169 | 74.0 |
| 3 | Daniel | 177 | 63.0 |
| 4 | 张三 | 168 | NaN |
pd.merge(df1,df3,left_on='name',right_on='名字')
| name | height | 名字 | salary |
|---|
| 0 | softpo | 175 | softpo | 64 |
| 1 | Brandon | 180 | Brandon | 48 |
| 2 | Ella | 169 | Ella | 25 |
| 3 | Daniel | 177 | Daniel | 26 |
| 4 | 张三 | 168 | 张三 | 96 |
df4 = pd.DataFrame(data = np.random.randint(0,151,size = (10,3)),
columns=['Python','Math','En'],index = list('ABCDEFHIJK'))
df4
| Python | Math | En |
|---|
| A | 71 | 7 | 89 |
| B | 145 | 116 | 40 |
| C | 56 | 150 | 139 |
| D | 88 | 66 | 41 |
| E | 87 | 139 | 117 |
| F | 141 | 45 | 18 |
| H | 93 | 119 | 114 |
| I | 110 | 89 | 2 |
| J | 2 | 35 | 96 |
| K | 125 | 59 | 9 |
score_mean = df4.mean(axis = 1).round(1)
score_mean
A 55.7
B 100.3
C 115.0
D 65.0
E 114.3
F 68.0
H 108.7
I 67.0
J 44.3
K 64.3
dtype: float64
df4.insert(loc = 2,column='平均分',value=score_mean)
df4
| Python | Math | 平均分 | En |
|---|
| A | 71 | 7 | 55.7 | 89 |
| B | 145 | 116 | 100.3 | 40 |
| C | 56 | 150 | 115.0 | 139 |
| D | 88 | 66 | 65.0 | 41 |
| E | 87 | 139 | 114.3 | 117 |
| F | 141 | 45 | 68.0 | 18 |
| H | 93 | 119 | 108.7 | 114 |
| I | 110 | 89 | 67.0 | 2 |
| J | 2 | 35 | 44.3 | 96 |
| K | 125 | 59 | 64.3 | 9 |
df5 = df4.iloc[:,[0,1,3]]
df5
| Python | Math | En |
|---|
| A | 71 | 7 | 89 |
| B | 145 | 116 | 40 |
| C | 56 | 150 | 139 |
| D | 88 | 66 | 41 |
| E | 87 | 139 | 117 |
| F | 141 | 45 | 18 |
| H | 93 | 119 | 114 |
| I | 110 | 89 | 2 |
| J | 2 | 35 | 96 |
| K | 125 | 59 | 9 |
score_mean.name = '平均分'
score_mean
A 55.7
B 100.3
C 115.0
D 65.0
E 114.3
F 68.0
H 108.7
I 67.0
J 44.3
K 64.3
Name: 平均分, dtype: float64
df5
| Python | Math | En |
|---|
| A | 71 | 7 | 89 |
| B | 145 | 116 | 40 |
| C | 56 | 150 | 139 |
| D | 88 | 66 | 41 |
| E | 87 | 139 | 117 |
| F | 141 | 45 | 18 |
| H | 93 | 119 | 114 |
| I | 110 | 89 | 2 |
| J | 2 | 35 | 96 |
| K | 125 | 59 | 9 |
pd.merge(df5,score_mean,
left_index=True,
right_index=True)
| Python | Math | En | 平均分 |
|---|
| A | 71 | 7 | 89 | 55.7 |
| B | 145 | 116 | 40 | 100.3 |
| C | 56 | 150 | 139 | 115.0 |
| D | 88 | 66 | 41 | 65.0 |
| E | 87 | 139 | 117 | 114.3 |
| F | 141 | 45 | 18 | 68.0 |
| H | 93 | 119 | 114 | 108.7 |
| I | 110 | 89 | 2 | 67.0 |
| J | 2 | 35 | 96 | 44.3 |
| K | 125 | 59 | 9 | 64.3 |
第六部分:数据清洗
df = pd.DataFrame(data = {'color':['red','blue','red','green','green','blue',None,np.NaN,'green'],
'price':[20,15,20,18,18,22,30,30,22]})
df
| color | price |
|---|
| 0 | red | 20 |
| 1 | blue | 15 |
| 2 | red | 20 |
| 3 | green | 18 |
| 4 | green | 18 |
| 5 | blue | 22 |
| 6 | None | 30 |
| 7 | NaN | 30 |
| 8 | green | 22 |
df.drop_duplicates()
| color | price |
|---|
| 0 | red | 20 |
| 1 | blue | 15 |
| 3 | green | 18 |
| 5 | blue | 22 |
| 6 | None | 30 |
| 8 | green | 22 |
df
| color | price |
|---|
| 0 | red | 20 |
| 1 | blue | 15 |
| 2 | red | 20 |
| 3 | green | 18 |
| 4 | green | 18 |
| 5 | blue | 22 |
| 6 | None | 30 |
| 7 | NaN | 30 |
| 8 | green | 22 |
df.dropna()
| color | price |
|---|
| 0 | red | 20 |
| 1 | blue | 15 |
| 2 | red | 20 |
| 3 | green | 18 |
| 4 | green | 18 |
| 5 | blue | 22 |
| 8 | green | 22 |
df.drop(labels=[2,4,6,8])
| color | price |
|---|
| 0 | red | 20 |
| 1 | blue | 15 |
| 3 | green | 18 |
| 5 | blue | 22 |
| 7 | NaN | 30 |
df.drop(labels='color',axis = 1)
| price |
|---|
| 0 | 20 |
| 1 | 15 |
| 2 | 20 |
| 3 | 18 |
| 4 | 18 |
| 5 | 22 |
| 6 | 30 |
| 7 | 30 |
| 8 | 22 |
df.filter(items=['price'])
| price |
|---|
| 0 | 20 |
| 1 | 15 |
| 2 | 20 |
| 3 | 18 |
| 4 | 18 |
| 5 | 22 |
| 6 | 30 |
| 7 | 30 |
| 8 | 22 |
df['size'] = 1024
df
| color | price | size |
|---|
| 0 | red | 20 | 1024 |
| 1 | blue | 15 | 1024 |
| 2 | red | 20 | 1024 |
| 3 | green | 18 | 1024 |
| 4 | green | 18 | 1024 |
| 5 | blue | 22 | 1024 |
| 6 | None | 30 | 1024 |
| 7 | NaN | 30 | 1024 |
| 8 | green | 22 | 1024 |
df.filter(like = 'i')
| price | size |
|---|
| 0 | 20 | 1024 |
| 1 | 15 | 1024 |
| 2 | 20 | 1024 |
| 3 | 18 | 1024 |
| 4 | 18 | 1024 |
| 5 | 22 | 1024 |
| 6 | 30 | 1024 |
| 7 | 30 | 1024 |
| 8 | 22 | 1024 |
df['hello'] = 512
df
| color | price | size | hello |
|---|
| 0 | red | 20 | 1024 | 512 |
| 1 | blue | 15 | 1024 | 512 |
| 2 | red | 20 | 1024 | 512 |
| 3 | green | 18 | 1024 | 512 |
| 4 | green | 18 | 1024 | 512 |
| 5 | blue | 22 | 1024 | 512 |
| 6 | None | 30 | 1024 | 512 |
| 7 | NaN | 30 | 1024 | 512 |
| 8 | green | 22 | 1024 | 512 |
df.filter(regex = 'e$')
| price | size |
|---|
| 0 | 20 | 1024 |
| 1 | 15 | 1024 |
| 2 | 20 | 1024 |
| 3 | 18 | 1024 |
| 4 | 18 | 1024 |
| 5 | 22 | 1024 |
| 6 | 30 | 1024 |
| 7 | 30 | 1024 |
| 8 | 22 | 1024 |
df.filter(regex='e')
| price | size | hello |
|---|
| 0 | 20 | 1024 | 512 |
| 1 | 15 | 1024 | 512 |
| 2 | 20 | 1024 | 512 |
| 3 | 18 | 1024 | 512 |
| 4 | 18 | 1024 | 512 |
| 5 | 22 | 1024 | 512 |
| 6 | 30 | 1024 | 512 |
| 7 | 30 | 1024 | 512 |
| 8 | 22 | 1024 | 512 |
a = np.random.randint(0,1000,size = 200)
a
array([647, 871, 35, 738, 789, 587, 413, 559, 648, 993, 579, 129, 825,
904, 356, 316, 997, 800, 35, 601, 1, 208, 465, 614, 680, 619,
922, 346, 994, 135, 5, 650, 165, 475, 95, 194, 225, 455, 634,
717, 836, 678, 156, 203, 263, 180, 143, 248, 407, 56, 202, 947,
46, 408, 686, 530, 545, 273, 125, 964, 323, 775, 313, 238, 242,
804, 228, 322, 322, 768, 556, 9, 629, 938, 932, 859, 955, 707,
729, 541, 280, 493, 255, 681, 428, 992, 420, 650, 267, 32, 662,
185, 756, 319, 313, 271, 229, 711, 803, 85, 527, 853, 670, 685,
423, 458, 628, 701, 253, 495, 548, 879, 503, 115, 90, 978, 665,
532, 198, 482, 412, 850, 879, 913, 96, 177, 778, 337, 502, 128,
49, 747, 591, 22, 557, 105, 136, 775, 626, 515, 959, 869, 245,
437, 51, 236, 438, 489, 854, 49, 163, 687, 488, 175, 428, 517,
493, 377, 100, 728, 717, 926, 689, 186, 777, 639, 79, 83, 620,
623, 931, 918, 721, 315, 133, 423, 161, 999, 341, 55, 837, 582,
530, 805, 22, 301, 177, 322, 708, 14, 50, 864, 889, 929, 967,
497, 624, 127, 539, 14])
# 异常值,大于800,小于 100算作异常,认为定义的。根据实际情况。
cond = (a <=800) & (a >=100)
a[cond]
array([647, 738, 789, 587, 413, 559, 648, 579, 129, 356, 316, 800, 601,
208, 465, 614, 680, 619, 346, 135, 650, 165, 475, 194, 225, 455,
634, 717, 678, 156, 203, 263, 180, 143, 248, 407, 202, 408, 686,
530, 545, 273, 125, 323, 775, 313, 238, 242, 228, 322, 322, 768,
556, 629, 707, 729, 541, 280, 493, 255, 681, 428, 420, 650, 267,
662, 185, 756, 319, 313, 271, 229, 711, 527, 670, 685, 423, 458,
628, 701, 253, 495, 548, 503, 115, 665, 532, 198, 482, 412, 177,
778, 337, 502, 128, 747, 591, 557, 105, 136, 775, 626, 515, 245,
437, 236, 438, 489, 163, 687, 488, 175, 428, 517, 493, 377, 100,
728, 717, 689, 186, 777, 639, 620, 623, 721, 315, 133, 423, 161,
341, 582, 530, 301, 177, 322, 708, 497, 624, 127, 539])
b = np.random.randn(100000)
b
array([-1.17335196, 2.02215212, -0.29891071, ..., -1.6762474 ,
-1.27071523, -1.15187761])
cond = np.abs(b) > 3*1
b[cond]
array([ 3.46554243, 3.08127362, 3.55119821, 3.62774922, 3.11823028,
3.22620922, -3.10381164, -3.20067563, -3.04607325, -3.04427703,
3.09111414, -3.28220862, 3.00499105, -3.06179762, -3.17331972,
-3.37172359, 3.93766782, -3.22895232, -3.13737479, 3.07612751,
-3.43215209, -3.27660651, -3.35116041, 4.74328695, 3.25586636,
-3.54090785, 3.08881127, 3.00635551, 3.5018534 , -3.14463788,
-3.0182886 , -3.12145648, -3.24276219, 3.08087834, 3.04820238,
-3.24173442, -3.14648209, 3.87748281, -3.07660111, -3.16083928,
3.32641202, -3.05228179, 3.04924043, 3.02825131, -3.08360056,
-3.04890894, -3.27258041, -3.07339115, -3.38375287, -3.14267022,
-3.7207377 , 3.4813841 , -3.12866105, -3.17122631, 3.0599701 ,
3.12393087, 3.20253178, -3.05221958, -3.35532417, 3.02450167,
-3.28385568, 3.3422833 , -3.11052755, -3.09647003, 3.32353664,
-3.70215812, -3.07916575, -3.13546874, 3.20575826, -3.67982084,
-3.17055893, 3.4836615 , -3.30039879, -3.27774497, 3.02125912,
3.12332885, 3.01456477, 3.15958151, -3.34101369, 3.32444673,
3.06479889, 3.14506863, 3.15670827, 3.15066995, 3.14705869,
-3.20526898, -3.0761338 , 3.20716127, -3.20941307, -3.7212859 ,
-3.51785834, -3.06096986, -3.05425748, -3.47049261, 3.22285172,
-3.32233224, -3.04630606, 3.41215312, -3.16482337, -3.01813609,
-3.05441573, -3.10394416, 3.03469642, 3.01493847, -3.11901071,
3.5996865 , 3.48194227, -3.77734847, 3.04588004, 3.10611158,
-3.20473003, -3.4377999 , 3.22680244, -3.1536921 , -3.22798726,
3.34569796, 3.06046948, -3.16955677, 3.12613756, 3.04286964,
3.01148054, 3.18525226, -4.08971624, -3.55427596, -5.39879049,
3.05203254, 3.08944491, -3.02258209, 3.17316913, -3.1615401 ,
3.17205118, -3.24221772, -3.14421237, -3.74675036, 3.61678522,
3.59097443, -3.0302881 , 3.23236707, -3.00850012, 3.33608986,
-3.02859152, -3.7000766 , -3.10992575, -3.00412636, -3.05657102,
-3.05208781, 3.14017797, 3.46457731, 3.15619413, -3.43236114,
3.08259529, -3.84578168, 3.04203424, -3.29444028, -3.01764756,
3.11300256, 3.23071233, 3.20785451, -3.15668756, 3.44176099,
-3.19985577, -3.14126853, -3.26482841, -3.62208271, -3.55305069,
3.09639491, -3.18178713, -3.03662021, 3.17247227, 3.3908074 ,
-3.63563705, -3.56417097, 3.02823554, -3.06955375, 3.74305364,
3.63993306, -3.14193492, -3.04032527, -3.28310908, -3.37949723,
-3.25915912, -3.01206123, -3.10871377, -3.22982732, 3.8136103 ,
3.48893313, 3.9918267 , 3.4526763 , -3.46595488, -3.29996013,
-3.42965097, 3.151502 , 3.10548689, -3.44707735, 3.21881565,
3.50932999, -3.12410382, 3.30296386, 3.02454576, -3.20072608,
3.54339754, -3.17847739, -3.21475045, 3.03546088, -3.06225619,
3.48158164, 3.15243123, -3.06358376, 3.27300242, 3.32577453,
3.23535167, -3.04681725, 3.33439387, 3.10620079, 3.52883469,
-3.1790272 , 3.02641222, -3.45636819, 3.21009424, 3.08045954,
-3.59721754, 3.24693695, 3.05920919, -3.43674159, -3.00370946,
-3.48031594, -3.28748467, 3.42581649, 3.46912521, -3.28384157,
3.76358974, -3.34035865, 3.12978233, 3.44856854, -3.04074246,
3.50018071, 3.33188267, -3.09775514, -3.49356906, -3.09902374,
3.12068562, -3.1776565 , -3.44282129, 3.19286374, -3.28304596,
-3.10080963, -3.37189709, 3.77743156, 3.03547536, 3.22045459,
-3.44007263, 3.01331408, 3.49733677, 3.28831922, 3.62147013,
3.03458981, 3.15447237, -3.33931478, 3.09858431, -3.23592306,
3.3144797 , 3.37067342, -3.18749118, 3.09319307, -3.34390567,
3.29819563, 3.3120354 , 3.04166958, -3.00975323, 3.0347423 ,
-3.82502331, -3.13125028, -3.0876424 , 3.13929221, 3.570775 ,
-3.37420738, 3.17527797, 3.13396148, -3.70088631, -3.04054948,
3.05399103, 3.24908851, 3.19666266, -3.64071456, -3.85271081,
3.06864652, 3.53367592, 3.54650649, 3.6355438 , 3.657715 ,
4.03831601, 3.61651925])
第七部分:数据转换
7.1 轴和元素转换
import numpy as np
import pandas as pd
df = pd.DataFrame(data = np.random.randint(0,10,size = (10,3)),
columns=['Python','Tensorflow','Keras'],
index = list('ABCDEFHIJK'))
df
| Python | Tensorflow | Keras |
|---|
| A | 2 | 5 | 3 |
| B | 5 | 0 | 0 |
| C | 7 | 0 | 4 |
| D | 0 | 4 | 7 |
| E | 8 | 6 | 9 |
| F | 8 | 2 | 6 |
| H | 6 | 7 | 8 |
| I | 7 | 6 | 9 |
| J | 4 | 7 | 9 |
| K | 6 | 7 | 1 |
df.rename(index = {'A':'X','K':'Y'},
columns={'Python':'人工智能'},
inplace=True)
df.replace(5,50,inplace=True)
df
| 人工智能 | Tensorflow | Keras |
|---|
| X | 2 | 50 | 3 |
| B | 50 | 0 | 0 |
| C | 7 | 0 | 4 |
| D | 0 | 4 | 7 |
| E | 8 | 6 | 9 |
| F | 8 | 2 | 6 |
| H | 6 | 7 | 8 |
| I | 7 | 6 | 9 |
| J | 4 | 7 | 9 |
| Y | 6 | 7 | 1 |
df.replace([2,7],1024,inplace=True)
df
| 人工智能 | Tensorflow | Keras |
|---|
| X | 1024 | 50 | 3 |
| B | 50 | 0 | 0 |
| C | 1024 | 0 | 4 |
| D | 0 | 4 | 1024 |
| E | 8 | 6 | 9 |
| F | 8 | 1024 | 6 |
| H | 6 | 1024 | 8 |
| I | 1024 | 6 | 9 |
| J | 4 | 1024 | 9 |
| Y | 6 | 1024 | 1 |
df.iloc[4,2] = np.NaN
df.replace({0:2048,np.nan:-100},inplace=True)
df
| 人工智能 | Tensorflow | Keras |
|---|
| X | 1024 | 50 | 3.0 |
| B | 50 | 2048 | 2048.0 |
| C | 1024 | 2048 | 4.0 |
| D | 2048 | 4 | 1024.0 |
| E | 8 | 6 | -100.0 |
| F | 8 | 1024 | 6.0 |
| H | 6 | 1024 | 8.0 |
| I | 1024 | 6 | 9.0 |
| J | 4 | 1024 | 9.0 |
| Y | 6 | 1024 | 1.0 |
df.replace({'Tensorflow':1024},-1024)
| 人工智能 | Tensorflow | Keras |
|---|
| X | 1024 | 50 | 3.0 |
| B | 50 | 2048 | 2048.0 |
| C | 1024 | 2048 | 4.0 |
| D | 2048 | 4 | 1024.0 |
| E | 8 | 6 | -100.0 |
| F | 8 | -1024 | 6.0 |
| H | 6 | -1024 | 8.0 |
| I | 1024 | 6 | 9.0 |
| J | 4 | -1024 | 9.0 |
| Y | 6 | -1024 | 1.0 |
7.2 map映射元素转变
df['人工智能'].map({1024:3.14,2048:2.718,6:1108})
X 3.140
B NaN
C 3.140
D 2.718
E NaN
F NaN
H 1108.000
I 3.140
J NaN
Y 1108.000
Name: 人工智能, dtype: float64
df['Keras'].map(lambda x :True if x > 0 else False)
X True
B True
C True
D True
E False
F True
H True
I True
J True
Y True
Name: Keras, dtype: bool
def convert(x):
if x >= 1024:
return True
else:
return False
df['level'] = df['Tensorflow'].map(convert)
df
| 人工智能 | Tensorflow | Keras | level |
|---|
| X | 1024 | 50 | 3.0 | False |
| B | 50 | 2048 | 2048.0 | True |
| C | 1024 | 2048 | 4.0 | True |
| D | 2048 | 4 | 1024.0 | False |
| E | 8 | 6 | -100.0 | False |
| F | 8 | 1024 | 6.0 | True |
| H | 6 | 1024 | 8.0 | True |
| I | 1024 | 6 | 9.0 | False |
| J | 4 | 1024 | 9.0 | True |
| Y | 6 | 1024 | 1.0 | True |
7.3 apply映射元素转变
df['人工智能'].apply(lambda x : x + 100)
X 1124
B 150
C 1124
D 2148
E 108
F 108
H 106
I 1124
J 104
Y 106
Name: 人工智能, dtype: int64
df['level'].apply(lambda x:1 if x else 0)
X 0
B 1
C 1
D 0
E 0
F 1
H 1
I 0
J 1
Y 1
Name: level, dtype: int64
df.apply(lambda x : x + 1000)
| 人工智能 | Tensorflow | Keras | level |
|---|
| X | 2024 | 1050 | 1003.0 | 1000 |
| B | 1050 | 3048 | 3048.0 | 1001 |
| C | 2024 | 3048 | 1004.0 | 1001 |
| D | 3048 | 1004 | 2024.0 | 1000 |
| E | 1008 | 1006 | 900.0 | 1000 |
| F | 1008 | 2024 | 1006.0 | 1001 |
| H | 1006 | 2024 | 1008.0 | 1001 |
| I | 2024 | 1006 | 1009.0 | 1000 |
| J | 1004 | 2024 | 1009.0 | 1001 |
| Y | 1006 | 2024 | 1001.0 | 1001 |
def convert(x):
return (x.median(),x.count(),x.min(),x.max(),x.std()) # 返回中位数,返回的是计数
df.apply(convert).round(1) # 默认操作列数据
| 人工智能 | Tensorflow | Keras | level |
|---|
| 0 | 29.0 | 1024.0 | 7.0 | 1 |
| 1 | 10.0 | 10.0 | 10.0 | 10 |
| 2 | 4.0 | 4.0 | -100.0 | False |
| 3 | 2048.0 | 2048.0 | 2048.0 | True |
| 4 | 717.8 | 800.4 | 694.9 | 0.516398 |
df
| 人工智能 | Tensorflow | Keras | level |
|---|
| X | 1024 | 50 | 3.0 | False |
| B | 50 | 2048 | 2048.0 | True |
| C | 1024 | 2048 | 4.0 | True |
| D | 2048 | 4 | 1024.0 | False |
| E | 8 | 6 | -100.0 | False |
| F | 8 | 1024 | 6.0 | True |
| H | 6 | 1024 | 8.0 | True |
| I | 1024 | 6 | 9.0 | False |
| J | 4 | 1024 | 9.0 | True |
| Y | 6 | 1024 | 1.0 | True |
df.apply(convert,axis = 1)
X (26.5, 4, False, 1024, 503.68732033541073)
B (1049.0, 4, True, 2048, 1167.8622564326668)
C (514.0, 4, True, 2048, 979.1007353689405)
D (514.0, 4, False, 2048, 979.3623776042588)
E (3.0, 4, -100.0, 8, 52.443620520834884)
F (7.0, 4, True, 1024, 509.5085049993441)
H (7.0, 4, True, 1024, 509.5085049993441)
I (7.5, 4, False, 1024, 509.51373877453)
J (6.5, 4, True, 1024, 509.6773489179208)
Y (3.5, 4, 1.0, 1024, 510.6721061503164)
dtype: object
7.4 transform元素转变
df = pd.DataFrame(np.random.randint(0,10,size = (10,3)),
columns=['Python','Tensorflow','Keras'],
index = list('ABCDEFHIJK'))
display(df)
df['Python'].transform(lambda x : 1024 if x > 5 else -1024)
| Python | Tensorflow | Keras |
|---|
| A | 1 | 1 | 9 |
| B | 6 | 9 | 1 |
| C | 1 | 4 | 6 |
| D | 9 | 5 | 1 |
| E | 4 | 1 | 8 |
| F | 2 | 5 | 7 |
| H | 7 | 2 | 3 |
| I | 7 | 8 | 9 |
| J | 5 | 4 | 2 |
| K | 7 | 0 | 7 |
A -1024
B 1024
C -1024
D 1024
E -1024
F -1024
H 1024
I 1024
J -1024
K 1024
Name: Python, dtype: int64
df['Tensorflow'].apply([np.sqrt,np.square,np.cumsum]) # 针对一列,进行不同的操作
| sqrt | square | cumsum |
|---|
| A | 1.000000 | 1 | 1 |
| B | 3.000000 | 81 | 10 |
| C | 2.000000 | 16 | 14 |
| D | 2.236068 | 25 | 19 |
| E | 1.000000 | 1 | 20 |
| F | 2.236068 | 25 | 25 |
| H | 1.414214 | 4 | 27 |
| I | 2.828427 | 64 | 35 |
| J | 2.000000 | 16 | 39 |
| K | 0.000000 | 0 | 39 |
df['Tensorflow'].transform([np.sqrt,np.square,np.cumsum])
| sqrt | square | cumsum |
|---|
| A | 1.000000 | 1 | 1 |
| B | 3.000000 | 81 | 10 |
| C | 2.000000 | 16 | 14 |
| D | 2.236068 | 25 | 19 |
| E | 1.000000 | 1 | 20 |
| F | 2.236068 | 25 | 25 |
| H | 1.414214 | 4 | 27 |
| I | 2.828427 | 64 | 35 |
| J | 2.000000 | 16 | 39 |
| K | 0.000000 | 0 | 39 |
def convert(x):
if x > 5:
return True
else:
return False
df.transform({'Python':np.cumsum,'Tensorflow':np.square,'Keras':convert})
| Python | Tensorflow | Keras |
|---|
| A | 1 | 1 | True |
| B | 7 | 81 | False |
| C | 8 | 16 | True |
| D | 17 | 25 | False |
| E | 21 | 1 | True |
| F | 23 | 25 | True |
| H | 30 | 4 | False |
| I | 37 | 64 | True |
| J | 42 | 16 | False |
| K | 49 | 0 | True |
df.apply({'Python':np.cumsum,'Tensorflow':np.square,'Keras':convert})
| Python | Tensorflow | Keras |
|---|
| A | 1 | 1 | True |
| B | 7 | 81 | False |
| C | 8 | 16 | True |
| D | 17 | 25 | False |
| E | 21 | 1 | True |
| F | 23 | 25 | True |
| H | 30 | 4 | False |
| I | 37 | 64 | True |
| J | 42 | 16 | False |
| K | 49 | 0 | True |
7.5 重排随机抽样哑变量
df
| Python | Tensorflow | Keras |
|---|
| A | 1 | 1 | 9 |
| B | 6 | 9 | 1 |
| C | 1 | 4 | 6 |
| D | 9 | 5 | 1 |
| E | 4 | 1 | 8 |
| F | 2 | 5 | 7 |
| H | 7 | 2 | 3 |
| I | 7 | 8 | 9 |
| J | 5 | 4 | 2 |
| K | 7 | 0 | 7 |
index = np.random.permutation(10)
index
array([3, 4, 1, 2, 7, 9, 0, 8, 5, 6])
df.take(index)
| Python | Tensorflow | Keras |
|---|
| D | 9 | 5 | 1 |
| E | 4 | 1 | 8 |
| B | 6 | 9 | 1 |
| C | 1 | 4 | 6 |
| I | 7 | 8 | 9 |
| K | 7 | 0 | 7 |
| A | 1 | 1 | 9 |
| J | 5 | 4 | 2 |
| F | 2 | 5 | 7 |
| H | 7 | 2 | 3 |
df.take(np.random.randint(0,10,size = 20))
| Python | Tensorflow | Keras |
|---|
| J | 5 | 4 | 2 |
| J | 5 | 4 | 2 |
| D | 9 | 5 | 1 |
| K | 7 | 0 | 7 |
| H | 7 | 2 | 3 |
| I | 7 | 8 | 9 |
| J | 5 | 4 | 2 |
| A | 1 | 1 | 9 |
| C | 1 | 4 | 6 |
| J | 5 | 4 | 2 |
| I | 7 | 8 | 9 |
| D | 9 | 5 | 1 |
| I | 7 | 8 | 9 |
| K | 7 | 0 | 7 |
| A | 1 | 1 | 9 |
| B | 6 | 9 | 1 |
| H | 7 | 2 | 3 |
| D | 9 | 5 | 1 |
| B | 6 | 9 | 1 |
| H | 7 | 2 | 3 |
df2 = pd.DataFrame(data = {'key':['a','b','a','b','c','b','c']})
df2
pd.get_dummies(df2,prefix='',prefix_sep='')
| a | b | c |
|---|
| 0 | 1 | 0 | 0 |
| 1 | 0 | 1 | 0 |
| 2 | 1 | 0 | 0 |
| 3 | 0 | 1 | 0 |
| 4 | 0 | 0 | 1 |
| 5 | 0 | 1 | 0 |
| 6 | 0 | 0 | 1 |
第八部分:数据重塑
df
| Python | Tensorflow | Keras |
|---|
| A | 1 | 1 | 9 |
| B | 6 | 9 | 1 |
| C | 1 | 4 | 6 |
| D | 9 | 5 | 1 |
| E | 4 | 1 | 8 |
| F | 2 | 5 | 7 |
| H | 7 | 2 | 3 |
| I | 7 | 8 | 9 |
| J | 5 | 4 | 2 |
| K | 7 | 0 | 7 |
df.T
| A | B | C | D | E | F | H | I | J | K |
|---|
| Python | 1 | 6 | 1 | 9 | 4 | 2 | 7 | 7 | 5 | 7 |
| Tensorflow | 1 | 9 | 4 | 5 | 1 | 5 | 2 | 8 | 4 | 0 |
| Keras | 9 | 1 | 6 | 1 | 8 | 7 | 3 | 9 | 2 | 7 |
df2 = pd.DataFrame(np.random.randint(0,10,size = (20,3)),
columns=['Python','Math','En'],
index = pd.MultiIndex.from_product([list('ABCDEFHIJK'),['期中','期末']]))
df2
| | Python | Math | En |
|---|
| A | 期中 | 3 | 3 | 0 |
| 期末 | 6 | 5 | 8 | |
| B | 期中 | 5 | 5 | 9 |
| 期末 | 7 | 5 | 2 | |
| C | 期中 | 0 | 7 | 9 |
| 期末 | 9 | 7 | 5 | |
| D | 期中 | 5 | 6 | 5 |
| 期末 | 7 | 9 | 6 | |
| E | 期中 | 7 | 3 | 9 |
| 期末 | 9 | 1 | 4 | |
| F | 期中 | 9 | 9 | 5 |
| 期末 | 0 | 8 | 9 | |
| H | 期中 | 7 | 0 | 0 |
| 期末 | 1 | 6 | 6 | |
| I | 期中 | 8 | 1 | 8 |
| 期末 | 7 | 9 | 9 | |
| J | 期中 | 5 | 0 | 8 |
| 期末 | 3 | 6 | 6 | |
| K | 期中 | 8 | 2 | 2 |
| 期末 | 3 | 5 | 2 | |
df2.unstack(level = 1)
| Python | Math | En |
|---|
| 期中 | 期末 | 期中 |
| — | — | — | — |
| A | 3 | 6 | 3 |
| B | 5 | 7 | 5 |
| C | 0 | 9 | 7 |
| D | 5 | 7 | 6 |
| E | 7 | 9 | 3 |
| F | 9 | 0 | 9 |
| H | 7 | 1 | 0 |
| I | 8 | 7 | 1 |
| J | 5 | 3 | 0 |
| K | 8 | 3 | 2 |
df2.unstack(level = -1)
| Python | Math | En |
|---|
| 期中 | 期末 | 期中 |
| — | — | — | — |
| A | 3 | 6 | 3 |
| B | 5 | 7 | 5 |
| C | 0 | 9 | 7 |
| D | 5 | 7 | 6 |
| E | 7 | 9 | 3 |
| F | 9 | 0 | 9 |
| H | 7 | 1 | 0 |
| I | 8 | 7 | 1 |
| J | 5 | 3 | 0 |
| K | 8 | 3 | 2 |
df2.stack()
A 期中 Python 3
Math 3
En 0
期末 Python 6
Math 5
En 8
B 期中 Python 5
Math 5
En 9
期末 Python 7
Math 5
En 2
C 期中 Python 0
Math 7
En 9
期末 Python 9
Math 7
En 5
D 期中 Python 5
Math 6
En 5
期末 Python 7
Math 9
En 6
E 期中 Python 7
Math 3
En 9
期末 Python 9
Math 1
En 4
F 期中 Python 9
Math 9
En 5
期末 Python 0
Math 8
En 9
H 期中 Python 7
Math 0
En 0
期末 Python 1
Math 6
En 6
I 期中 Python 8
Math 1
En 8
期末 Python 7
Math 9
En 9
J 期中 Python 5
Math 0
En 8
期末 Python 3
Math 6
En 6
K 期中 Python 8
Math 2
En 2
期末 Python 3
Math 5
En 2
dtype: int64
df2.unstack().stack(level = 0)
| | 期中 | 期末 |
|---|
| A | En | 0 | 8 |
| Math | 3 | 5 | |
| Python | 3 | 6 | |
| B | En | 9 | 2 |
| Math | 5 | 5 | |
| Python | 5 | 7 | |
| C | En | 9 | 5 |
| Math | 7 | 7 | |
| Python | 0 | 9 | |
| D | En | 5 | 6 |
| Math | 6 | 9 | |
| Python | 5 | 7 | |
| E | En | 9 | 4 |
| Math | 3 | 1 | |
| Python | 7 | 9 | |
| F | En | 5 | 9 |
| Math | 9 | 8 | |
| Python | 9 | 0 | |
| H | En | 0 | 6 |
| Math | 0 | 6 | |
| Python | 7 | 1 | |
| I | En | 8 | 9 |
| Math | 1 | 9 | |
| Python | 8 | 7 | |
| J | En | 8 | 6 |
| Math | 0 | 6 | |
| Python | 5 | 3 | |
| K | En | 2 | 2 |
| Math | 2 | 5 | |
| Python | 8 | 3 | |
df2.mean()
Python 5.45
Math 4.85
En 5.60
dtype: float64
df2.mean(axis = 1)
A 期中 2.000000
期末 6.333333
B 期中 6.333333
期末 4.666667
C 期中 5.333333
期末 7.000000
D 期中 5.333333
期末 7.333333
E 期中 6.333333
期末 4.666667
F 期中 7.666667
期末 5.666667
H 期中 2.333333
期末 4.333333
I 期中 5.666667
期末 8.333333
J 期中 4.333333
期末 5.000000
K 期中 4.000000
期末 3.333333
dtype: float64
df2.mean(level=1)
| Python | Math | En |
|---|
| 期中 | 5.7 | 3.6 | 5.5 |
| 期末 | 5.2 | 6.1 | 5.7 |
df2.mean(level = 0)
| Python | Math | En |
|---|
| A | 4.5 | 4.0 | 4.0 |
| B | 6.0 | 5.0 | 5.5 |
| C | 4.5 | 7.0 | 7.0 |
| D | 6.0 | 7.5 | 5.5 |
| E | 8.0 | 2.0 | 6.5 |
| F | 4.5 | 8.5 | 7.0 |
| H | 4.0 | 3.0 | 3.0 |
| I | 7.5 | 5.0 | 8.5 |
| J | 4.0 | 3.0 | 7.0 |
| K | 5.5 | 3.5 | 2.0 |
第九部分:数学和统计方法
9.1 简单统计指标
df = pd.DataFrame(np.random.randint(0,10,size = (20,3)),
columns=['Python','Math','En'],index = list('QWERTYUIOPASDFGHJKLZ'))
df
| Python | Math | En |
|---|
| Q | 1 | 4 | 3 |
| W | 8 | 2 | 0 |
| E | 3 | 0 | 9 |
| R | 9 | 8 | 9 |
| T | 9 | 1 | 3 |
| Y | 5 | 3 | 1 |
| U | 5 | 8 | 0 |
| I | 1 | 8 | 3 |
| O | 0 | 3 | 5 |
| P | 6 | 6 | 1 |
| A | 0 | 4 | 0 |
| S | 3 | 9 | 4 |
| D | 5 | 2 | 8 |
| F | 2 | 9 | 2 |
| G | 1 | 9 | 8 |
| H | 9 | 5 | 2 |
| J | 5 | 7 | 5 |
| K | 2 | 6 | 5 |
| L | 2 | 7 | 3 |
| Z | 2 | 9 | 8 |
df.iloc[6,2] = np.NAN
display(df)
| Python | Math | En |
|---|
| Q | 1 | 4 | 3.0 |
| W | 8 | 2 | 0.0 |
| E | 3 | 0 | 9.0 |
| R | 9 | 8 | 9.0 |
| T | 9 | 1 | 3.0 |
| Y | 5 | 3 | 1.0 |
| U | 5 | 8 | NaN |
| I | 1 | 8 | 3.0 |
| O | 0 | 3 | 5.0 |
| P | 6 | 6 | 1.0 |
| A | 0 | 4 | 0.0 |
| S | 3 | 9 | 4.0 |
| D | 5 | 2 | 8.0 |
| F | 2 | 9 | 2.0 |
| G | 1 | 9 | 8.0 |
| H | 9 | 5 | 2.0 |
| J | 5 | 7 | 5.0 |
| K | 2 | 6 | 5.0 |
| L | 2 | 7 | 3.0 |
| Z | 2 | 9 | 8.0 |
df.count()
Python 20
Math 20
En 19
dtype: int64
display(df.mean(),df.median()) # 平均值,中位数
Python 3.900000
Math 5.500000
En 4.157895
dtype: float64
Python 3.0
Math 6.0
En 3.0
dtype: float64
display(df.min(),df.max())
Python 0.0
Math 0.0
En 0.0
dtype: float64
Python 9.0
Math 9.0
En 9.0
dtype: float64
df['Python'].unique()
array([1, 8, 3, 9, 5, 0, 6, 2])
df['Math'].value_counts()
9 4
8 3
7 2
6 2
4 2
3 2
2 2
5 1
1 1
0 1
Name: Math, dtype: int64
df.quantile(q = [0,0.25,0.5,0.75,1])
| Python | Math | En |
|---|
| 0.00 | 0.00 | 0.0 | 0.0 |
| 0.25 | 1.75 | 3.0 | 2.0 |
| 0.50 | 3.00 | 6.0 | 3.0 |
| 0.75 | 5.25 | 8.0 | 6.5 |
| 1.00 | 9.00 | 9.0 | 9.0 |
df.describe().round(1)
| Python | Math | En |
|---|
| count | 20.0 | 20.0 | 19.0 |
| mean | 3.9 | 5.5 | 4.2 |
| std | 3.0 | 2.9 | 3.0 |
| min | 0.0 | 0.0 | 0.0 |
| 25% | 1.8 | 3.0 | 2.0 |
| 50% | 3.0 | 6.0 | 3.0 |
| 75% | 5.2 | 8.0 | 6.5 |
| max | 9.0 | 9.0 | 9.0 |
9.2 索引标签、位置获取
df['Python'].argmax()
3
df['En'].argmin()
1
df.idxmax()
Python R
Math S
En E
dtype: object
df.idxmin()
Python O
Math E
En W
dtype: object
9.3 更多统计指标
df.cumsum()
| Python | Math | En |
|---|
| Q | 1 | 4 | 3.0 |
| W | 9 | 6 | 3.0 |
| E | 12 | 6 | 12.0 |
| R | 21 | 14 | 21.0 |
| T | 30 | 15 | 24.0 |
| Y | 35 | 18 | 25.0 |
| U | 40 | 26 | NaN |
| I | 41 | 34 | 28.0 |
| O | 41 | 37 | 33.0 |
| P | 47 | 43 | 34.0 |
| A | 47 | 47 | 34.0 |
| S | 50 | 56 | 38.0 |
| D | 55 | 58 | 46.0 |
| F | 57 | 67 | 48.0 |
| G | 58 | 76 | 56.0 |
| H | 67 | 81 | 58.0 |
| J | 72 | 88 | 63.0 |
| K | 74 | 94 | 68.0 |
| L | 76 | 101 | 71.0 |
| Z | 78 | 110 | 79.0 |
df.cumprod()
| Python | Math | En |
|---|
| Q | 1 | 4 | 3.0 |
| W | 8 | 8 | 0.0 |
| E | 24 | 0 | 0.0 |
| R | 216 | 0 | 0.0 |
| T | 1944 | 0 | 0.0 |
| Y | 9720 | 0 | 0.0 |
| U | 48600 | 0 | NaN |
| I | 48600 | 0 | 0.0 |
| O | 0 | 0 | 0.0 |
| P | 0 | 0 | 0.0 |
| A | 0 | 0 | 0.0 |
| S | 0 | 0 | 0.0 |
| D | 0 | 0 | 0.0 |
| F | 0 | 0 | 0.0 |
| G | 0 | 0 | 0.0 |
| H | 0 | 0 | 0.0 |
| J | 0 | 0 | 0.0 |
| K | 0 | 0 | 0.0 |
| L | 0 | 0 | 0.0 |
| Z | 0 | 0 | 0.0 |
df.cummin()
| Python | Math | En |
|---|
| Q | 1 | 4 | 3.0 |
| W | 1 | 2 | 0.0 |
| E | 1 | 0 | 0.0 |
| R | 1 | 0 | 0.0 |
| T | 1 | 0 | 0.0 |
| Y | 1 | 0 | 0.0 |
| U | 1 | 0 | NaN |
| I | 1 | 0 | 0.0 |
| O | 0 | 0 | 0.0 |
| P | 0 | 0 | 0.0 |
| A | 0 | 0 | 0.0 |
| S | 0 | 0 | 0.0 |
| D | 0 | 0 | 0.0 |
| F | 0 | 0 | 0.0 |
| G | 0 | 0 | 0.0 |
| H | 0 | 0 | 0.0 |
| J | 0 | 0 | 0.0 |
| K | 0 | 0 | 0.0 |
| L | 0 | 0 | 0.0 |
| Z | 0 | 0 | 0.0 |
df.cummax()
| Python | Math | En |
|---|
| Q | 1 | 4 | 3.0 |
| W | 8 | 4 | 3.0 |
| E | 8 | 4 | 9.0 |
| R | 9 | 8 | 9.0 |
| T | 9 | 8 | 9.0 |
| Y | 9 | 8 | 9.0 |
| U | 9 | 8 | NaN |
| I | 9 | 8 | 9.0 |
| O | 9 | 8 | 9.0 |
| P | 9 | 8 | 9.0 |
| A | 9 | 8 | 9.0 |
| S | 9 | 9 | 9.0 |
| D | 9 | 9 | 9.0 |
| F | 9 | 9 | 9.0 |
| G | 9 | 9 | 9.0 |
| H | 9 | 9 | 9.0 |
| J | 9 | 9 | 9.0 |
| K | 9 | 9 | 9.0 |
| L | 9 | 9 | 9.0 |
| Z | 9 | 9 | 9.0 |
df.std() # 标准差
Python 3.041814
Math 2.946898
En 3.004869
dtype: float64
df.var()
Python 9.252632
Math 8.684211
En 9.029240
dtype: float64
df.diff()
| Python | Math | En |
|---|
| Q | NaN | NaN | NaN |
| W | 7.0 | -2.0 | -3.0 |
| E | -5.0 | -2.0 | 9.0 |
| R | 6.0 | 8.0 | 0.0 |
| T | 0.0 | -7.0 | -6.0 |
| Y | -4.0 | 2.0 | -2.0 |
| U | 0.0 | 5.0 | NaN |
| I | -4.0 | 0.0 | NaN |
| O | -1.0 | -5.0 | 2.0 |
| P | 6.0 | 3.0 | -4.0 |
| A | -6.0 | -2.0 | -1.0 |
| S | 3.0 | 5.0 | 4.0 |
| D | 2.0 | -7.0 | 4.0 |
| F | -3.0 | 7.0 | -6.0 |
| G | -1.0 | 0.0 | 6.0 |
| H | 8.0 | -4.0 | -6.0 |
| J | -4.0 | 2.0 | 3.0 |
| K | -3.0 | -1.0 | 0.0 |
| L | 0.0 | 1.0 | -2.0 |
| Z | 0.0 | 2.0 | 5.0 |
df.pct_change().round(3)
| Python | Math | En |
|---|
| Q | NaN | NaN | NaN |
| W | 7.000 | -0.500 | -1.000 |
| E | -0.625 | -1.000 | inf |
| R | 2.000 | inf | 0.000 |
| T | 0.000 | -0.875 | -0.667 |
| Y | -0.444 | 2.000 | -0.667 |
| U | 0.000 | 1.667 | 0.000 |
| I | -0.800 | 0.000 | 2.000 |
| O | -1.000 | -0.625 | 0.667 |
| P | inf | 1.000 | -0.800 |
| A | -1.000 | -0.333 | -1.000 |
| S | inf | 1.250 | inf |
| D | 0.667 | -0.778 | 1.000 |
| F | -0.600 | 3.500 | -0.750 |
| G | -0.500 | 0.000 | 3.000 |
| H | 8.000 | -0.444 | -0.750 |
| J | -0.444 | 0.400 | 1.500 |
| K | -0.600 | -0.143 | 0.000 |
| L | 0.000 | 0.167 | -0.400 |
| Z | 0.000 | 0.286 | 1.667 |
9.4 高级统计指标
df.cov()
| Python | Math | En |
|---|
| Python | 9.252632 | -2.157895 | -0.695906 |
| Math | -2.157895 | 8.684211 | 1.160819 |
| En | -0.695906 | 1.160819 | 9.029240 |
df.var() # 方差: 自己和自己计算
Python 9.252632
Math 8.684211
En 9.029240
dtype: float64
df['Python'].cov(df['Math'])
-2.157894736842105
df.corr()
| Python | Math | En |
|---|
| Python | 1.000000 | -0.240731 | -0.074376 |
| Math | -0.240731 | 1.000000 | 0.130217 |
| En | -0.074376 | 0.130217 | 1.000000 |
df.corrwith(df['En'])
Python -0.074376
Math 0.130217
En 1.000000
dtype: float64
第十部分:排序
df = pd.DataFrame(np.random.randint(0,20,size = (20,3)),
columns=['Python','Tensorflow','Keras'],index = list('QWERTYUIOPASDFGHJKLZ'))
df
| Python | Tensorflow | Keras |
|---|
| Q | 17 | 3 | 4 |
| W | 13 | 18 | 7 |
| E | 12 | 11 | 0 |
| R | 3 | 5 | 14 |
| T | 11 | 15 | 7 |
| Y | 5 | 15 | 4 |
| U | 18 | 2 | 7 |
| I | 7 | 3 | 6 |
| O | 1 | 18 | 5 |
| P | 12 | 6 | 0 |
| A | 4 | 18 | 4 |
| S | 15 | 5 | 8 |
| D | 8 | 11 | 14 |
| F | 3 | 2 | 17 |
| G | 4 | 17 | 8 |
| H | 12 | 1 | 4 |
| J | 1 | 2 | 6 |
| K | 17 | 9 | 16 |
| L | 11 | 14 | 4 |
| Z | 16 | 13 | 4 |
df.sort_index(axis = 0,ascending=False)
| Python | Tensorflow | Keras |
|---|
| Z | 16 | 13 | 4 |
| Y | 5 | 15 | 4 |
| W | 13 | 18 | 7 |
| U | 18 | 2 | 7 |
| T | 11 | 15 | 7 |
| S | 15 | 5 | 8 |
| R | 3 | 5 | 14 |
| Q | 17 | 3 | 4 |
| P | 12 | 6 | 0 |
| O | 1 | 18 | 5 |
| L | 11 | 14 | 4 |
| K | 17 | 9 | 16 |
| J | 1 | 2 | 6 |
| I | 7 | 3 | 6 |
| H | 12 | 1 | 4 |
| G | 4 | 17 | 8 |
| F | 3 | 2 | 17 |
| E | 12 | 11 | 0 |
| D | 8 | 11 | 14 |
| A | 4 | 18 | 4 |
df.sort_index(ascending=True)
| Python | Tensorflow | Keras |
|---|
| A | 4 | 18 | 4 |
| D | 8 | 11 | 14 |
| E | 12 | 11 | 0 |
| F | 3 | 2 | 17 |
| G | 4 | 17 | 8 |
| H | 12 | 1 | 4 |
| I | 7 | 3 | 6 |
| J | 1 | 2 | 6 |
| K | 17 | 9 | 16 |
| L | 11 | 14 | 4 |
| O | 1 | 18 | 5 |
| P | 12 | 6 | 0 |
| Q | 17 | 3 | 4 |
| R | 3 | 5 | 14 |
| S | 15 | 5 | 8 |
| T | 11 | 15 | 7 |
| U | 18 | 2 | 7 |
| W | 13 | 18 | 7 |
| Y | 5 | 15 | 4 |
| Z | 16 | 13 | 4 |
df.sort_values(by = 'Python',ascending=True)
| Python | Tensorflow | Keras |
|---|
| J | 1 | 2 | 6 |
| O | 1 | 18 | 5 |
| R | 3 | 5 | 14 |
| F | 3 | 2 | 17 |
| A | 4 | 18 | 4 |
| G | 4 | 17 | 8 |
| Y | 5 | 15 | 4 |
| I | 7 | 3 | 6 |
| D | 8 | 11 | 14 |
| T | 11 | 15 | 7 |
| L | 11 | 14 | 4 |
最后
Python崛起并且风靡,因为优点多、应用领域广、被大牛们认可。学习 Python 门槛很低,但它的晋级路线很多,通过它你能进入机器学习、数据挖掘、大数据,CS等更加高级的领域。Python可以做网络应用,可以做科学计算,数据分析,可以做网络爬虫,可以做机器学习、自然语言处理、可以写游戏、可以做桌面应用…Python可以做的很多,你需要学好基础,再选择明确的方向。这里给大家分享一份全套的 Python 学习资料,给那些想学习 Python 的小伙伴们一点帮助!
👉Python所有方向的学习路线👈
Python所有方向的技术点做的整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照上面的知识点去找对应的学习资源,保证自己学得较为全面。

👉Python必备开发工具👈
工欲善其事必先利其器。学习Python常用的开发软件都在这里了,给大家节省了很多时间。

👉Python全套学习视频👈
我们在看视频学习的时候,不能光动眼动脑不动手,比较科学的学习方法是在理解之后运用它们,这时候练手项目就很适合了。

👉实战案例👈
学python就与学数学一样,是不能只看书不做题的,直接看步骤和答案会让人误以为自己全都掌握了,但是碰到生题的时候还是会一筹莫展。
因此在学习python的过程中一定要记得多动手写代码,教程只需要看一两遍即可。

👉大厂面试真题👈
我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。

了解详情:docs.qq.com/doc/DSnl3ZG…