pandas DataFrame

374 阅读1分钟

9. 学习 + 练习

import pandas as pd
import numpy as np

# A DataFrame is a tabular data structure: a labelled two-dimensional array
# that carries both a row index and column labels.

data = dict(
    name=['jack', 'marry', 'jasper'],
    age=[17, 18, 19],
    gender=['n', 'n', 'v'],
)

frame = pd.DataFrame(data)
print(frame)

print(type(frame))

# Show the row labels, the column labels and the underlying values,
# each followed by the type of the object that holds them.
print(frame.index, type(frame.index), sep=' -- ')
print(frame.columns, type(frame.columns), sep=' --- ')
print(frame.values, type(frame.values))

# Creating a DataFrame
# 1. From a dict of arrays/lists: every key becomes a column name.
data = dict(
    name=['jack', 'marry', 'jasper'],
    age=[17, 18, 19],
    gender=['n', 'n', 'v'],
)
frame = pd.DataFrame(data)

# An explicit index replaces the default 0..n-1 row labels.
data2 = dict(one=np.random.rand(3), two=np.random.rand(3))
frame2 = pd.DataFrame(data2, index=['a', 'b', 'c'])
print(frame2)

data1 = dict(a=[1, 2, 3], b=[2, 3, 4], c=[4, 5, 6])

# `columns` selects and orders the columns; names that are not keys of the
# dict ('d' and 'e' here) show up as columns filled with NaN.
frame3 = pd.DataFrame(data1, columns=['b', 'c', 'd', 'e'])
print(frame3)

# Same idea with array values: 'a' and 'b' are not keys of data2.
frame4 = pd.DataFrame(data2, columns=['one', 'a', 'b'])
print(frame4)

# The following fails: the index has 5 labels but the arrays hold 3 values.
# frame2 = pd.DataFrame(data2, index=['b', 'c', 'd', 'e', 'f'])
# print(frame2)

# 2. From a dict of Series: the Series do not need the same number of
#    elements; rows are aligned on the index labels automatically.
data3 = {
    'one': pd.Series(np.random.rand(2)),
    'two': pd.Series(np.random.rand(3)),
}
data4 = {
    'one': pd.Series(np.random.rand(2), index=list('ab')),
    'two': pd.Series(np.random.rand(3), index=list('abc')),
}
print(data3, data4, sep='\n')

frame5 = pd.DataFrame(data3)
frame6 = pd.DataFrame(data4)
print(frame5, frame6, sep='\n')

# 3. Directly from a two-dimensional array.
#    (Creation methods 1, 2 and 3 are the ones used most often.)
ar = np.reshape(np.random.rand(9), (3, 3))
print(ar)

# Without labels, rows and columns are numbered from 0.
df1 = pd.DataFrame(ar)
# When given, index and columns must match the array's dimensions in length.
df2 = pd.DataFrame(
    ar,
    index=list('abc'),
    columns=['one', 'two', 'three'],
)
print(df1, df2, sep='\n')

# 4. From a list of dicts: each dict supplies one row, and a key that is
#    absent from a dict leaves NaN in that row.
data = [
    dict(ONE=1, TWO=2),
    dict(ONE=1, TWO=2, THREE=3),
]
print(data)

df1 = pd.DataFrame(data)
# Custom row labels.
df2 = pd.DataFrame(data, index=list('ab'))
# Restrict the result to the listed columns.
df3 = pd.DataFrame(data, columns=['ONE', 'TWO'])
print(df1, df2, df3, sep='\n')

# 5. From a dict of dicts: the outer keys become the columns and the inner
#    keys become the row index.
# Scores are stored as integers throughout. Mixing the int 80 with the
# string '90' would make every column object dtype and break numeric
# operations such as sum() or mean().
data = {'jack': {'math': 80, 'c': 90},
        'marry': {'math': 80, 'c': 90},
        'tom': {'math': 80, 'c': 90}}

df1 = pd.DataFrame(data)
print(df1)

# `columns` picks (and orders) which outer keys are used as columns.
df2 = pd.DataFrame(data, columns=['jack', 'marry', 'tom'])
print(df2)

# For nested dicts, `index` does not rename the rows: it selects rows by
# inner key. None of 'a', 'b', 'd' is an inner key, so every cell is NaN.
df3 = pd.DataFrame(data, index=['a', 'b', 'd'])
print(df3)