python数据分析-猴痘病例及感染率分析

阅读量 115 · 阅读时长 1 分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路

导入相关的库

import numpy as np 
import pandas as pd
# Print the path of every file found under the current directory (recursively)
import os
for folder, _subdirs, names in os.walk('./'):
    # Build each full path first, then print one path per line
    for path in (os.path.join(folder, name) for name in names):
        print(path)
from matplotlib import pyplot as plt
from matplotlib import dates as md

# Load the monkeypox dataset from the CSV file in the working directory
data = pd.read_csv("Monkeypox Dataset(Transposed Aug 822).csv")
# Parse the "Dates" column into datetime values.
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# now the default behaviour), so it is no longer passed.
data["Dates"] = pd.to_datetime(data["Dates"])

# Notebook-style preview of the first rows
data.head()

image.png

# Top 20 countries by total infections, Feb. 2022 to Aug. 2022

# Sum every column, skipping missing values. numeric_only=True keeps the
# datetime "Dates" column out of the sum: pandas 2.x raises TypeError when
# asked to sum datetime values instead of silently dropping the column.
tot = data.sum(axis=0, skipna=True, numeric_only=True)

# Turn the Series into a frame with two columns: "index" (the original
# column labels) and "No of infections" (the summed values)
top = tot.reset_index(name="No of infections")

# Largest totals first
top20 = top.sort_values(by="No of infections", ascending=False)

# Sorting leaves the old row index out of order; drop=True discards it and
# renumbers the rows from 0 (drop=False would keep it as an extra column)
Top20 = top20.reset_index(drop=True)

# Rename the label column
Top20cleaned = Top20.rename(columns={"index": "Country"})

# Keep only the first 20 rows
Tp20 = Top20cleaned[0:20]

# Notebook-style display of the result
Tp20

image.png

# Pull the two columns out as plain lists, already reversed so that the
# first row of Tp20 ends up as the topmost bar of the horizontal chart
x = list(reversed(Tp20["Country"].to_list()))
y = list(reversed(Tp20["No of infections"].to_list()))

plt.figure(figsize=(15, 7))
# One horizontal bar per country; bar length is the infection total
plt.barh(y=x, width=y, color="brown")
plt.xlabel("total infection")
plt.title("Top20 Total Monkeypox Infection by country")

plt.show()

image.png

# Infection curves for five countries: USA, Spain, Germany, England and France.
# Each entry pairs a dataset column with its line colour and legend label, so
# the label can never drift away from the column actually plotted (the
# "France" column was previously labelled "Brazil").
series_to_plot = (
    ("Spain", "Green", "Spain"),
    ("England", "Blue", "England"),
    ("United States", "Red", "USA"),
    ("Germany", "Yellow", "Germany"),
    ("France", "Black", "France"),
)

dates = data["Dates"]
plt.figure(figsize=(15, 7))
# Draw one line per country against the shared date axis
for column, color, label in series_to_plot:
    plt.plot(dates, data[column], color=color, label=label)
plt.title("Monkeypox infection rate")
plt.legend()
plt.xlabel("date")
plt.show()
image.png