本文已参与「新人创作礼」活动,一起开启掘金创作之路
导入相关的库
import numpy as np
import pandas as pd
# Print the path of every file found under the current working directory,
# descending into sub-directories.
import os

for dirname, _, filenames in os.walk('./'):
    # The middle element (sub-directory names) is not needed here.
    for filename in filenames:
        print(os.path.join(dirname, filename))
from matplotlib import pyplot as plt
from matplotlib import dates as md
# Load the dataset; the relative path means the CSV must be in the current
# working directory.
data = pd.read_csv("Monkeypox Dataset(Transposed Aug 822).csv")
# Parse the "Dates" column into datetime values.
# `infer_datetime_format=True` was dropped: it is deprecated since pandas 2.0,
# where a strict version of that inference is already the default and passing
# the flag only triggers a warning.
data["Dates"] = pd.to_datetime(data["Dates"])
# Preview the first rows. This is only displayed when it is the last
# expression of a notebook cell; in a plain script it has no visible effect.
data.head()
# Top 20 countries by total infections (Feb. 2022 to Aug. 2022 according to
# the original author; the date range is not checked here).
#
# Sum every numeric column, skipping missing values. `numeric_only=True` keeps
# the datetime "Dates" column out of the reduction: summing datetimes raises
# TypeError on pandas >= 2.0, while older versions silently dropped it.
# NOTE(review): assumes every country column is numeric -- confirm against the CSV.
tot = data.sum(axis=0, skipna=True, numeric_only=True)
# Turn the Series into a two-column frame: the old index (column names) lands
# in "index", the sums in "No of infections".
top = tot.reset_index(name="No of infections")
top20 = top.sort_values(by="No of infections", ascending=False)
# Sorting leaves the row labels shuffled; drop=True discards the old labels
# and renumbers from 0 (drop=False would keep them as an extra column).
Top20 = top20.reset_index(drop=True)
# Give the former index column a meaningful name.
Top20cleaned = Top20.rename(columns={"index": "Country"})
# Keep the 20 largest totals.
Tp20 = Top20cleaned[0:20]
# Notebook-style display of the result; no effect in a plain script.
Tp20
# Horizontal bar chart of the top-20 totals.
# Both columns are pulled out as plain lists and reversed, so the rows are
# handed to barh in the opposite order to the sorted table.
countries = Tp20["Country"].to_list()[::-1]
totals = Tp20["No of infections"].to_list()[::-1]

plt.figure(figsize=(15, 7))
plt.barh(countries, totals, color="brown")
plt.xlabel("total infection")
plt.title("Top20 Total Monkeypox Infection by country")
plt.show()
# Per-date series for five countries: USA, Spain, Germany, England and France,
# all drawn on one figure against the "Dates" column.
x = data["Dates"]
y = data["Spain"]
z = data["England"]
zz = data["United States"]
aa = data["Germany"]
bb = data["France"]
plt.figure(figsize=(15, 7))
plt.plot(x, y, color="Green", label="Spain")
plt.plot(x, z, color="Blue", label="England")
plt.plot(x, zz, color="Red", label="USA")
plt.plot(x, aa, color="Yellow", label="Germany")
# This series is the "France" column; it was previously labelled "Brazil",
# which made the legend disagree with the plotted data.
plt.plot(x, bb, color="Black", label="France")
plt.title("Monkeypox infection rate")
plt.legend()
plt.xlabel("date")
plt.show()