使用 aiohttp 和 asyncio 协程的方式,轻量地进行高并发请求
模板代码
from typing import Coroutine, List
from aiohttp import ClientSession
import asyncio
# Configure one shared event loop for the whole module and install it as the
# current loop, so every coroutine scheduled below runs on the same loop.
LOOP = asyncio.new_event_loop()
asyncio.set_event_loop(LOOP)
async def schedule(coros: List[Coroutine]):
    """Run all coroutines concurrently and return their results.

    Results are returned as a list in the same order as *coros*
    (asyncio.gather guarantees result ordering even though execution
    order is unspecified).
    """
    # gather() wraps bare coroutines into Tasks itself, so the explicit
    # ensure_future() wrapping the original did here was redundant.
    return await asyncio.gather(*coros)
def execute(job: Coroutine):
    """Run *job* to completion on the shared event loop and return its result.

    run_until_complete() accepts a coroutine directly and wraps it in a
    Task itself. The original `asyncio.ensure_future(job, loop=LOOP)` form
    used the `loop` keyword argument, which was deprecated in Python 3.8
    and removed in 3.10 (TypeError on modern interpreters).
    """
    # Returning the result is backward-compatible: existing callers that
    # ignored the (previously None) return value are unaffected.
    return LOOP.run_until_complete(job)
# Main download entry point: fans one sub_download() out per URL over a
# single shared HTTP session.
async def download():
    """Fetch every URL concurrently within one shared ClientSession."""
    async with ClientSession() as session:
        urls = []  # get urls from somewhere
        jobs = []
        for target in urls:
            jobs.append(sub_download(target, session))
        results = await schedule(jobs)
        # do something with results
# Worker coroutine: one HTTP GET per call — this is the unit of concurrency.
async def sub_download(url: str, session: ClientSession):
    """Fetch *url* with the shared session and return the raw body bytes."""
    async with session.get(url) as response:
        payload = await response.read()
        # do something with payload
        return payload
if __name__ == '__main__':
    # Submit the top-level coroutine to the shared loop and block until done.
    execute(download())
| 函数 | 说明 |
|---|---|
| asyncio.ensure_future(coro) | 把一个协程对象包装成 future 对象 |
| asyncio.gather(*futures) | 返回一个聚合了 futures 结果的 future。任务会在事件循环中调度,不一定按照传入的顺序执行,但返回的结果列表与传入顺序一致。 |
| LOOP.run_until_complete(future) | 执行 event loop 直到 future 完成,返回 future 的结果或抛出异常 |
关键在于让协程对象(sub_download)们同时在事件循环中调度执行(schedule),这样会(近乎)同时发出 HTTP Request,(近乎)同时等待 HTTP Server 响应和接受 HTTP Response,因此就节省了大量的时间。
案例—下载1621个英雄联盟皮肤壁纸
以下载英雄联盟所有皮肤壁纸为例
from typing import Coroutine, List
import os
from aiohttp import ClientSession
import asyncio
import json
import logging
import time
# Log format: timestamp, level, message.
LOGGER_FORMAT = '%(asctime)s %(levelname)s - %(message)s'
logging.basicConfig(format=LOGGER_FORMAT, level='INFO')
# Tencent CDN endpoints: the full hero roster, and per-hero skin metadata
# (the `{}` placeholder takes a numeric heroId).
HERO_LIST_URL = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
SKINS_URL = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'
# Shared event loop used by execute() below.
LOOP = asyncio.new_event_loop()
asyncio.set_event_loop(LOOP)
async def schedule(coros: List[Coroutine]):
    """Run all coroutines concurrently and return their results.

    Results are returned as a list in the same order as *coros*
    (asyncio.gather guarantees result ordering even though execution
    order is unspecified).
    """
    # gather() wraps bare coroutines into Tasks itself, so the explicit
    # ensure_future() wrapping the original did here was redundant.
    return await asyncio.gather(*coros)
def execute(job: Coroutine):
    """Run *job* to completion on the shared event loop and return its result.

    run_until_complete() accepts a coroutine directly and wraps it in a
    Task itself. The original `asyncio.ensure_future(job, loop=LOOP)` form
    used the `loop` keyword argument, which was deprecated in Python 3.8
    and removed in 3.10 (TypeError on modern interpreters).
    """
    # Returning the result is backward-compatible: existing callers that
    # ignored the (previously None) return value are unaffected.
    return LOOP.run_until_complete(job)
COUNTER = {}
async def async_download_skins():
    """Fetch the hero roster, then download every hero's skins concurrently."""
    async with ClientSession() as session:
        # One request for the roster, then one concurrent job per hero.
        async with session.get(HERO_LIST_URL) as response:
            raw = await response.read()
        heroes = json.loads(raw)['hero']
        jobs = [
            download_save_hero_skins(entry['heroId'], session)
            for entry in heroes
        ]
        await schedule(jobs)
        logging.info(
            "Hero total: %s, skins total: %s",
            len(COUNTER.keys()), sum(COUNTER.values()),
        )
async def download_save_hero_skins(hero_id: int, session: ClientSession):
    """Download all main skin images of one hero into a per-hero directory.

    Fetches the hero's skin metadata, creates a directory named after the
    hero, schedules one concurrent download per skin with a non-empty
    'mainImg', and records the per-hero count in COUNTER.
    """
    url = SKINS_URL.format(hero_id)
    async with session.get(url) as response:
        hero = json.loads(await response.read())
    hero_name = hero['hero']['name'] + ' ' + hero['hero']['title']
    path = os.path.join(os.curdir, hero_name)
    os.makedirs(path, exist_ok=True)
    tasks = []
    for skin in hero['skins']:
        if skin['mainImg']:
            # BUG FIX: some skin names contain '/' (e.g. the "K/DA" line),
            # so strip it from the file-name component ONLY. The original
            # called .replace('/', '') on the whole joined path, which on
            # POSIX removed the directory separator as well and flattened
            # every file into the current directory with a mangled name.
            safe_name = skin['name'].replace('/', '')
            file_path = os.path.join(path, safe_name + ".jpg")
            tasks.append(download_save_skin(skin['mainImg'], file_path, session))
    await schedule(tasks)
    COUNTER[hero['hero']['title']] = len(tasks)
    logging.info("Download OK: %s, %s skins", hero_name, len(tasks))
async def download_save_skin(url: str, file_path: str, session: ClientSession):
    """GET *url* and write the full response body to *file_path*."""
    async with session.get(url) as response:
        body = await response.read()
        with open(file_path, 'wb') as fh:
            fh.write(body)
if __name__ == '__main__':
    # All downloads land under ./lol-skins (one sub-directory per hero).
    path = "lol-skins"
    os.makedirs(path, exist_ok=True)
    os.chdir(path)
    start = time.time()
    execute(async_download_skins())
    end = time.time()
    # Fixed the log wording: the original message read "Speed time".
    logging.info("Time spent: %ss", round(end - start, 2))
HTTP Request 数量: 1784 (1+162+1621)
总用时: 40.54s
平均每 Request 用时: 0.02s
发现当图片请求量大时,响应耗时会增加