Python Async Programming in Practice: Make Your Code Run Faster Than AI
It's 2026. Still writing synchronous code? You may be missing out on a 10x performance gain.
Preface
Last week I helped a friend optimize a web-scraping script. A job that used to take 3 hours finished in 18 minutes after the change.
The fix was simple: replace the synchronous requests with asynchronous ones.
In this post I share the full code and the reasoning behind it, a hands-on guide to speeding up your Python code with asyncio.
1. Why Do We Need Async?
Start with a scenario:
```python
# Synchronous code: requests run one after another
import requests
import time

def fetch_url(url):
    response = requests.get(url)
    return response.text

urls = ['https://api.example.com/data1',
        'https://api.example.com/data2',
        'https://api.example.com/data3']

start = time.time()
results = [fetch_url(url) for url in urls]  # serial execution
print(f"Sync elapsed: {time.time() - start:.2f}s")
```
Output: Sync elapsed: 9.00s (3 seconds per request)
Now switch to async:
```python
# Asynchronous code: requests run concurrently
import asyncio
import aiohttp
import time

async def fetch_url_async(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    urls = ['https://api.example.com/data1',
            'https://api.example.com/data2',
            'https://api.example.com/data3']
    start = time.time()
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url_async(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
    print(f"Async elapsed: {time.time() - start:.2f}s")

asyncio.run(main())
```
Output: Async elapsed: 3.00s (the requests run concurrently, so total time equals the slowest single request)
That's a 3x speedup. With 100 requests, the gap grows much larger.
2. Core asyncio Concepts
Async programming rests on three core concepts:
1. The async/await keywords

```python
import asyncio

# async defines a coroutine function
async def hello():
    return "Hello"

# await calls another coroutine (suspends the current one until the result is ready)
async def main():
    result = await hello()
    print(result)

asyncio.run(main())
```
2. asyncio.gather: run multiple coroutines concurrently

```python
import asyncio

async def task1():
    await asyncio.sleep(1)
    return "task 1 done"

async def task2():
    await asyncio.sleep(2)
    return "task 2 done"

async def main():
    # run both tasks at the same time; total time is ~2s, not 3s
    results = await asyncio.gather(task1(), task2())
    print(results)  # ['task 1 done', 'task 2 done']

asyncio.run(main())
```
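One gather flag worth knowing: by default, the first exception propagates out of the await. If you pass return_exceptions=True, failures come back as exception objects in the result list instead, so one bad task can't mask the others. A minimal sketch:

```python
import asyncio

async def ok():
    await asyncio.sleep(0.1)
    return "ok"

async def boom():
    raise ValueError("something went wrong")

async def main():
    # return_exceptions=True puts the exception object in the result slot
    # instead of raising it out of the await
    results = await asyncio.gather(ok(), boom(), return_exceptions=True)
    print(results)  # ['ok', ValueError('something went wrong')]

asyncio.run(main())
```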
3. asyncio.create_task: run a coroutine in the background

```python
async def main():
    # create the task; it starts running without being awaited
    # (some_coroutine is a placeholder for any coroutine function)
    task = asyncio.create_task(some_coroutine())
    # keep doing other work in the meantime
    await asyncio.sleep(1)
    # finally, wait for the task to finish
    await task
```
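To make the "background" point concrete, here is a minimal runnable sketch (the timing comment assumes nothing else blocks the loop): the one-second background job and the one-second foreground sleep overlap, so the whole run takes about one second, not two.

```python
import asyncio
import time

async def background_job():
    await asyncio.sleep(1)
    return "background done"

async def main():
    start = time.time()
    task = asyncio.create_task(background_job())  # starts running right away
    await asyncio.sleep(1)  # foreground work overlaps with the background job
    result = await task     # usually already finished by this point
    print(result, f"total: {time.time() - start:.2f}s")  # ~1s, not ~2s

asyncio.run(main())
```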
3. A Real-World Example: An Async Web Scraper
Here is the complete code for an async scraper:
```python
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import json
from datetime import datetime

class AsyncSpider:
    def __init__(self, concurrency=10):
        self.concurrency = concurrency  # max number of concurrent requests
        self.semaphore = None
        self.results = []

    async def fetch_page(self, session, url):
        """Fetch a single page."""
        async with self.semaphore:  # rate limiting
            try:
                async with session.get(url, timeout=30) as response:  # 30s total timeout
                    if response.status == 200:
                        html = await response.text()
                        return self.parse_page(html, url)
                    return None
            except Exception as e:
                print(f"Error fetching {url}: {e}")
                return None

    def parse_page(self, html, url):
        """Parse a page."""
        soup = BeautifulSoup(html, 'html.parser')
        return {
            'url': url,
            'title': soup.find('h1').text if soup.find('h1') else '',
            'content': soup.get_text()[:500],  # first 500 characters
            'fetched_at': datetime.now().isoformat()
        }

    async def crawl(self, urls):
        """Crawl a list of URLs."""
        self.semaphore = asyncio.Semaphore(self.concurrency)
        async with aiohttp.ClientSession() as session:
            tasks = [self.fetch_page(session, url) for url in urls]
            results = await asyncio.gather(*tasks)
            self.results = [r for r in results if r is not None]
            return self.results

    def save_results(self, filename='results.json'):
        """Save results to disk."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, ensure_ascii=False, indent=2)
        print(f"Saved {len(self.results)} results to {filename}")

# Usage example
async def main():
    urls = [
        'https://example.com/article1',
        'https://example.com/article2',
        'https://example.com/article3',
        # ... more URLs
    ]
    spider = AsyncSpider(concurrency=5)  # at most 5 concurrent requests
    results = await spider.crawl(urls)
    spider.save_results()

if __name__ == '__main__':
    asyncio.run(main())
```
Key tuning points:

| Parameter | Purpose | Suggested value |
|---|---|---|
| concurrency | Number of concurrent requests | 5-20 (adjust to the target site's limits) |
| timeout | Per-request timeout | 30 seconds |
| semaphore | Rate-limiting primitive | Prevents getting banned |
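On the timeout row: passing a bare number works, but aiohttp also offers aiohttp.ClientTimeout for finer-grained control. A small sketch (the 30s/5s values are just illustrative):

```python
import aiohttp

# 'total' caps the whole request; 'connect' caps only connection setup
timeout = aiohttp.ClientTimeout(total=30, connect=5)

async def fetch_with_timeout(url):
    # setting the timeout on the session applies it to every request
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(url) as response:
            return await response.text()
```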
4. Async File Operations
Beyond network requests, file IO can be made async as well. One caveat worth knowing: aiofiles runs the blocking file calls in a thread pool behind the scenes, so the gains are smaller than for truly async network IO:
```python
import aiofiles
import asyncio

async def read_file_async(filename):
    """Read a file asynchronously."""
    async with aiofiles.open(filename, mode='r', encoding='utf-8') as f:
        content = await f.read()
        return content

async def write_file_async(filename, content):
    """Write a file asynchronously."""
    async with aiofiles.open(filename, mode='w', encoding='utf-8') as f:
        await f.write(content)

async def batch_process():
    """Process a batch of files."""
    files = ['file1.txt', 'file2.txt', 'file3.txt']
    # read concurrently
    contents = await asyncio.gather(
        *[read_file_async(f) for f in files]
    )
    # transform, then write concurrently
    tasks = [
        write_file_async(f'result_{f}', content.upper())
        for f, content in zip(files, contents)
    ]
    await asyncio.gather(*tasks)
```
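A variation worth considering: instead of reading everything and then writing everything, you can pipeline each file end to end, so one slow file doesn't hold up the whole batch at each phase boundary. A sketch reusing the helpers above (the file names are the same placeholders):

```python
async def process_one(filename):
    # read, transform, and write a single file as one unit of work
    content = await read_file_async(filename)
    await write_file_async(f'result_{filename}', content.upper())

async def batch_process_pipelined():
    files = ['file1.txt', 'file2.txt', 'file3.txt']
    await asyncio.gather(*[process_one(f) for f in files])

asyncio.run(batch_process_pipelined())
```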
5. Async Database Access
aiomysql provides async access to MySQL:
```python
import asyncio
import aiomysql

async def query_db():
    """Run an async query."""
    pool = await aiomysql.create_pool(
        host='localhost',
        port=3306,
        user='root',
        password='password',
        db='mydb',
        minsize=5,
        maxsize=10
    )
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute("SELECT * FROM users LIMIT 100")
            results = await cur.fetchall()
    pool.close()
    await pool.wait_closed()
    return results

async def batch_insert():
    """Insert rows in bulk."""
    pool = await aiomysql.create_pool(host='localhost', port=3306,
                                      user='root', password='password', db='mydb')
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            # bulk insert
            values = [(f'user{i}', f'email{i}@example.com') for i in range(1000)]
            await cur.executemany("INSERT INTO users (name, email) VALUES (%s, %s)", values)
            await conn.commit()
    pool.close()
    await pool.wait_closed()
```
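The pool really pays off when many coroutines query at once: up to maxsize queries run in parallel, and the rest wait for a free connection instead of opening new ones. A minimal sketch using the same placeholder credentials as above:

```python
async def query_user(pool, user_id):
    # each concurrent query borrows a connection from the shared pool
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute("SELECT * FROM users WHERE id = %s", (user_id,))
            return await cur.fetchone()

async def concurrent_queries():
    pool = await aiomysql.create_pool(host='localhost', port=3306,
                                      user='root', password='password',
                                      db='mydb', minsize=5, maxsize=10)
    try:
        results = await asyncio.gather(*[query_user(pool, i) for i in range(20)])
    finally:
        pool.close()
        await pool.wait_closed()
    return results
```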
6. Performance Comparison
A test with real HTTP requests (against httpbin.org):
```python
import asyncio
import aiohttp
import time
import requests

# test setup: 10 URLs that each take ~1 second to respond
TEST_URLS = ['https://httpbin.org/delay/1' for _ in range(10)]

def sync_test():
    """Synchronous version."""
    start = time.time()
    for url in TEST_URLS:
        requests.get(url)
    return time.time() - start

async def async_test():
    """Asynchronous version."""
    start = time.time()
    async with aiohttp.ClientSession() as session:
        # for this simple benchmark we don't read the response bodies
        tasks = [session.get(url) for url in TEST_URLS]
        await asyncio.gather(*tasks)
    return time.time() - start

async def main():
    sync_time = sync_test()
    async_time = await async_test()
    print(f"Sync elapsed: {sync_time:.2f}s")
    print(f"Async elapsed: {async_time:.2f}s")
    print(f"Speedup: {sync_time/async_time:.1f}x")

asyncio.run(main())
```
Test results:

```
Sync elapsed: 10.12s
Async elapsed: 1.08s
Speedup: 9.4x
```
7. FAQ
Q1: Can asyncio be mixed with multithreading?
Yes, but it's usually not recommended. asyncio suits IO-bound tasks; for CPU-bound tasks, hand the work to a process pool via run_in_executor():
```python
import asyncio
from concurrent.futures import ProcessPoolExecutor

def cpu_heavy_task(n):
    """A CPU-bound task."""
    return sum(i*i for i in range(n))

async def main():
    loop = asyncio.get_running_loop()  # preferred over get_event_loop() inside a coroutine
    with ProcessPoolExecutor() as executor:
        result = await loop.run_in_executor(
            executor, cpu_heavy_task, 10_000_000
        )
    print(f"Result: {result}")

if __name__ == '__main__':  # required for multiprocessing on Windows/macOS
    asyncio.run(main())
```
Q2: What if an async request fails?
Wrap each request in its own try/except so a single failure doesn't take down the others:

```python
async def safe_fetch(session, url):
    try:
        async with session.get(url) as response:
            return await response.json()
    except Exception as e:
        print(f"Failed: {url}, Error: {e}")
        return None  # return None instead of raising
```
Q3: How do I cap the concurrency?
Use asyncio.Semaphore as a rate limiter:

```python
semaphore = asyncio.Semaphore(5)  # at most 5 concurrent requests

async def limited_fetch(url):
    async with semaphore:
        # your request logic goes here
        pass
```
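Here is the same idea as a complete runnable demo, with asyncio.sleep standing in for real requests: 15 jobs at 1 second each, capped at 5 at a time, finish in roughly 3 seconds.

```python
import asyncio
import time

async def main():
    semaphore = asyncio.Semaphore(5)  # at most 5 jobs inside at once

    async def limited_job(i):
        async with semaphore:
            await asyncio.sleep(1)  # stand-in for a real request
            return i

    start = time.time()
    await asyncio.gather(*[limited_job(i) for i in range(15)])
    print(f"elapsed: {time.time() - start:.2f}s")  # ~3s: 15 jobs, 5 at a time

asyncio.run(main())
```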
8. Summary
The core advantages of async programming:

| Scenario | Sync time | Async time | Speedup |
|---|---|---|---|
| 10 HTTP requests | 10s | 1s | 10x |
| 100 file reads/writes | 100s | 5s | 20x |
| 1000 DB inserts | 100s | 10s | 10x |
Good fits:
- ✅ Web scrapers
- ✅ Batch API calls
- ✅ Batch file processing
- ✅ Bulk database operations
Poor fits:
- ❌ CPU-bound computation (use multiprocessing)
- ❌ A simple one-off request
- ❌ Tasks that must execute in strict order
Full Code
Follow the official account 「船长Talk」 and reply with the keyword 「异步代码」 to get the complete source code.
Author: 船长 (Captain), data analyst, focused on data analysis + workplace truths + investment insights
Follow the official account 「CaptainTalk」 for three pieces of practical content every day!