Python Async Programming in Practice: Make Your Code Run Faster Than AI


It's 2026 and you're still writing synchronous code? You might be leaving a 10x performance gain on the table.

Introduction

Last week I helped a friend optimize a scraper script. A job that used to take 3 hours finished in 18 minutes after the rewrite.

The fix was simple: switch the synchronous requests to asynchronous ones.

In this post I'll share the full code and the thinking behind it, and walk you step by step through using asyncio to speed up your Python code.

I. Why Async?

Let's start with a scenario:

# Synchronous code: requests run one after another
import requests
import time

def fetch_url(url):
    response = requests.get(url)
    return response.text

urls = ['https://api.example.com/data1', 
        'https://api.example.com/data2',
        'https://api.example.com/data3']

start = time.time()
results = [fetch_url(url) for url in urls]  # serial
print(f"Sync time: {time.time() - start:.2f}s")

Output: Sync time: 9.00s (3 seconds per request)

Now the async version:

# Asynchronous code: requests run concurrently
import asyncio
import aiohttp
import time

async def fetch_url_async(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    urls = ['https://api.example.com/data1', 
            'https://api.example.com/data2',
            'https://api.example.com/data3']
    
    start = time.time()
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url_async(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
    print(f"Async time: {time.time() - start:.2f}s")

asyncio.run(main())

Output: Async time: 3.00s (the requests run concurrently, so total time equals the slowest single request)

That's a 3x speedup, and with 100 requests the gap grows far wider.
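
To see the effect without hitting a real API, here's a minimal sketch that uses asyncio.sleep as a stand-in for the 3-second network latency assumed above:

import asyncio
import time

async def fake_request(i):
    await asyncio.sleep(3)  # stand-in for a 3-second network call
    return f"response {i}"

async def main():
    start = time.time()
    results = await asyncio.gather(*(fake_request(i) for i in range(3)))
    print(f"{len(results)} responses in {time.time() - start:.2f}s")  # ~3.00s, not 9

asyncio.run(main())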

II. asyncio Core Concepts

Async programming revolves around three core concepts:

1. The async/await keywords

# async defines a coroutine function
async def hello():
    return "Hello"

# await calls another coroutine (suspends the current one until the result is ready)
async def main():
    result = await hello()
    print(result)
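
One gotcha worth calling out: calling a coroutine function doesn't run it, it only creates a coroutine object. Continuing the snippet above (with asyncio imported):

coro = hello()        # nothing executes yet; this is just a coroutine object
print(coro)           # <coroutine object hello at 0x...>
asyncio.run(main())   # an event loop is what actually drives it; prints "Hello"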

2. asyncio.gather: run multiple coroutines concurrently

async def task1():
    await asyncio.sleep(1)
    return "task 1 done"

async def task2():
    await asyncio.sleep(2)
    return "task 2 done"

async def main():
    # run both tasks concurrently
    results = await asyncio.gather(task1(), task2())
    print(results)  # ['task 1 done', 'task 2 done']
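
Because the two sleeps overlap, main() finishes in roughly 2 seconds (the longer task), not 3, and gather returns results in the order the coroutines were passed in, not the order they finished. A quick check, continuing the example above:

import time

start = time.time()
asyncio.run(main())
print(f"Total: {time.time() - start:.2f}s")  # ~2.00s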

3. asyncio.create_task: run work in the background

async def some_coroutine():
    # a placeholder background job so the example actually runs
    await asyncio.sleep(2)
    print("background job done")

async def main():
    # create the task, but don't wait for it yet
    task = asyncio.create_task(some_coroutine())
    
    # go do other work while the task runs in the background
    await asyncio.sleep(1)
    
    # now wait for the task to finish
    await task
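
On Python 3.11+, asyncio.TaskGroup is a structured alternative that waits for every task and surfaces errors for you; a minimal sketch:

async def main():
    async with asyncio.TaskGroup() as tg:
        tg.create_task(some_coroutine())
        tg.create_task(some_coroutine())
    # both tasks are guaranteed to have finished here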

III. Hands-On: An Async Spider

Here's the complete code for an async web spider:

import asyncio
import aiohttp
from bs4 import BeautifulSoup
import json
from datetime import datetime

class AsyncSpider:
    def __init__(self, concurrency=10):
        self.concurrency = concurrency  # concurrency cap
        self.semaphore = None
        self.results = []
    
    async def fetch_page(self, session, url):
        """Fetch a single page"""
        async with self.semaphore:  # throttle
            try:
                # aiohttp 3.x expects a ClientTimeout object, not a bare number
                timeout = aiohttp.ClientTimeout(total=30)
                async with session.get(url, timeout=timeout) as response:
                    if response.status == 200:
                        html = await response.text()
                        return self.parse_page(html, url)
                    return None
            except Exception as e:
                print(f"Error fetching {url}: {e}")
                return None
    
    def parse_page(self, html, url):
        """Parse a page"""
        soup = BeautifulSoup(html, 'html.parser')
        h1 = soup.find('h1')
        return {
            'url': url,
            'title': h1.text if h1 else '',
            'content': soup.get_text()[:500],  # first 500 characters
            'fetched_at': datetime.now().isoformat()
        }
    
    async def crawl(self, urls):
        """Crawl a list of URLs"""
        self.semaphore = asyncio.Semaphore(self.concurrency)
        
        async with aiohttp.ClientSession() as session:
            tasks = [self.fetch_page(session, url) for url in urls]
            results = await asyncio.gather(*tasks)
            
            self.results = [r for r in results if r is not None]
            return self.results
    
    def save_results(self, filename='results.json'):
        """Save the results"""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.results, f, ensure_ascii=False, indent=2)
        print(f"Saved {len(self.results)} results to {filename}")

# Usage
async def main():
    urls = [
        'https://example.com/article1',
        'https://example.com/article2',
        'https://example.com/article3',
        # ... more URLs
    ]
    
    spider = AsyncSpider(concurrency=5)  # at most 5 concurrent requests
    results = await spider.crawl(urls)
    spider.save_results()

if __name__ == '__main__':
    asyncio.run(main())

Key tuning points:

| Parameter | Meaning | Suggested value |
| --- | --- | --- |
| concurrency | Number of concurrent requests | 5-20 (adjust to the target site's limits) |
| timeout | Per-request timeout | 30 seconds |
| semaphore | Throttling semaphore | Keeps you from getting banned |
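
For flaky pages you could also layer retries on top of the throttling. A hypothetical fetch_with_retry method for AsyncSpider (max_retries and the backoff schedule are my assumptions, not part of the original code):

    async def fetch_with_retry(self, session, url, max_retries=3):
        """Hypothetical helper: retry failed fetches with exponential backoff."""
        for attempt in range(max_retries):
            result = await self.fetch_page(session, url)
            if result is not None:
                return result
            await asyncio.sleep(2 ** attempt)  # wait 1s, 2s, 4s between attempts
        return None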

IV. Async File Operations

Network requests aren't the only candidate; file IO can go async too:

import aiofiles
import asyncio

async def read_file_async(filename):
    """Read a file asynchronously"""
    async with aiofiles.open(filename, mode='r', encoding='utf-8') as f:
        content = await f.read()
    return content

async def write_file_async(filename, content):
    """Write a file asynchronously"""
    async with aiofiles.open(filename, mode='w', encoding='utf-8') as f:
        await f.write(content)

async def batch_process():
    """Process a batch of files"""
    files = ['file1.txt', 'file2.txt', 'file3.txt']
    
    # read concurrently
    contents = await asyncio.gather(
        *[read_file_async(f) for f in files]
    )
    
    # transform, then write concurrently
    tasks = [
        write_file_async(f'result_{f}', content.upper())
        for f, content in zip(files, contents)
    ]
    await asyncio.gather(*tasks)
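
Worth knowing: aiofiles runs the blocking file calls in a thread pool under the hood, because operating systems don't expose true async IO for regular files. If you'd rather skip the dependency, asyncio.to_thread (Python 3.9+) achieves the same effect with the standard library; a minimal sketch:

import asyncio

def read_file(filename):
    # a plain blocking read, pushed onto a worker thread
    with open(filename, encoding='utf-8') as f:
        return f.read()

async def main():
    contents = await asyncio.gather(
        *(asyncio.to_thread(read_file, f) for f in ['file1.txt', 'file2.txt'])
    )
    print([len(c) for c in contents])

asyncio.run(main())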

V. Async Database Operations

Use aiomysql for async database access:

import asyncio
import aiomysql

async def query_db():
    """Async database query"""
    pool = await aiomysql.create_pool(
        host='localhost',
        port=3306,
        user='root',
        password='password',
        db='mydb',
        minsize=5,
        maxsize=10
    )
    
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute("SELECT * FROM users LIMIT 100")
            results = await cur.fetchall()
            
    pool.close()
    await pool.wait_closed()
    return results

async def batch_insert():
    """Batch insert"""
    pool = await aiomysql.create_pool(host='localhost', port=3306,
                                       user='root', password='password', db='mydb')
    
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            # insert 1000 rows in a single batch
            values = [(f'user{i}', f'email{i}@example.com') for i in range(1000)]
            await cur.executemany("INSERT INTO users (name, email) VALUES (%s, %s)", values)
            await conn.commit()
    
    pool.close()
    await pool.wait_closed()
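
The pool really pays off when queries run concurrently, since each coroutine gets its own connection. A sketch reusing the connection settings above (the users table and the id range are placeholders):

async def fetch_user(pool, user_id):
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute("SELECT * FROM users WHERE id = %s", (user_id,))
            return await cur.fetchone()

async def concurrent_queries():
    pool = await aiomysql.create_pool(host='localhost', port=3306,
                                      user='root', password='password', db='mydb')
    # each coroutine acquires its own connection, so the queries overlap
    users = await asyncio.gather(*(fetch_user(pool, i) for i in range(1, 11)))
    pool.close()
    await pool.wait_closed()
    return users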

VI. Performance Comparison

Benchmarked with real HTTP requests (against httpbin.org):

import asyncio
import aiohttp
import time
import requests

# test setup
TEST_URLS = ['https://httpbin.org/delay/1' for _ in range(10)]

def sync_test():
    """Synchronous version"""
    start = time.time()
    for url in TEST_URLS:
        requests.get(url)
    return time.time() - start

async def async_test():
    """Asynchronous version"""
    async def fetch(session, url):
        # read the body inside the context manager so the connection is released
        async with session.get(url) as response:
            await response.read()
    
    start = time.time()
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(fetch(session, url) for url in TEST_URLS))
    return time.time() - start

async def main():
    sync_time = sync_test()
    async_time = await async_test()
    
    print(f"Sync time: {sync_time:.2f}s")
    print(f"Async time: {async_time:.2f}s")
    print(f"Speedup: {sync_time/async_time:.1f}x")

asyncio.run(main())

Test results:

Sync time: 10.12s
Async time: 1.08s
Speedup: 9.4x

VII. FAQ

Q1: Can asyncio be mixed with multithreading?

Yes, but it's rarely necessary. asyncio is built for IO-bound work; for CPU-bound work, hand it off to a pool with run_in_executor():

import asyncio
from concurrent.futures import ProcessPoolExecutor

def cpu_heavy_task(n):
    """CPU-bound task"""
    return sum(i*i for i in range(n))

async def main():
    # get_running_loop() is the modern replacement for get_event_loop() inside coroutines
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor() as executor:
        result = await loop.run_in_executor(
            executor, cpu_heavy_task, 10_000_000
        )
    print(f"Result: {result}")

if __name__ == '__main__':
    # the guard matters: ProcessPoolExecutor re-imports this module in child processes
    asyncio.run(main())
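
If the blocking call is IO-bound rather than CPU-bound (say, a legacy library doing blocking network or disk IO), threads are enough, and asyncio.to_thread (Python 3.9+) is the shortest route:

import asyncio
import time

def blocking_io():
    time.sleep(1)  # stand-in for a blocking library call
    return "done"

async def main():
    # runs blocking_io on a worker thread without stalling the event loop
    result = await asyncio.to_thread(blocking_io)
    print(result)

asyncio.run(main())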

Q2: What if an async request fails?

Wrap each request in its own try-except so one failure doesn't take down the rest:

async def safe_fetch(session, url):
    try:
        async with session.get(url) as response:
            return await response.json()
    except Exception as e:
        print(f"Failed: {url}, Error: {e}")
        return None  # return None instead of propagating the exception
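
Alternatively, asyncio.gather accepts return_exceptions=True, which hands exceptions back as values instead of aborting the whole batch. Continuing the earlier pattern, where tasks is a list of coroutines:

results = await asyncio.gather(*tasks, return_exceptions=True)
successes = [r for r in results if not isinstance(r, Exception)]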

Q3: How do I limit concurrency?

Throttle with asyncio.Semaphore:

semaphore = asyncio.Semaphore(5)  # at most 5 concurrent

async def limited_fetch(url):
    async with semaphore:
        # your request logic goes here
        pass
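
If all traffic goes through a single aiohttp session, you can also cap concurrency at the connection level with aiohttp.TCPConnector's limit parameter:

import asyncio
import aiohttp

async def main():
    connector = aiohttp.TCPConnector(limit=5)  # at most 5 simultaneous connections
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get('https://httpbin.org/get') as response:
            print(response.status)

asyncio.run(main())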

VIII. Summary

The core advantage of async programming, in numbers:

| Scenario | Sync time | Async time | Speedup |
| --- | --- | --- | --- |
| 10 HTTP requests | 10s | 1s | 10x |
| 100 file reads/writes | 100s | 5s | 20x |
| 1000 DB inserts | 100s | 10s | 10x |

Good fits:

  • ✅ Web scraping
  • ✅ Batch API calls
  • ✅ Batch file processing
  • ✅ Batch database operations

Poor fits:

  • ❌ CPU-bound computation (use multiprocessing)
  • ❌ A single one-off request
  • ❌ Tasks that must run strictly in order

Full Source Code

Follow the WeChat account 「船长Talk」 and reply 「异步代码」 to get the complete source code.


Author: 船长 (Captain), a data analyst focused on data analysis + workplace truths + investment insights.

Follow the WeChat account 「CaptainTalk」 for three solid posts every day!