Python并发编程:解锁程序性能的钥匙

24 阅读 · 6 分钟

想象一下,你在餐厅点餐:如果只有一个服务员,他必须依次处理每个顾客的点单,后面的人就要等待。但如果有多名服务员,就能同时服务多个顾客,效率大大提升。程序也是如此,并发编程让程序能够"同时"处理多个任务,极大提升性能和响应速度。

并发编程的三种模式

  1. 多线程:共享内存的协作
  2. 多进程:真正的并行执行
  3. 异步编程:单线程的高效切换

实战代码:构建高性能并发应用

多线程编程

import threading
import time
import queue
from typing import List

class ThreadPool:
    """A simple fixed-size thread pool backed by a task queue.

    Workers are daemon threads that pull ``(task_id, func, args, kwargs)``
    tuples off an internal queue and append ``(task_id, result)`` pairs to
    ``self.results`` under a lock.  A worker exits once the queue stays
    empty for one second.
    """

    def __init__(self, num_threads: int):
        self.num_threads = num_threads
        self.tasks: queue.Queue = queue.Queue()
        self.threads: List[threading.Thread] = []
        self.results: list = []
        self.lock = threading.Lock()
        self._create_threads()

    def _create_threads(self):
        """Spawn the worker threads (daemonic, so they never block exit)."""
        for _ in range(self.num_threads):
            thread = threading.Thread(target=self._worker, daemon=True)
            thread.start()
            self.threads.append(thread)

    def _worker(self):
        """Worker loop: execute queued tasks until no work arrives for 1s."""
        while True:
            try:
                task_id, func, args, kwargs = self.tasks.get(timeout=1)
            except queue.Empty:
                # No task arrived within the timeout -- assume we are done.
                break
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                # BUGFIX: a raising task used to kill the worker thread
                # without calling task_done(), which made wait_completion()
                # (queue.join()) hang forever.  Record the exception as the
                # task's result instead.
                result = exc
            # Thread-safe result collection.
            with self.lock:
                self.results.append((task_id, result))
            # Always mark the task finished so join() can complete.
            self.tasks.task_done()

    def submit(self, task_id, func, *args, **kwargs):
        """Queue a task; ``func(*args, **kwargs)`` runs on a worker thread."""
        self.tasks.put((task_id, func, args, kwargs))

    def wait_completion(self):
        """Block until every submitted task was processed; return results."""
        self.tasks.join()
        return self.results

# 使用线程池的示例
def download_file(url: str) -> str:
    """Simulate fetching a file over the network (an I/O-bound task).

    Sleeps to stand in for network latency, then returns a fake payload
    string derived from the URL.
    """
    print(f"开始下载: {url}")
    # Stand-in for the time spent waiting on a real network transfer.
    time.sleep(2)
    print(f"完成下载: {url}")
    content = f"{url} 的内容"
    return content

def process_data(data_id: int) -> dict:
    """Simulate crunching one record (a CPU-bound task).

    Returns a small summary dict keyed by the record id.
    """
    print(f"开始处理数据 {data_id}")
    # Stand-in for real computation time.
    time.sleep(1)
    outcome = {"id": data_id, "processed": True, "value": data_id * 10}
    print(f"完成处理数据 {data_id}")
    return outcome

def demo_thread_pool():
    """Drive the ThreadPool demo with mixed download and processing tasks."""
    print("=== 线程池演示 ===")

    pool = ThreadPool(3)

    # Five simulated download jobs (I/O-bound).
    urls = [
        "https://example.com/file1",
        "https://example.com/file2",
        "https://example.com/file3",
        "https://example.com/file4",
        "https://example.com/file5",
    ]
    for index, url in enumerate(urls):
        pool.submit(f"download_{index}", download_file, url)

    # Five simulated processing jobs (CPU-bound).
    for index in range(5):
        pool.submit(f"process_{index}", process_data, index)

    # Block until the queue drains, then report everything.
    completed = pool.wait_completion()

    print(f"\n所有任务完成!共处理 {len(completed)} 个任务")
    for task_id, payload in completed:
        print(f"任务 {task_id}: {payload}")

# 运行演示
# demo_thread_pool()

多进程编程

import multiprocessing
import time
import os
from math import sqrt

def cpu_intensive_task(n: int) -> dict:
    """CPU-bound task: compute square roots for n * 1,000,000 integers.

    Returns a summary dict with the task id, worker pid, wall-clock time,
    and the number of values processed.

    Note: the original version appended every sqrt to a list solely to
    report its length, costing O(n) memory (hundreds of MB for large n).
    The count is known up front, so the individual results are discarded.
    """
    pid = os.getpid()
    print(f"进程 {pid} 开始处理任务 {n}")

    start_time = time.time()
    results_count = n * 1000000

    # Burn CPU: compute each sqrt but deliberately do not store it.
    for i in range(results_count):
        sqrt(i)

    execution_time = time.time() - start_time

    print(f"进程 {pid} 完成任务 {n}, 耗时: {execution_time:.2f}秒")
    return {
        "task_id": n,
        "process_id": pid,
        "execution_time": execution_time,
        "results_count": results_count
    }

def demo_multiprocessing():
    """Fan eight CPU-bound tasks out across a pool sized to the CPU count."""
    print("=== 多进程演示 ===")

    worker_count = multiprocessing.cpu_count()
    print(f"系统有 {worker_count} 个CPU核心")

    # map() blocks until every task has returned its summary dict.
    with multiprocessing.Pool(processes=worker_count) as pool:
        results = pool.map(cpu_intensive_task, list(range(1, 9)))

    print("\n所有任务完成!")
    for summary in results:
        print(f"任务 {summary['task_id']}: "
              f"进程 {summary['process_id']}, "
              f"耗时 {summary['execution_time']:.2f}秒")

def producer_consumer_pattern():
    """Demonstrate a producer/consumer pipeline over a multiprocessing.Queue.

    One producer process pushes five items; two consumer processes compete
    for them.  Shutdown uses a ``None`` sentinel that each consumer re-queues
    so its peers can also observe it and exit.

    NOTE(review): the worker functions are defined locally, so this likely
    only works where the "fork" start method is the default (Linux); under
    "spawn" (Windows, and the macOS default) local functions cannot be
    pickled and ``Process(...)`` will fail -- confirm target platforms.
    """
    
    def producer(queue, items):
        """Producer process: push each item with a pause, then the sentinel."""
        for item in items:
            print(f"生产者: 生产 {item}")
            queue.put(item)
            time.sleep(0.5)
        queue.put(None)  # end-of-stream sentinel
    
    def consumer(queue, consumer_id):
        """Consumer process: pull items until the None sentinel arrives."""
        while True:
            item = queue.get()
            if item is None:
                queue.put(None)  # re-queue the sentinel so peers can stop too
                break
            print(f"消费者 {consumer_id}: 处理 {item}")
            time.sleep(1)  # simulated per-item processing time
    
    # Inter-process queue shared by the producer and both consumers.
    shared_queue = multiprocessing.Queue()
    
    # One producer feeding five items.
    producer_process = multiprocessing.Process(
        target=producer, 
        args=(shared_queue, ["A", "B", "C", "D", "E"])
    )
    
    # Two competing consumers draining the same queue.
    consumer_processes = []
    for i in range(2):
        process = multiprocessing.Process(
            target=consumer,
            args=(shared_queue, i)
        )
        consumer_processes.append(process)
    
    # Start everything.
    producer_process.start()
    for process in consumer_processes:
        process.start()
    
    # Wait for a clean shutdown of all three processes.
    producer_process.join()
    for process in consumer_processes:
        process.join()
    
    print("生产者-消费者模式演示完成")

# 运行演示
# demo_multiprocessing()
# producer_consumer_pattern()

异步编程

import asyncio
import aiohttp
import time
from typing import List

class AsyncDownloader:
    """Asynchronous HTTP downloader wrapping a shared aiohttp session.

    Use as an async context manager so the session is always closed.
    """

    def __init__(self):
        # The session is created lazily on __aenter__.
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.session.close()

    async def download_url(self, url: str) -> dict:
        """Fetch one URL; never raises -- always returns a summary dict."""
        try:
            async with self.session.get(url) as response:
                body = await response.text()
                return {
                    "url": url,
                    "status": response.status,
                    "content_length": len(body),
                    "success": True
                }
        except Exception as error:
            # Any failure (DNS, timeout, protocol) becomes a failure record.
            return {
                "url": url,
                "status": 0,
                "error": str(error),
                "success": False
            }

    async def download_multiple(self, urls: List[str]) -> List[dict]:
        """Fetch all URLs concurrently and collect their summary dicts."""
        pending = [self.download_url(target) for target in urls]
        outcomes = await asyncio.gather(*pending, return_exceptions=True)
        return outcomes

async def demo_async_download():
    """Run the async download demo and print success/failure counts."""
    print("=== 异步下载演示 ===")

    # Base URL set repeated three times to get a bigger workload.
    urls = [
        "https://httpbin.org/delay/1",
        "https://httpbin.org/delay/2",
        "https://httpbin.org/delay/1",
        "https://httpbin.org/status/200",
        "https://httpbin.org/status/404"
    ] * 3

    started = time.time()
    async with AsyncDownloader() as downloader:
        results = await downloader.download_multiple(urls)
    finished = time.time()

    print(f"下载完成!共 {len(urls)} 个URL,耗时: {finished - started:.2f}秒")

    # Tally the outcomes.
    successful = sum(1 for r in results if r.get('success', False))
    failed = len(results) - successful

    print(f"成功: {successful}, 失败: {failed}")

async def async_producer_consumer():
    """Asynchronous producer/consumer demo over an asyncio.Queue.

    BUGFIX: the original sentinel-based shutdown dead-locked.  The ``None``
    sentinel was put (and re-put by each consumer) without a matching
    ``task_done()``, so ``await queue.join()`` could never complete.  This
    version uses the canonical pattern: consumers run until cancelled, every
    ``get()`` is matched by a ``task_done()``, and shutdown is join-then-cancel.
    """

    async def producer(queue: asyncio.Queue, count: int):
        """Produce `count` items, pausing briefly between each."""
        for i in range(count):
            item = f"item_{i}"
            await queue.put(item)
            print(f"生产者: 生产 {item}")
            await asyncio.sleep(0.1)  # simulated production time

    async def consumer(queue: asyncio.Queue, consumer_id: int):
        """Consume items until cancelled by the driver."""
        while True:
            item = await queue.get()
            try:
                print(f"消费者 {consumer_id}: 处理 {item}")
                await asyncio.sleep(0.2)  # simulated processing time
            finally:
                # Matching task_done for every get keeps join() accurate.
                queue.task_done()

    # Bounded queue applies back-pressure to the producer.
    queue = asyncio.Queue(maxsize=5)

    producer_task = asyncio.create_task(producer(queue, 10))
    consumer_tasks = [
        asyncio.create_task(consumer(queue, i))
        for i in range(3)
    ]

    # Wait until everything has been produced ...
    await producer_task

    # ... and fully consumed (every put matched by a task_done).
    await queue.join()

    # Consumers idle forever by design; cancel them and reap the tasks.
    for task in consumer_tasks:
        task.cancel()
    await asyncio.gather(*consumer_tasks, return_exceptions=True)

    print("异步生产者-消费者模式演示完成")

# 运行异步演示
async def main():
    """Run both async demos back to back with a separator line between them."""
    await demo_async_download()
    separator = "\n" + "=" * 50 + "\n"
    print(separator)
    await async_producer_consumer()

# asyncio.run(main())

并发模式实战:Web API并发测试

import asyncio
import aiohttp
import threading
import multiprocessing
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

class ConcurrentTester:
    """Benchmark one URL with synchronous, threaded and async HTTP clients.

    Each ``test_*`` method returns a stats dict with the keys: ``method``
    (label), ``requests``, ``successful``, ``failed``, ``time`` (seconds)
    and ``requests_per_second``.
    """

    def __init__(self, target_url: str):
        self.target_url = target_url
        self.results = []

    def test_synchronous(self, requests_count: int) -> dict:
        """Issue `requests_count` GETs one after another and time them."""
        import requests

        start_time = time.time()
        successful = 0
        failed = 0

        for _ in range(requests_count):
            try:
                response = requests.get(self.target_url, timeout=10)
                if response.status_code == 200:
                    successful += 1
                else:
                    failed += 1
            except requests.RequestException:
                # Narrowed from a bare `except:` -- only request-level
                # errors count as failures; KeyboardInterrupt etc. propagate.
                failed += 1

        execution_time = time.time() - start_time
        return {
            "method": "同步",
            "requests": requests_count,
            "successful": successful,
            "failed": failed,
            "time": execution_time,
            "requests_per_second": requests_count / execution_time
        }

    def test_threading(self, requests_count: int, num_threads: int) -> dict:
        """Issue the GETs from a pool of `num_threads` worker threads."""
        # Import once here rather than on every worker invocation.
        import requests

        start_time = time.time()

        def make_request(_):
            # True only on HTTP 200; any request error counts as a failure.
            try:
                response = requests.get(self.target_url, timeout=10)
                return response.status_code == 200
            except requests.RequestException:
                return False

        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            results = list(executor.map(make_request, range(requests_count)))

        execution_time = time.time() - start_time
        successful = sum(results)
        failed = requests_count - successful

        return {
            "method": f"多线程({num_threads}线程)",
            "requests": requests_count,
            "successful": successful,
            "failed": failed,
            "time": execution_time,
            "requests_per_second": requests_count / execution_time
        }

    async def test_async(self, requests_count: int) -> dict:
        """Issue the GETs concurrently on one event loop via aiohttp."""
        start_time = time.time()

        async def make_request(session):
            try:
                async with session.get(self.target_url) as response:
                    return response.status == 200
            except (aiohttp.ClientError, asyncio.TimeoutError):
                # Narrowed from a bare `except:` -- see test_synchronous.
                return False

        async with aiohttp.ClientSession() as session:
            tasks = [make_request(session) for _ in range(requests_count)]
            results = await asyncio.gather(*tasks)

        execution_time = time.time() - start_time
        successful = sum(results)
        failed = requests_count - successful

        return {
            "method": "异步",
            "requests": requests_count,
            "successful": successful,
            "failed": failed,
            "time": execution_time,
            "requests_per_second": requests_count / execution_time
        }

async def run_concurrent_tests():
    """Benchmark sync vs threaded vs async clients and print a comparison."""
    print("=== 并发性能测试 ===")

    tester = ConcurrentTester("https://httpbin.org/delay/1")
    requests_count = 20

    print("执行同步测试...")
    sync_result = tester.test_synchronous(requests_count)

    print("执行多线程测试...")
    thread_result = tester.test_threading(requests_count, 5)

    print("执行异步测试...")
    async_result = await tester.test_async(requests_count)

    results = [sync_result, thread_result, async_result]

    # Render a fixed-width comparison table.
    print("\n性能测试结果:")
    print("-" * 80)
    print(f"{'方法':<20} {'请求数':<8} {'成功':<8} {'失败':<8} {'耗时(秒)':<10} {'QPS':<10}")
    print("-" * 80)
    for row in results:
        print(f"{row['method']:<20} {row['requests']:<8} "
              f"{row['successful']:<8} {row['failed']:<8} "
              f"{row['time']:<10.2f} {row['requests_per_second']:<10.2f}")

    # Highest throughput wins.
    best = max(results, key=lambda stats: stats['requests_per_second'])
    print(f"\n最佳性能: {best['method']} - {best['requests_per_second']:.2f} QPS")

# 运行性能测试
# asyncio.run(run_concurrent_tests())

并发编程核心原则

  1. 正确性优先
  2. 选择合适的并发模型
  3. 资源管理
  4. 错误处理