Python 进程与线程

评论 0 · 阅读时长约 16 分钟

Python 进程与线程

目录


并发编程基础

什么是并发?

并发(Concurrency):多个任务在同一时间段内交替执行。 并行(Parallelism):多个任务在同一时刻同时执行。

# Serial execution: each task runs only after the previous one finishes.
import time

def task(name, duration):
    # Simulate a blocking task that takes `duration` seconds.
    print(f"{name} 开始")
    time.sleep(duration)
    print(f"{name} 结束")

start = time.time()
task("任务1", 2)
task("任务2", 3)
task("任务3", 1)
print(f"总耗时: {time.time() - start:.1f}秒")  # about 6 seconds (2 + 3 + 1)

Python 的并发方式

  1. 多线程(Threading):适合 I/O 密集型任务
  2. 多进程(Multiprocessing):适合 CPU 密集型任务
  3. 异步编程(Asyncio):适合高并发 I/O 任务

线程基础

什么是线程?

线程是进程中的执行单元,一个进程可以包含多个线程,共享进程的内存空间。

优点

  • 轻量级,创建开销小
  • 共享内存,通信方便
  • 适合 I/O 密集型任务

缺点

  • 受 GIL 限制,不能真正并行执行 CPU 密集型任务
  • 需要处理线程安全问题
import threading

# Inspect the thread that is executing this code (the main thread here).
print(threading.current_thread())
print(threading.active_count())  # number of Thread objects currently alive

创建和启动线程

方法1:使用 Thread 类

import threading
import time

def worker(name, duration):
    """Worker function: sleeps `duration` seconds to simulate work."""
    print(f"线程 {name} 开始")
    time.sleep(duration)
    print(f"线程 {name} 结束")

# Create the thread objects (nothing runs yet).
thread1 = threading.Thread(target=worker, args=("线程1", 2))
thread2 = threading.Thread(target=worker, args=("线程2", 3))

# Start both threads; they run concurrently.
thread1.start()
thread2.start()

# Block until both threads have finished.
thread1.join()
thread2.join()

print("所有线程完成")

方法2:继承 Thread 类

import threading
import time

class MyThread(threading.Thread):
    # Subclass Thread and override run() instead of passing target=.
    def __init__(self, name, duration):
        super().__init__()
        self.name = name  # also assigns Thread's own `name` attribute
        self.duration = duration

    def run(self):
        """Code executed in the new thread when start() is called."""
        print(f"线程 {self.name} 开始")
        time.sleep(self.duration)
        print(f"线程 {self.name} 结束")

# Create and start the threads.
thread1 = MyThread("线程1", 2)
thread2 = MyThread("线程2", 3)

thread1.start()
thread2.start()

thread1.join()
thread2.join()

守护线程

import threading
import time

def daemon_worker():
    """Daemon worker: loops forever; killed when the main thread exits."""
    while True:
        print("守护线程运行中...")
        time.sleep(1)

# Create a daemon thread: it does not keep the process alive.
daemon = threading.Thread(target=daemon_worker, daemon=True)
daemon.start()

# The main thread sleeps for 3 seconds, then exits.
time.sleep(3)
print("主线程退出,守护线程也会自动终止")

# 输出:
# 守护线程运行中... (3次)
# 主线程退出,守护线程也会自动终止

线程参数

import threading

def worker(name, delay, result_list):
    """Sleep `delay` seconds, then record completion in the shared list."""
    import time
    time.sleep(delay)
    result_list.append(f"{name} 完成")
    print(f"{name} 完成")

results = []
threads = []

for i in range(5):
    t = threading.Thread(target=worker, args=(f"任务{i}", i * 0.5, results))
    threads.append(t)
    t.start()

# Wait for every thread to finish.
for t in threads:
    t.join()

print(f"结果: {results}")

线程标识

import threading

def show_thread_info():
    # Report identity details of the thread executing this function.
    thread = threading.current_thread()
    print(f"线程名: {thread.name}")
    print(f"线程ID: {thread.ident}")
    print(f"是否守护: {thread.daemon}")

t = threading.Thread(target=show_thread_info, name="自定义线程")
t.start()
t.join()

线程同步

为什么需要同步?

import threading

# ❌ Thread-UNSAFE example: `counter += 1` is a read-modify-write sequence
counter = 0

def increment():
    global counter
    for _ in range(100000):
        counter += 1  # two threads can read the same value and lose updates

threads = [threading.Thread(target=increment) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(f"期望值: 1000000, 实际值: {counter}")
# The actual value is usually below 1000000 (race condition).

Lock(互斥锁)

import threading

counter = 0
lock = threading.Lock()

def safe_increment():
    global counter
    for _ in range(100000):
        with lock:  # acquires and releases the lock automatically
            counter += 1

threads = [threading.Thread(target=safe_increment) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(f"期望值: 1000000, 实际值: {counter}")
# Actual value: 1000000 ✓

RLock(可重入锁)

import threading

rlock = threading.RLock()

def outer_function():
    with rlock:
        print("外层函数获得锁")
        inner_function()  # re-enters the same lock while still holding it

def inner_function():
    with rlock:  # the same thread may acquire an RLock recursively
        print("内层函数获得锁")

thread = threading.Thread(target=outer_function)
thread.start()
thread.join()

Condition(条件变量)

import threading
import time

condition = threading.Condition()
items = []
MAX_ITEMS = 5  # buffer capacity

def producer():
    """Producer: appends items, blocking while the buffer is full."""
    for i in range(10):
        with condition:
            while len(items) >= MAX_ITEMS:
                print("缓冲区已满,等待消费...")
                condition.wait()

            items.append(i)
            print(f"生产: {i}, 当前数量: {len(items)}")
            condition.notify_all()  # wake up waiting consumers

        time.sleep(0.1)

def consumer():
    """Consumer: pops items, blocking while the buffer is empty."""
    for _ in range(10):
        with condition:
            while not items:
                print("缓冲区为空,等待生产...")
                condition.wait()

            item = items.pop(0)
            print(f"消费: {item}, 当前数量: {len(items)}")
            condition.notify_all()  # wake up waiting producers

        time.sleep(0.2)

# Start the producer and consumer threads.
producer_thread = threading.Thread(target=producer)
consumer_thread = threading.Thread(target=consumer)

producer_thread.start()
consumer_thread.start()

producer_thread.join()
consumer_thread.join()

Event(事件)

import threading
import time

event = threading.Event()

def waiter():
    """Block until the event is set by another thread."""
    print("等待事件触发...")
    event.wait()  # blocks until event.set() is called
    print("事件已触发,继续执行")

def setter():
    """Set the event after a 2-second delay."""
    time.sleep(2)
    print("设置事件")
    event.set()

waiter_thread = threading.Thread(target=waiter)
setter_thread = threading.Thread(target=setter)

waiter_thread.start()
setter_thread.start()

waiter_thread.join()
setter_thread.join()

Semaphore(信号量)

import threading
import time

# Allow at most 3 threads into the guarded section at once.
semaphore = threading.Semaphore(3)

def worker(worker_id):
    with semaphore:
        print(f"工人 {worker_id} 开始工作")
        time.sleep(2)
        print(f"工人 {worker_id} 完成工作")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()

for t in threads:
    t.join()

# At most 3 workers run at the same time.

Barrier(屏障)

import threading
import time

# All 3 threads must reach the barrier before any of them may proceed.
barrier = threading.Barrier(3)

def worker(worker_id):
    print(f"工人 {worker_id} 准备就绪")
    barrier.wait()  # block until the other threads arrive
    print(f"工人 {worker_id} 开始工作")
    time.sleep(1)
    print(f"工人 {worker_id} 完成")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(3)]
for t in threads:
    t.start()

for t in threads:
    t.join()

线程通信

Queue(队列)

import threading
import queue
import time

def producer(q, num_items):
    """Producer: put items into the queue, then a None sentinel."""
    for i in range(num_items):
        item = f"物品{i}"
        q.put(item)
        print(f"生产: {item}")
        time.sleep(0.1)

    q.put(None)  # sentinel: tells the consumer to stop

def consumer(q):
    """Consumer: take items off the queue until the sentinel arrives."""
    while True:
        item = q.get()
        if item is None:
            break
        print(f"消费: {item}")
        time.sleep(0.2)
        q.task_done()

# Create a bounded, thread-safe queue.
q = queue.Queue(maxsize=10)

# Start the producer and consumer threads.
producer_thread = threading.Thread(target=producer, args=(q, 5))
consumer_thread = threading.Thread(target=consumer, args=(q,))

producer_thread.start()
consumer_thread.start()

producer_thread.join()
consumer_thread.join()

线程安全的列表

import threading
import queue

class ThreadSafeList:
    """A list wrapper whose append and snapshot operations are lock-protected."""

    def __init__(self):
        self._items = []
        self._guard = threading.Lock()

    def append(self, item):
        """Append `item` while holding the internal lock."""
        with self._guard:
            self._items.append(item)

    def get_all(self):
        """Return a shallow copy of the current contents (thread-safe snapshot)."""
        with self._guard:
            return list(self._items)

# Usage: ten threads append to the shared list concurrently.
safe_list = ThreadSafeList()

def worker(worker_id):
    import time
    time.sleep(0.1)
    safe_list.append(f"数据{worker_id}")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(safe_list.get_all())

线程池

ThreadPoolExecutor

from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def task(name, duration):
    """Task function: sleeps `duration` seconds and returns a result string."""
    print(f"{name} 开始")
    time.sleep(duration)
    print(f"{name} 结束")
    return f"{name} 的结果"

# Create a pool of at most 5 worker threads.
with ThreadPoolExecutor(max_workers=5) as executor:
    # Submit the tasks; map each Future back to its task index.
    futures = {
        executor.submit(task, f"任务{i}", i % 3 + 1): i
        for i in range(10)
    }

    # Collect results in completion order.
    for future in as_completed(futures):
        task_id = futures[future]
        try:
            result = future.result()
            print(f"任务{task_id} 完成: {result}")
        except Exception as e:
            print(f"任务{task_id} 出错: {e}")

map 方法

from concurrent.futures import ThreadPoolExecutor
import time

def square(n):
    """Return n squared after a short simulated I/O delay."""
    time.sleep(0.1)
    return n * n

numbers = list(range(10))

with ThreadPoolExecutor(max_workers=4) as executor:
    # map() yields results in input order (unlike as_completed()).
    results = list(executor.map(square, numbers))

print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

回调函数

from concurrent.futures import ThreadPoolExecutor
import time

def task(n):
    """Square n after a 1-second delay."""
    time.sleep(1)
    return n ** 2

def callback(future):
    # Invoked automatically when the future completes.
    print(f"任务完成,结果: {future.result()}")

with ThreadPoolExecutor(max_workers=2) as executor:
    future = executor.submit(task, 5)
    future.add_done_callback(callback)

    # Wait for completion.
    future.result()

进程基础

什么是进程?

进程是操作系统资源分配的基本单位,每个进程有独立的内存空间。

优点

  • 真正的并行执行(不受 GIL 限制)
  • 稳定性好,一个进程崩溃不影响其他进程
  • 适合 CPU 密集型任务

缺点

  • 创建开销大
  • 进程间通信复杂
  • 占用更多内存
import multiprocessing
import os

# Inspect information about the current process.
print(f"当前进程ID: {os.getpid()}")
print(f"父进程ID: {os.getppid()}")
print(f"CPU核心数: {multiprocessing.cpu_count()}")

创建和管理进程

方法1:使用 Process 类

import multiprocessing
import time

def worker(name, duration):
    """Worker function: sleeps `duration` seconds in a child process."""
    print(f"进程 {name} (PID: {multiprocessing.current_process().pid}) 开始")
    time.sleep(duration)
    print(f"进程 {name} 结束")

if __name__ == "__main__":
    # Create the process objects (the guard is required on spawn platforms).
    process1 = multiprocessing.Process(target=worker, args=("进程1", 2))
    process2 = multiprocessing.Process(target=worker, args=("进程2", 3))

    # Start the child processes.
    process1.start()
    process2.start()

    # Wait for both children to exit.
    process1.join()
    process2.join()

    print("所有进程完成")

方法2:继承 Process 类

import multiprocessing
import time

class MyProcess(multiprocessing.Process):
    # Subclass Process and override run() instead of passing target=.
    def __init__(self, name, duration):
        super().__init__()
        self.name = name
        self.duration = duration

    def run(self):
        """Code executed in the child process when start() is called."""
        print(f"进程 {self.name} (PID: {self.pid}) 开始")
        time.sleep(self.duration)
        print(f"进程 {self.name} 结束")

if __name__ == "__main__":
    process1 = MyProcess("进程1", 2)
    process2 = MyProcess("进程2", 3)

    process1.start()
    process2.start()

    process1.join()
    process2.join()

守护进程

import multiprocessing
import time

def daemon_worker():
    """Daemon process: loops forever; terminated when the parent exits."""
    while True:
        print("守护进程运行中...")
        time.sleep(1)

if __name__ == "__main__":
    daemon = multiprocessing.Process(target=daemon_worker, daemon=True)
    daemon.start()

    time.sleep(3)
    print("主进程退出,守护进程也会终止")

进程池

import multiprocessing
import time

def worker(n):
    # Square n after a 1-second delay.
    time.sleep(1)
    return n ** 2

if __name__ == "__main__":
    # Create a pool of 4 worker processes.
    with multiprocessing.Pool(processes=4) as pool:
        results = pool.map(worker, range(10))
        print(results)

进程间通信

Queue(队列)

import multiprocessing
import time

def producer(queue):
    """Producer process: put 5 items into the shared queue."""
    for i in range(5):
        item = f"物品{i}"
        queue.put(item)
        print(f"生产: {item}")
        time.sleep(0.1)

def consumer(queue):
    """Consumer process: take items until the None sentinel arrives."""
    while True:
        item = queue.get()
        if item is None:
            break
        print(f"消费: {item}")
        time.sleep(0.2)

if __name__ == "__main__":
    queue = multiprocessing.Queue()

    producer_proc = multiprocessing.Process(target=producer, args=(queue,))
    consumer_proc = multiprocessing.Process(target=consumer, args=(queue,))

    producer_proc.start()
    consumer_proc.start()

    producer_proc.join()
    queue.put(None)  # sentinel: tell the consumer to stop
    consumer_proc.join()

Pipe(管道)

import multiprocessing

def sender(pipe):
    """Send a fixed series of messages, then close this end of the pipe."""
    messages = ["消息1", "消息2", "消息3"]
    for msg in messages:
        pipe.send(msg)
        print(f"发送: {msg}")
    pipe.close()

def receiver(pipe):
    """Receive messages until the peer closes its end (EOFError)."""
    while True:
        try:
            msg = pipe.recv()
            print(f"接收: {msg}")
        except EOFError:
            break

if __name__ == "__main__":
    parent_conn, child_conn = multiprocessing.Pipe()

    sender_proc = multiprocessing.Process(target=sender, args=(parent_conn,))
    receiver_proc = multiprocessing.Process(target=receiver, args=(child_conn,))

    sender_proc.start()
    receiver_proc.start()

    # BUG FIX: the parent must close its own copies of both connection ends.
    # Otherwise the write end stays open in the parent process, the receiver
    # never sees EOFError, and receiver_proc.join() blocks forever.
    parent_conn.close()
    child_conn.close()

    sender_proc.join()
    receiver_proc.join()

Value 和 Array(共享内存)

import multiprocessing
import ctypes

def worker(counter, lock):
    """Increment the shared counter 10000 times under the lock."""
    for _ in range(10000):
        with lock:
            counter.value += 1  # += on a Value is not atomic; the lock is required

if __name__ == "__main__":
    # Create a shared integer and a cross-process lock.
    counter = multiprocessing.Value(ctypes.c_int, 0)
    lock = multiprocessing.Lock()

    processes = [
        multiprocessing.Process(target=worker, args=(counter, lock))
        for _ in range(10)
    ]

    for p in processes:
        p.start()
    for p in processes:
        p.join()

    print(f"计数器值: {counter.value}")  # 100000

Manager(管理器)

import multiprocessing

def worker(shared_dict, shared_list, key, value):
    """Write one entry into the manager-backed dict and list."""
    shared_dict[key] = value
    shared_list.append(value)

if __name__ == "__main__":
    with multiprocessing.Manager() as manager:
        # Create proxy objects that can be shared across processes.
        shared_dict = manager.dict()
        shared_list = manager.list()

        processes = []
        for i in range(5):
            p = multiprocessing.Process(
                target=worker,
                args=(shared_dict, shared_list, f"key{i}", i * 10)
            )
            processes.append(p)
            p.start()

        for p in processes:
            p.join()

        print(f"字典: {dict(shared_dict)}")
        print(f"列表: {list(shared_list)}")

GIL 全局解释器锁

什么是 GIL?

GIL(Global Interpreter Lock)是 CPython 解释器的一个机制,确保同一时刻只有一个线程执行 Python 字节码。

影响

  • 多线程不能真正并行执行 CPU 密集型任务
  • I/O 密集型任务不受影响(I/O 操作会释放 GIL)

GIL 的影响测试

import threading
import multiprocessing
import time

# CPU 密集型任务
def cpu_bound_task(n):
    """Return the sum of i*i for i in [0, n) — a pure CPU-bound workload."""
    return sum(i * i for i in range(n))

def test_threading():
    """Run the CPU-bound task on 4 threads (serialized by the GIL)."""
    start = time.time()
    threads = []
    for _ in range(4):
        t = threading.Thread(target=cpu_bound_task, args=(10**7,))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    print(f"多线程耗时: {time.time() - start:.2f}秒")

def test_multiprocessing():
    """Run the CPU-bound task on 4 processes (true parallelism)."""
    start = time.time()
    processes = []
    for _ in range(4):
        p = multiprocessing.Process(target=cpu_bound_task, args=(10**7,))
        processes.append(p)
        p.start()

    for p in processes:
        p.join()

    print(f"多进程耗时: {time.time() - start:.2f}秒")

if __name__ == "__main__":
    test_threading()
    test_multiprocessing()

    # Sample output:
    # 多线程耗时: 8.50秒(受 GIL 限制)
    # 多进程耗时: 2.20秒(真正并行)

绕过 GIL

# 方法1:使用多进程
import multiprocessing

# 方法2:使用 C 扩展(NumPy、SciPy 等)
import numpy as np

# 方法3:使用 asyncio(I/O 密集型)
import asyncio

# 方法4:使用其他 Python 实现(Jython、IronPython)

选择线程还是进程

决策指南

| 场景 | 推荐方式 | 原因 |
| --- | --- | --- |
| I/O 密集型(网络请求、文件读写) | 多线程/异步 | I/O 操作释放 GIL |
| CPU 密集型(计算、数据处理) | 多进程 | 绕过 GIL,真正并行 |
| 需要共享大量数据 | 多线程 | 共享内存,通信简单 |
| 需要稳定性和隔离性 | 多进程 | 进程独立,互不影响 |
| 高并发网络服务 | 异步编程 | 轻量级,高效 |

对比总结

"""
线程 vs 进程对比

线程:
✓ 轻量级,创建快速
✓ 共享内存,通信方便
✓ 适合 I/O 密集型任务
✗ 受 GIL 限制
✗ 需要处理线程安全
✗ 一个线程崩溃可能影响整个进程

进程:
✓ 真正并行执行
✓ 稳定性好,隔离性强
✓ 适合 CPU 密集型任务
✗ 创建开销大
✗ 通信复杂
✗ 占用更多内存
"""

综合实战

实战1: 并发下载器

"""
并发文件下载器
展示线程池和进程池的应用
"""

import os
import time
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse

class ConcurrentDownloader:
    """Download multiple URLs concurrently using a thread pool."""

    def __init__(self, max_workers=5, output_dir="downloads"):
        self.max_workers = max_workers  # size of the thread pool
        self.output_dir = output_dir    # directory downloads are saved into
        os.makedirs(output_dir, exist_ok=True)

    def download_file(self, url):
        """Download a single URL into self.output_dir.

        Returns a result dict with url/filename/size/status (plus an
        "error" key on failure); never raises.
        """
        try:
            print(f"开始下载: {url}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            # Derive a file name from the URL path.
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path) or "download.bin"
            filepath = os.path.join(self.output_dir, filename)

            # Save the response body to disk.
            with open(filepath, 'wb') as f:
                f.write(response.content)

            file_size = len(response.content)
            # BUG FIX: the message previously printed a literal placeholder
            # instead of the downloaded file's name.
            print(f"✓ 下载完成: {filename} ({file_size / 1024:.1f} KB)")

            return {
                "url": url,
                "filename": filename,
                "size": file_size,
                "status": "success"
            }

        except Exception as e:
            print(f"✗ 下载失败: {url}, 错误: {e}")
            return {
                "url": url,
                "filename": None,
                "size": 0,
                "status": "failed",
                "error": str(e)
            }

    def download_multiple(self, urls):
        """Download all `urls` concurrently; returns the list of result dicts."""
        results = []
        start_time = time.time()

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit every download task.
            future_to_url = {
                executor.submit(self.download_file, url): url
                for url in urls
            }

            # Gather results as downloads finish.
            for future in as_completed(future_to_url):
                result = future.result()
                results.append(result)

        elapsed = time.time() - start_time

        # Summary statistics.
        success_count = sum(1 for r in results if r["status"] == "success")
        failed_count = len(results) - success_count
        total_size = sum(r["size"] for r in results)

        print(f"\n{'='*60}")
        print(f"下载完成统计:")
        print(f"  总数: {len(results)}")
        print(f"  成功: {success_count}")
        print(f"  失败: {failed_count}")
        print(f"  总大小: {total_size / 1024 / 1024:.2f} MB")
        print(f"  耗时: {elapsed:.2f} 秒")
        print(f"{'='*60}")

        return results

# Usage example
def main():
    urls = [
        "https://httpbin.org/image/jpeg",
        "https://httpbin.org/image/png",
        "https://httpbin.org/bytes/1024",
        "https://httpbin.org/bytes/2048",
        "https://httpbin.org/bytes/4096",
    ]

    downloader = ConcurrentDownloader(max_workers=3)
    results = downloader.download_multiple(urls)

if __name__ == "__main__":
    main()

实战2: 并行数据处理

"""
并行数据处理系统
展示多进程在 CPU 密集型任务中的应用
"""

import multiprocessing
import time
import random
from concurrent.futures import ProcessPoolExecutor, as_completed

def generate_data(size):
    """Generate `size` pseudo-random integers in the range [1, 1000]."""
    values = []
    for _ in range(size):
        values.append(random.randint(1, 1000))
    return values

def process_chunk(data_chunk):
    """Compute summary statistics for one chunk of numbers (CPU-bound).

    Returns a dict with sum, mean, min, max, sum of squares and count.
    An empty chunk raises (mean divides by len; min/max need elements).
    """
    # Sum of squares — the deliberately CPU-heavy part of the demo.
    # (The original also sorted the chunk into an unused local; that dead
    # work has been removed — it never affected the returned statistics.)
    result = 0
    for num in data_chunk:
        result += num ** 2

    return {
        "sum": sum(data_chunk),
        "mean": sum(data_chunk) / len(data_chunk),
        "min": min(data_chunk),
        "max": max(data_chunk),
        "square_sum": result,
        "count": len(data_chunk)
    }

def merge_results(results):
    """Combine per-chunk statistics into one global statistics dict."""
    total_sum = 0
    total_count = 0
    total_square_sum = 0
    global_min = float('inf')
    global_max = float('-inf')

    # Fold every chunk's stats into the running totals.
    for stats in results:
        total_sum += stats["sum"]
        total_count += stats["count"]
        total_square_sum += stats["square_sum"]
        if stats["min"] < global_min:
            global_min = stats["min"]
        if stats["max"] > global_max:
            global_max = stats["max"]

    return {
        "total_sum": total_sum,
        "total_count": total_count,
        "total_square_sum": total_square_sum,
        "global_min": global_min,
        "global_max": global_max,
        "global_mean": total_sum / total_count,
    }

def parallel_process_data(total_size=1000000, num_processes=4):
    """Split the data into chunks and process them with a process pool."""
    print(f"生成 {total_size} 条数据...")
    data = generate_data(total_size)

    # Split the data into roughly equal chunks, one per process.
    chunk_size = total_size // num_processes
    chunks = [
        data[i:i + chunk_size]
        for i in range(0, total_size, chunk_size)
    ]

    print(f"分割为 {len(chunks)} 个数据块")
    print(f"开始并行处理...")

    start_time = time.time()

    # Process the chunks in parallel with a process pool.
    results = []
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        futures = [
            executor.submit(process_chunk, chunk)
            for chunk in chunks
        ]

        for future in as_completed(futures):
            result = future.result()
            results.append(result)

    elapsed = time.time() - start_time

    # Merge the per-chunk statistics into the final result.
    print("合并结果...")
    final_result = merge_results(results)

    print(f"\n{'='*60}")
    print(f"处理完成:")
    print(f"  数据总量: {final_result['total_count']}")
    print(f"  总和: {final_result['total_sum']}")
    print(f"  平均值: {final_result['global_mean']:.2f}")
    print(f"  最小值: {final_result['global_min']}")
    print(f"  最大值: {final_result['global_max']}")
    print(f"  平方和: {final_result['total_square_sum']}")
    print(f"  耗时: {elapsed:.2f} 秒")
    print(f"{'='*60}")

    return final_result

# Performance comparison
def sequential_process_data(total_size=1000000):
    """Process all the data in a single process (for comparison)."""
    print(f"生成 {total_size} 条数据...")
    data = generate_data(total_size)

    print("开始串行处理...")
    start_time = time.time()

    result = process_chunk(data)

    elapsed = time.time() - start_time

    print(f"\n{'='*60}")
    print(f"串行处理完成:")
    print(f"  数据总量: {result['count']}")
    print(f"  平均值: {result['mean']:.2f}")
    print(f"  耗时: {elapsed:.2f} 秒")
    print(f"{'='*60}")

if __name__ == "__main__":
    # Parallel run
    parallel_process_data(total_size=1000000, num_processes=4)

    print("\n" + "="*60 + "\n")

    # Sequential run (for comparison)
    sequential_process_data(total_size=1000000)

实战3: 生产者-消费者系统

"""
生产者-消费者系统
展示线程同步和通信的综合应用
"""

import threading
import queue
import time
import random
from collections import defaultdict

class TaskQueue:
    """Bounded FIFO of (task_id, payload) pairs with thread-safe id allocation."""

    def __init__(self, maxsize=100):
        self.queue = queue.Queue(maxsize=maxsize)
        self.task_counter = 0
        self.lock = threading.Lock()

    def add_task(self, task_data):
        """Allocate the next task id under the lock, enqueue, return the id."""
        with self.lock:
            self.task_counter += 1
            new_id = self.task_counter
        self.queue.put((new_id, task_data))
        print(f"[生产者] 添加任务 #{new_id}")
        return new_id

    def get_task(self):
        """Pop one (task_id, payload) pair; (None, None) after a 1s idle timeout."""
        try:
            return self.queue.get(timeout=1)
        except queue.Empty:
            return None, None

    def task_done(self):
        """Mark the most recently fetched task as processed."""
        self.queue.task_done()

    def join(self):
        """Block until every enqueued task has been marked done."""
        self.queue.join()

class WorkerPool:
    """Pool of daemon worker threads that consume tasks from a TaskQueue."""

    def __init__(self, num_workers=3):
        self.num_workers = num_workers
        self.workers = []                      # worker Thread objects
        self.results = {}                      # task_id -> result dict
        self.results_lock = threading.Lock()
        self.stats = defaultdict(int)          # "completed"/"failed" counters
        self.stats_lock = threading.Lock()

    def start(self, task_queue, stop_event):
        """Spawn the worker threads; they poll until stop_event is set."""
        for i in range(self.num_workers):
            worker = threading.Thread(
                target=self._worker_loop,
                args=(i, task_queue, stop_event),
                daemon=True
            )
            worker.start()
            self.workers.append(worker)
            print(f"[系统] 工作者 {i} 启动")

    def _worker_loop(self, worker_id, task_queue, stop_event):
        """Main loop of one worker: fetch, process, record, repeat."""
        while not stop_event.is_set():
            task_id, task_data = task_queue.get_task()

            if task_id is None:
                continue  # queue was empty (1s timeout); poll again

            try:
                # Process the task.
                result = self._process_task(worker_id, task_id, task_data)

                # Store the result.
                with self.results_lock:
                    self.results[task_id] = result

                # Update the statistics.
                with self.stats_lock:
                    self.stats["completed"] += 1

                print(f"[工作者{worker_id}] 完成任务 #{task_id}")
                task_queue.task_done()

            except Exception as e:
                # NOTE(review): a failed task never reaches task_done(), so
                # task_queue.join() would block forever if any task fails —
                # confirm this is the intended behavior.
                print(f"[工作者{worker_id}] 任务 #{task_id} 失败: {e}")
                with self.stats_lock:
                    self.stats["failed"] += 1

    def _process_task(self, worker_id, task_id, task_data):
        """Dispatch one task by its "type" field and return its result dict."""
        # Simulate different task types.
        task_type = task_data.get("type", "compute")

        if task_type == "compute":
            # Compute task: sum of squares below n.
            n = task_data.get("value", 10)
            result = sum(i ** 2 for i in range(n))
            time.sleep(random.uniform(0.1, 0.5))
            return {"type": "compute", "input": n, "result": result}

        elif task_type == "transform":
            # Transform task: upper-case the text.
            text = task_data.get("text", "")
            result = text.upper()
            time.sleep(random.uniform(0.05, 0.2))
            return {"type": "transform", "input": text, "result": result}

        else:
            raise ValueError(f"未知任务类型: {task_type}")

    def get_stats(self):
        """Return a snapshot of the completed/failed counters."""
        with self.stats_lock:
            return dict(self.stats)

    def get_results(self):
        """Return a snapshot of all task results."""
        with self.results_lock:
            return dict(self.results)

class ProducerConsumerSystem:
    """Wires together a TaskQueue, a WorkerPool and producer threads."""

    def __init__(self, num_workers=3, queue_size=100):
        self.task_queue = TaskQueue(maxsize=queue_size)
        self.worker_pool = WorkerPool(num_workers=num_workers)
        self.stop_event = threading.Event()   # signals the workers to exit
        self.producer_threads = []

    def start(self):
        """Start the worker pool."""
        print("[系统] 启动生产者-消费者系统")
        self.worker_pool.start(self.task_queue, self.stop_event)

    def add_producer(self, producer_func, num_tasks=10):
        """Run producer_func(task_queue, num_tasks) in its own thread."""
        producer = threading.Thread(
            target=producer_func,
            args=(self.task_queue, num_tasks)
        )
        producer.start()
        self.producer_threads.append(producer)

    def wait_for_completion(self):
        """Block until all producers finish and all tasks are processed."""
        # Wait for every producer to finish enqueuing.
        for producer in self.producer_threads:
            producer.join()

        # Wait until every enqueued task has been marked done.
        self.task_queue.join()

        # Ask the workers to stop.
        self.stop_event.set()

        print("[系统] 所有任务完成")

    def print_report(self):
        """Print statistics and a sample of the collected results."""
        stats = self.worker_pool.get_stats()
        results = self.worker_pool.get_results()

        print(f"\n{'='*60}")
        print(f"系统报告:")
        print(f"  完成任务数: {stats.get('completed', 0)}")
        print(f"  失败任务数: {stats.get('failed', 0)}")
        print(f"  总结果数: {len(results)}")
        print(f"{'='*60}")

        # Show the first few results.
        print("\n部分结果:")
        for task_id, result in list(results.items())[:5]:
            print(f"  任务 #{task_id}: {result}")
        print(f"{'='*60}")

# Producer functions
def compute_producer(task_queue, num_tasks):
    """Enqueue `num_tasks` compute tasks with random inputs."""
    for i in range(num_tasks):
        task_queue.add_task({
            "type": "compute",
            "value": random.randint(100, 1000)
        })
        time.sleep(random.uniform(0.05, 0.2))

def transform_producer(task_queue, num_tasks):
    """Enqueue `num_tasks` transform tasks with random words."""
    texts = ["hello", "world", "python", "programming", "concurrency"]
    for i in range(num_tasks):
        task_queue.add_task({
            "type": "transform",
            "text": random.choice(texts)
        })
        time.sleep(random.uniform(0.05, 0.2))

# Usage example
def main():
    system = ProducerConsumerSystem(num_workers=4, queue_size=50)

    # Start the system.
    system.start()

    # Attach the producers.
    system.add_producer(compute_producer, num_tasks=10)
    system.add_producer(transform_producer, num_tasks=10)

    # Wait for everything to finish.
    system.wait_for_completion()

    # Print the final report.
    system.print_report()

if __name__ == "__main__":
    main()

小结

| 概念 | 说明 | 使用场景 |
| --- | --- | --- |
| 线程 | 轻量级执行单元 | I/O 密集型任务 |
| 进程 | 独立执行单元 | CPU 密集型任务 |
| Lock | 互斥锁 | 保护共享资源 |
| Condition | 条件变量 | 生产者-消费者 |
| Queue | 线程/进程安全队列 | 任务分发 |
| Event | 事件标志 | 线程间信号 |
| Semaphore | 信号量 | 限制并发数 |
| 线程池 | 复用线程 | 大量短任务 |
| 进程池 | 复用进程 | CPU 密集任务 |
| GIL | 全局解释器锁 | CPython 限制 |

核心要点

  • I/O 密集型用线程,CPU 密集型用进程
  • 始终注意线程安全问题
  • 优先使用线程池/进程池
  • 合理使用同步原语
  • 理解 GIL 的限制
  • 避免死锁和竞态条件
  • 善用 Queue 进行通信

掌握并发编程将帮助你编写高性能的 Python 程序!