Python 进程与线程
目录
并发编程基础
什么是并发?
并发(Concurrency):多个任务在同一时间段内交替执行。 并行(Parallelism):多个任务在同一时刻同时执行。
# 串行执行
import time

def task(name, duration):
    """Run one named task: announce start, block for *duration* seconds, announce end."""
    print(f"{name} 开始")
    time.sleep(duration)
    print(f"{name} 结束")

start = time.time()
# Run the three jobs back to back — total time is the sum of the durations.
for job_name, seconds in (("任务1", 2), ("任务2", 3), ("任务3", 1)):
    task(job_name, seconds)
print(f"总耗时: {time.time() - start:.1f}秒")  # 约 6 秒
Python 的并发方式
- 多线程(Threading):适合 I/O 密集型任务
- 多进程(Multiprocessing):适合 CPU 密集型任务
- 异步编程(Asyncio):适合高并发 I/O 任务
线程基础
什么是线程?
线程是进程中的执行单元,一个进程可以包含多个线程,共享进程的内存空间。
优点:
- 轻量级,创建开销小
- 共享内存,通信方便
- 适合 I/O 密集型任务
缺点:
- 受 GIL 限制,不能真正并行执行 CPU 密集型任务
- 需要处理线程安全问题
import threading

# Inspect the thread we are currently running on.
print(threading.current_thread())
print(threading.active_count())  # number of live threads (at least the main thread)
创建和启动线程
方法1:使用 Thread 类
import threading
import time

def worker(name, duration):
    """Worker body: announce start, block for *duration* seconds, announce end."""
    print(f"线程 {name} 开始")
    time.sleep(duration)
    print(f"线程 {name} 结束")

# Build both threads first, then start, then wait.
thread1 = threading.Thread(target=worker, args=("线程1", 2))
thread2 = threading.Thread(target=worker, args=("线程2", 3))
for t in (thread1, thread2):
    t.start()
for t in (thread1, thread2):
    t.join()  # block until the thread finishes
print("所有线程完成")
方法2:继承 Thread 类
import threading
import time

class MyThread(threading.Thread):
    """Thread subclass whose run() blocks for a configurable duration."""

    def __init__(self, name, duration):
        super().__init__()
        self.name = name          # Thread.name is a settable property
        self.duration = duration

    def run(self):
        """Code executed in the new thread once start() is called."""
        print(f"线程 {self.name} 开始")
        time.sleep(self.duration)
        print(f"线程 {self.name} 结束")

# Create, start, and wait for both threads.
thread1 = MyThread("线程1", 2)
thread2 = MyThread("线程2", 3)
for t in (thread1, thread2):
    t.start()
for t in (thread1, thread2):
    t.join()
守护线程
import threading
import time

def daemon_worker():
    """Heartbeat loop; as a daemon thread it dies when the main thread exits."""
    while True:
        print("守护线程运行中...")
        time.sleep(1)

# daemon=True: the interpreter will not wait for this thread at shutdown.
daemon = threading.Thread(target=daemon_worker, daemon=True)
daemon.start()
# The main thread sleeps 3 seconds and then the whole process exits.
time.sleep(3)
print("主线程退出,守护线程也会自动终止")
# 输出:
# 守护线程运行中... (3次)
# 主线程退出,守护线程也会自动终止
线程参数
import threading

def worker(name, delay, result_list):
    """Sleep *delay* seconds, then record a completion entry in result_list."""
    import time
    time.sleep(delay)
    result_list.append(f"{name} 完成")
    print(f"{name} 完成")

results = []
threads = []
for i in range(5):
    t = threading.Thread(target=worker, args=(f"任务{i}", i * 0.5, results))
    threads.append(t)
    t.start()
# Wait for every worker before reading the shared list.
for t in threads:
    t.join()
print(f"结果: {results}")
线程标识
import threading

def show_thread_info():
    """Print the current thread's name, identifier, and daemon flag."""
    me = threading.current_thread()
    print(f"线程名: {me.name}")
    print(f"线程ID: {me.ident}")
    print(f"是否守护: {me.daemon}")

t = threading.Thread(target=show_thread_info, name="自定义线程")
t.start()
t.join()
线程同步
为什么需要同步?
import threading

# ❌ 线程不安全示例
counter = 0

def increment():
    """Bump the global counter 100000 times with no locking at all."""
    global counter
    for _ in range(100000):
        counter += 1  # read-modify-write: not atomic across threads

threads = [threading.Thread(target=increment) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(f"期望值: 1000000, 实际值: {counter}")
# 实际值通常小于 1000000(竞态条件)
Lock(互斥锁)
import threading

counter = 0
lock = threading.Lock()

def safe_increment():
    """Bump the global counter 100000 times, one locked step at a time."""
    global counter
    for _ in range(100000):
        with lock:  # the context manager acquires and releases the lock
            counter += 1

threads = [threading.Thread(target=safe_increment) for _ in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(f"期望值: 1000000, 实际值: {counter}")
# 实际值: 1000000 ✓
RLock(可重入锁)
import threading

rlock = threading.RLock()

def outer_function():
    """Take the reentrant lock, then call a helper that re-takes it."""
    with rlock:
        print("外层函数获得锁")
        inner_function()

def inner_function():
    """Re-acquire rlock — legal because RLock is reentrant within one thread."""
    with rlock:
        print("内层函数获得锁")

thread = threading.Thread(target=outer_function)
thread.start()
thread.join()
Condition(条件变量)
import threading
import time

condition = threading.Condition()
items = []
MAX_ITEMS = 5

def producer():
    """Producer: append 10 ints, waiting whenever the buffer is full."""
    for i in range(10):
        with condition:
            # Re-check the predicate in a loop: wait() can wake spuriously.
            while len(items) >= MAX_ITEMS:
                print("缓冲区已满,等待消费...")
                condition.wait()
            items.append(i)
            print(f"生产: {i}, 当前数量: {len(items)}")
            condition.notify_all()  # wake the consumer
        time.sleep(0.1)

def consumer():
    """Consumer: pop 10 ints, waiting whenever the buffer is empty."""
    for _ in range(10):
        with condition:
            while not items:
                print("缓冲区为空,等待生产...")
                condition.wait()
            item = items.pop(0)
            print(f"消费: {item}, 当前数量: {len(items)}")
            condition.notify_all()  # wake the producer
        time.sleep(0.2)

producer_thread = threading.Thread(target=producer)
consumer_thread = threading.Thread(target=consumer)
producer_thread.start()
consumer_thread.start()
producer_thread.join()
consumer_thread.join()
Event(事件)
import threading
import time

event = threading.Event()

def waiter():
    """Block until the event is set, then continue."""
    print("等待事件触发...")
    event.wait()  # blocks until event.set() is called
    print("事件已触发,继续执行")

def setter():
    """Set the event after a two-second delay."""
    time.sleep(2)
    print("设置事件")
    event.set()

waiter_thread = threading.Thread(target=waiter)
setter_thread = threading.Thread(target=setter)
waiter_thread.start()
setter_thread.start()
waiter_thread.join()
setter_thread.join()
Semaphore(信号量)
import threading
import time

# Cap the number of threads inside the critical section at 3.
semaphore = threading.Semaphore(3)

def worker(worker_id):
    """Do two seconds of 'work' while holding one semaphore slot."""
    with semaphore:
        print(f"工人 {worker_id} 开始工作")
        time.sleep(2)
        print(f"工人 {worker_id} 完成工作")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
# 最多只有 3 个工人同时工作
Barrier(屏障)
import threading
import time

# All 3 threads must reach the barrier before any of them may continue.
barrier = threading.Barrier(3)

def worker(worker_id):
    """Announce readiness, wait at the barrier, then do one second of work."""
    print(f"工人 {worker_id} 准备就绪")
    barrier.wait()  # blocks until all parties arrive
    print(f"工人 {worker_id} 开始工作")
    time.sleep(1)
    print(f"工人 {worker_id} 完成")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
线程通信
Queue(队列)
import threading
import queue
import time

def producer(q, num_items):
    """Producer: enqueue num_items items, then a None sentinel to stop the consumer."""
    for i in range(num_items):
        item = f"物品{i}"
        q.put(item)
        print(f"生产: {item}")
        time.sleep(0.1)
    q.put(None)  # end-of-stream sentinel

def consumer(q):
    """Consumer: drain the queue until the None sentinel arrives."""
    while True:
        item = q.get()
        if item is None:
            # BUG FIX: the sentinel must be marked done too — every get()
            # needs a matching task_done(), otherwise q.join() would block
            # forever with one unfinished task.
            q.task_done()
            break
        print(f"消费: {item}")
        time.sleep(0.2)
        q.task_done()

# Bounded queue: put() blocks when 10 items are pending.
q = queue.Queue(maxsize=10)
# Start producer and consumer threads.
producer_thread = threading.Thread(target=producer, args=(q, 5))
consumer_thread = threading.Thread(target=consumer, args=(q,))
producer_thread.start()
consumer_thread.start()
producer_thread.join()
consumer_thread.join()
线程安全的列表
import threading
import queue

class ThreadSafeList:
    """A plain list guarded by a mutex so it can be shared across threads."""

    def __init__(self):
        self._list = []
        self._lock = threading.Lock()

    def append(self, item):
        """Append *item* while holding the lock."""
        with self._lock:
            self._list.append(item)

    def get_all(self):
        """Return a snapshot copy taken under the lock."""
        with self._lock:
            return self._list.copy()

# Demo: 10 threads each append one entry.
safe_list = ThreadSafeList()

def worker(worker_id):
    import time
    time.sleep(0.1)
    safe_list.append(f"数据{worker_id}")

threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(safe_list.get_all())
线程池
ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def task(name, duration):
    """Announce start/end around a blocking sleep; return a result string."""
    print(f"{name} 开始")
    time.sleep(duration)
    print(f"{name} 结束")
    return f"{name} 的结果"

# Pool of at most 5 threads; submit 10 tasks and collect as they finish.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = {
        executor.submit(task, f"任务{i}", i % 3 + 1): i
        for i in range(10)
    }
    for future in as_completed(futures):
        task_id = futures[future]
        try:
            result = future.result()  # re-raises any exception from the task
            print(f"任务{task_id} 完成: {result}")
        except Exception as e:
            print(f"任务{task_id} 出错: {e}")
map 方法
from concurrent.futures import ThreadPoolExecutor
import time

def square(n):
    """Return n squared after a short I/O-like pause."""
    time.sleep(0.1)
    return n ** 2

numbers = list(range(10))
# map() preserves input order even though tasks run concurrently.
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(square, numbers))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
回调函数
from concurrent.futures import ThreadPoolExecutor
import time

def task(n):
    """Square n after a one-second delay."""
    time.sleep(1)
    return n ** 2

def callback(future):
    """Completion callback: print the finished future's result."""
    print(f"任务完成,结果: {future.result()}")

with ThreadPoolExecutor(max_workers=2) as executor:
    future = executor.submit(task, 5)
    future.add_done_callback(callback)  # fires when the task completes
    # Block until the task finishes.
    future.result()
进程基础
什么是进程?
进程是操作系统资源分配的基本单位,每个进程有独立的内存空间。
优点:
- 真正的并行执行(不受 GIL 限制)
- 稳定性好,一个进程崩溃不影响其他进程
- 适合 CPU 密集型任务
缺点:
- 创建开销大
- 进程间通信复杂
- 占用更多内存
import multiprocessing
import os

# Identify this process, its parent, and the machine's core count.
print(f"当前进程ID: {os.getpid()}")
print(f"父进程ID: {os.getppid()}")
print(f"CPU核心数: {multiprocessing.cpu_count()}")
创建和管理进程
方法1:使用 Process 类
import multiprocessing
import time

def worker(name, duration):
    """Print start/end around a blocking sleep, tagged with the child's PID."""
    print(f"进程 {name} (PID: {multiprocessing.current_process().pid}) 开始")
    time.sleep(duration)
    print(f"进程 {name} 结束")

if __name__ == "__main__":
    # Create, start, and wait for both child processes.
    process1 = multiprocessing.Process(target=worker, args=("进程1", 2))
    process2 = multiprocessing.Process(target=worker, args=("进程2", 3))
    for p in (process1, process2):
        p.start()
    for p in (process1, process2):
        p.join()
    print("所有进程完成")
方法2:继承 Process 类
import multiprocessing
import time

class MyProcess(multiprocessing.Process):
    """Process subclass whose run() blocks for a configurable duration."""

    def __init__(self, name, duration):
        super().__init__()
        self.name = name          # Process.name is a settable property
        self.duration = duration

    def run(self):
        """Code executed inside the child process after start()."""
        print(f"进程 {self.name} (PID: {self.pid}) 开始")
        time.sleep(self.duration)
        print(f"进程 {self.name} 结束")

if __name__ == "__main__":
    procs = (MyProcess("进程1", 2), MyProcess("进程2", 3))
    for p in procs:
        p.start()
    for p in procs:
        p.join()
守护进程
import multiprocessing
import time

def daemon_worker():
    """Heartbeat loop; as a daemon process it is terminated with its parent."""
    while True:
        print("守护进程运行中...")
        time.sleep(1)

if __name__ == "__main__":
    daemon = multiprocessing.Process(target=daemon_worker, daemon=True)
    daemon.start()
    # Parent sleeps 3 seconds, then exits — the daemon is killed with it.
    time.sleep(3)
    print("主进程退出,守护进程也会终止")
进程池
import multiprocessing
import time

def worker(n):
    """Square n after simulating one second of work."""
    time.sleep(1)
    return n ** 2

if __name__ == "__main__":
    # Pool of 4 worker processes; map() blocks until every result arrives.
    with multiprocessing.Pool(processes=4) as pool:
        results = pool.map(worker, range(10))
    print(results)
进程间通信
Queue(队列)
import multiprocessing
import time

def producer(queue):
    """Producer: enqueue five items with a short pause between each."""
    for i in range(5):
        item = f"物品{i}"
        queue.put(item)
        print(f"生产: {item}")
        time.sleep(0.1)

def consumer(queue):
    """Consumer: drain the queue until the None sentinel arrives."""
    while True:
        item = queue.get()
        if item is None:
            break
        print(f"消费: {item}")
        time.sleep(0.2)

if __name__ == "__main__":
    queue = multiprocessing.Queue()
    producer_proc = multiprocessing.Process(target=producer, args=(queue,))
    consumer_proc = multiprocessing.Process(target=consumer, args=(queue,))
    producer_proc.start()
    consumer_proc.start()
    producer_proc.join()
    queue.put(None)  # sentinel: tell the consumer to stop
    consumer_proc.join()
Pipe(管道)
import multiprocessing

def sender(pipe):
    """Send three messages through *pipe*, then close this end."""
    messages = ["消息1", "消息2", "消息3"]
    for msg in messages:
        pipe.send(msg)
        print(f"发送: {msg}")
    pipe.close()

def receiver(pipe):
    """Receive until the peer end is closed (recv raises EOFError)."""
    while True:
        try:
            msg = pipe.recv()
            print(f"接收: {msg}")
        except EOFError:
            break

if __name__ == "__main__":
    parent_conn, child_conn = multiprocessing.Pipe()
    sender_proc = multiprocessing.Process(target=sender, args=(parent_conn,))
    receiver_proc = multiprocessing.Process(target=receiver, args=(child_conn,))
    sender_proc.start()
    receiver_proc.start()
    # BUG FIX: after the fork the parent still holds duplicate handles of
    # both connection ends. Unless the parent closes its copies, the
    # receiver never sees EOFError and hangs forever.
    parent_conn.close()
    child_conn.close()
    sender_proc.join()
    receiver_proc.join()
Value 和 Array(共享内存)
import multiprocessing
import ctypes

def worker(counter, lock):
    """Add 10000 to the shared counter, one locked increment at a time."""
    for _ in range(10000):
        with lock:
            counter.value += 1

if __name__ == "__main__":
    # Shared C int living in shared memory, plus a cross-process lock.
    counter = multiprocessing.Value(ctypes.c_int, 0)
    lock = multiprocessing.Lock()
    processes = [
        multiprocessing.Process(target=worker, args=(counter, lock))
        for _ in range(10)
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(f"计数器值: {counter.value}")  # 100000
Manager(管理器)
import multiprocessing

def worker(shared_dict, shared_list, key, value):
    """Record *value* under *key* in the dict and append it to the list."""
    shared_dict[key] = value
    shared_list.append(value)

if __name__ == "__main__":
    with multiprocessing.Manager() as manager:
        # Proxy objects that marshal mutations across process boundaries.
        shared_dict = manager.dict()
        shared_list = manager.list()
        processes = []
        for i in range(5):
            p = multiprocessing.Process(
                target=worker,
                args=(shared_dict, shared_list, f"key{i}", i * 10)
            )
            processes.append(p)
            p.start()
        for p in processes:
            p.join()
        print(f"字典: {dict(shared_dict)}")
        print(f"列表: {list(shared_list)}")
GIL 全局解释器锁
什么是 GIL?
GIL(Global Interpreter Lock)是 CPython 解释器的一个机制,确保同一时刻只有一个线程执行 Python 字节码。
影响:
- 多线程不能真正并行执行 CPU 密集型任务
- I/O 密集型任务不受影响(I/O 操作会释放 GIL)
GIL 的影响测试
import threading
import multiprocessing
import time

# CPU 密集型任务
def cpu_bound_task(n):
    """Pure-CPU work: sum of i*i for i in range(n)."""
    total = 0
    for i in range(n):
        total += i * i
    return total

def test_threading():
    """Time four threads running the CPU-bound task (serialized by the GIL)."""
    start = time.time()
    threads = [
        threading.Thread(target=cpu_bound_task, args=(10**7,))
        for _ in range(4)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print(f"多线程耗时: {time.time() - start:.2f}秒")

def test_multiprocessing():
    """Time four processes running the same task (one GIL per process)."""
    start = time.time()
    processes = [
        multiprocessing.Process(target=cpu_bound_task, args=(10**7,))
        for _ in range(4)
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(f"多进程耗时: {time.time() - start:.2f}秒")

if __name__ == "__main__":
    test_threading()
    test_multiprocessing()
# 输出示例:
# 多线程耗时: 8.50秒(受 GIL 限制)
# 多进程耗时: 2.20秒(真正并行)
绕过 GIL
# Option 1: multiple processes side-step the GIL entirely
import multiprocessing
# Option 2: C extensions (NumPy, SciPy, ...) do heavy work in native code
import numpy as np
# Option 3: asyncio for high-concurrency I/O-bound workloads
import asyncio
# Option 4: alternative Python implementations (Jython, IronPython)
选择线程还是进程
决策指南
| 场景 | 推荐方式 | 原因 |
|---|---|---|
| I/O 密集型(网络请求、文件读写) | 多线程/异步 | I/O 操作释放 GIL |
| CPU 密集型(计算、数据处理) | 多进程 | 绕过 GIL,真正并行 |
| 需要共享大量数据 | 多线程 | 共享内存,通信简单 |
| 需要稳定性和隔离性 | 多进程 | 进程独立,互不影响 |
| 高并发网络服务 | 异步编程 | 轻量级,高效 |
对比总结
"""
线程 vs 进程对比
线程:
✓ 轻量级,创建快速
✓ 共享内存,通信方便
✓ 适合 I/O 密集型任务
✗ 受 GIL 限制
✗ 需要处理线程安全
✗ 一个线程崩溃可能影响整个进程
进程:
✓ 真正并行执行
✓ 稳定性好,隔离性强
✓ 适合 CPU 密集型任务
✗ 创建开销大
✗ 通信复杂
✗ 占用更多内存
"""
综合实战
实战1: 并发下载器
"""
并发文件下载器
展示线程池和进程池的应用
"""
import os
import time
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse
class ConcurrentDownloader:
    """Thread-pool based concurrent file downloader.

    Each URL is fetched on a worker thread and written into output_dir;
    download_multiple() aggregates per-file result dicts and prints a summary.
    """

    def __init__(self, max_workers=5, output_dir="downloads"):
        self.max_workers = max_workers
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def download_file(self, url):
        """Download one URL into output_dir.

        Returns a result dict (url/filename/size/status, plus error on
        failure). Never raises — failures are reported in the dict so the
        pool's result collection stays simple.
        """
        try:
            print(f"开始下载: {url}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            # Derive a filename from the URL path; fall back to a default.
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path) or "download.bin"
            filepath = os.path.join(self.output_dir, filename)
            # 保存文件
            with open(filepath, 'wb') as f:
                f.write(response.content)
            file_size = len(response.content)
            # BUG FIX: the success message printed a literal placeholder
            # instead of the downloaded file's name.
            print(f"✓ 下载完成: {filename} ({file_size / 1024:.1f} KB)")
            return {
                "url": url,
                "filename": filename,
                "size": file_size,
                "status": "success"
            }
        except Exception as e:
            print(f"✗ 下载失败: {url}, 错误: {e}")
            return {
                "url": url,
                "filename": None,
                "size": 0,
                "status": "failed",
                "error": str(e)
            }

    def download_multiple(self, urls):
        """Download *urls* concurrently; print a summary and return all results."""
        results = []
        start_time = time.time()
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit every download up front.
            future_to_url = {
                executor.submit(self.download_file, url): url
                for url in urls
            }
            # Collect results in completion order.
            for future in as_completed(future_to_url):
                result = future.result()
                results.append(result)
        elapsed = time.time() - start_time
        # Aggregate statistics for the summary block.
        success_count = sum(1 for r in results if r["status"] == "success")
        failed_count = len(results) - success_count
        total_size = sum(r["size"] for r in results)
        print(f"\n{'='*60}")
        print(f"下载完成统计:")
        print(f" 总数: {len(results)}")
        print(f" 成功: {success_count}")
        print(f" 失败: {failed_count}")
        print(f" 总大小: {total_size / 1024 / 1024:.2f} MB")
        print(f" 耗时: {elapsed:.2f} 秒")
        print(f"{'='*60}")
        return results
# 使用示例
def main():
    """Download a handful of httpbin test URLs with three workers."""
    urls = [
        "https://httpbin.org/image/jpeg",
        "https://httpbin.org/image/png",
        "https://httpbin.org/bytes/1024",
        "https://httpbin.org/bytes/2048",
        "https://httpbin.org/bytes/4096",
    ]
    downloader = ConcurrentDownloader(max_workers=3)
    # FIX: the return value was bound to an unused local — drop it.
    downloader.download_multiple(urls)

if __name__ == "__main__":
    main()
实战2: 并行数据处理
"""
并行数据处理系统
展示多进程在 CPU 密集型任务中的应用
"""
import multiprocessing
import time
import random
from concurrent.futures import ProcessPoolExecutor, as_completed
def generate_data(size):
    """Return *size* random integers drawn uniformly from [1, 1000]."""
    return [random.randint(1, 1000) for _ in range(size)]
def process_chunk(data_chunk):
    """Return summary statistics for one chunk (CPU-bound work).

    Assumes *data_chunk* is a non-empty sequence of numbers.
    """
    # Sum of squares — the deliberately heavy part of the computation.
    square_sum = 0
    for num in data_chunk:
        square_sum += num ** 2
    # FIX: the original also built `sorted_data = sorted(data_chunk)` but
    # never used it — dropped as dead O(n log n) work. The plain sum is
    # now computed once instead of twice.
    total = sum(data_chunk)
    return {
        "sum": total,
        "mean": total / len(data_chunk),
        "min": min(data_chunk),
        "max": max(data_chunk),
        "square_sum": square_sum,
        "count": len(data_chunk)
    }
def merge_results(results):
    """Fold per-chunk stat dicts into one global summary dict."""
    merged = {
        "total_sum": 0,
        "total_count": 0,
        "total_square_sum": 0,
        "global_min": float('inf'),
        "global_max": float('-inf')
    }
    for chunk_stats in results:
        merged["total_sum"] += chunk_stats["sum"]
        merged["total_count"] += chunk_stats["count"]
        merged["total_square_sum"] += chunk_stats["square_sum"]
        # Extremes: keep the smaller min and the larger max seen so far.
        merged["global_min"] = min(merged["global_min"], chunk_stats["min"])
        merged["global_max"] = max(merged["global_max"], chunk_stats["max"])
    merged["global_mean"] = merged["total_sum"] / merged["total_count"]
    return merged
def parallel_process_data(total_size=1000000, num_processes=4):
    """Generate data, fan chunks out to a process pool, and merge the stats."""
    print(f"生成 {total_size} 条数据...")
    data = generate_data(total_size)
    # Split into roughly equal slices, one per process.
    chunk_size = total_size // num_processes
    chunks = [data[i:i + chunk_size] for i in range(0, total_size, chunk_size)]
    print(f"分割为 {len(chunks)} 个数据块")
    print(f"开始并行处理...")
    start_time = time.time()
    results = []
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        futures = [executor.submit(process_chunk, chunk) for chunk in chunks]
        # Gather per-chunk stats in completion order; merging is order-free.
        for future in as_completed(futures):
            results.append(future.result())
    elapsed = time.time() - start_time
    print("合并结果...")
    final_result = merge_results(results)
    print(f"\n{'='*60}")
    print(f"处理完成:")
    print(f" 数据总量: {final_result['total_count']}")
    print(f" 总和: {final_result['total_sum']}")
    print(f" 平均值: {final_result['global_mean']:.2f}")
    print(f" 最小值: {final_result['global_min']}")
    print(f" 最大值: {final_result['global_max']}")
    print(f" 平方和: {final_result['total_square_sum']}")
    print(f" 耗时: {elapsed:.2f} 秒")
    print(f"{'='*60}")
    return final_result
# 性能对比
def sequential_process_data(total_size=1000000):
    """Serial baseline: process everything as a single chunk, for comparison."""
    print(f"生成 {total_size} 条数据...")
    data = generate_data(total_size)
    print("开始串行处理...")
    start_time = time.time()
    result = process_chunk(data)
    elapsed = time.time() - start_time
    print(f"\n{'='*60}")
    print(f"串行处理完成:")
    print(f" 数据总量: {result['count']}")
    print(f" 平均值: {result['mean']:.2f}")
    print(f" 耗时: {elapsed:.2f} 秒")
    print(f"{'='*60}")

if __name__ == "__main__":
    # Run the parallel version first, then the serial baseline.
    parallel_process_data(total_size=1000000, num_processes=4)
    print("\n" + "="*60 + "\n")
    sequential_process_data(total_size=1000000)
实战3: 生产者-消费者系统
"""
生产者-消费者系统
展示线程同步和通信的综合应用
"""
import threading
import queue
import time
import random
from collections import defaultdict
class TaskQueue:
    """Bounded task queue that stamps each task with a sequential id."""

    def __init__(self, maxsize=100):
        self.queue = queue.Queue(maxsize=maxsize)
        self.task_counter = 0
        self.lock = threading.Lock()

    def add_task(self, task_data):
        """Assign the next id, enqueue (id, data), and return the id."""
        # Only the counter bump needs the lock; Queue.put is thread-safe.
        with self.lock:
            self.task_counter += 1
            task_id = self.task_counter
        self.queue.put((task_id, task_data))
        print(f"[生产者] 添加任务 #{task_id}")
        return task_id

    def get_task(self):
        """Return (task_id, task_data), or (None, None) after a 1s timeout."""
        try:
            return self.queue.get(timeout=1)
        except queue.Empty:
            return None, None

    def task_done(self):
        """Forward task completion to the underlying queue."""
        self.queue.task_done()

    def join(self):
        """Block until every enqueued task has been marked done."""
        self.queue.join()
class WorkerPool:
    """Pool of daemon worker threads consuming tasks from a TaskQueue."""

    def __init__(self, num_workers=3):
        self.num_workers = num_workers
        self.workers = []
        self.results = {}                    # task_id -> result dict
        self.results_lock = threading.Lock()
        self.stats = defaultdict(int)        # "completed" / "failed" counters
        self.stats_lock = threading.Lock()

    def start(self, task_queue, stop_event):
        """Spawn num_workers daemon threads polling *task_queue* until *stop_event*."""
        for i in range(self.num_workers):
            worker = threading.Thread(
                target=self._worker_loop,
                args=(i, task_queue, stop_event),
                daemon=True
            )
            worker.start()
            self.workers.append(worker)
            print(f"[系统] 工作者 {i} 启动")

    def _worker_loop(self, worker_id, task_queue, stop_event):
        """Worker loop: fetch, process, record — until stop_event is set."""
        while not stop_event.is_set():
            task_id, task_data = task_queue.get_task()
            if task_id is None:
                continue  # poll timed out; re-check stop_event
            try:
                result = self._process_task(worker_id, task_id, task_data)
            except Exception as e:
                print(f"[工作者{worker_id}] 任务 #{task_id} 失败: {e}")
                with self.stats_lock:
                    self.stats["failed"] += 1
            else:
                with self.results_lock:
                    self.results[task_id] = result
                with self.stats_lock:
                    self.stats["completed"] += 1
                print(f"[工作者{worker_id}] 完成任务 #{task_id}")
            finally:
                # BUG FIX: task_done() must run for failed tasks too — the
                # original skipped it on exception, so TaskQueue.join()
                # would block forever after any failure.
                task_queue.task_done()

    def _process_task(self, worker_id, task_id, task_data):
        """Process one task, dispatching on task_data["type"].

        Raises ValueError for unknown task types.
        """
        task_type = task_data.get("type", "compute")
        if task_type == "compute":
            # Compute task: sum of squares below the given value.
            n = task_data.get("value", 10)
            result = sum(i ** 2 for i in range(n))
            time.sleep(random.uniform(0.1, 0.5))
            return {"type": "compute", "input": n, "result": result}
        elif task_type == "transform":
            # Transform task: upper-case the given text.
            text = task_data.get("text", "")
            result = text.upper()
            time.sleep(random.uniform(0.05, 0.2))
            return {"type": "transform", "input": text, "result": result}
        else:
            raise ValueError(f"未知任务类型: {task_type}")

    def get_stats(self):
        """Return a snapshot copy of the counters."""
        with self.stats_lock:
            return dict(self.stats)

    def get_results(self):
        """Return a snapshot copy of all recorded results."""
        with self.results_lock:
            return dict(self.results)
class ProducerConsumerSystem:
    """Facade wiring a TaskQueue, a WorkerPool, and producer threads together."""

    def __init__(self, num_workers=3, queue_size=100):
        self.task_queue = TaskQueue(maxsize=queue_size)
        self.worker_pool = WorkerPool(num_workers=num_workers)
        self.stop_event = threading.Event()
        self.producer_threads = []

    def start(self):
        """Start the worker pool."""
        print("[系统] 启动生产者-消费者系统")
        self.worker_pool.start(self.task_queue, self.stop_event)

    def add_producer(self, producer_func, num_tasks=10):
        """Run producer_func(task_queue, num_tasks) on its own thread."""
        producer = threading.Thread(
            target=producer_func,
            args=(self.task_queue, num_tasks)
        )
        producer.start()
        self.producer_threads.append(producer)

    def wait_for_completion(self):
        """Wait for all producers, then for queued tasks, then stop the workers."""
        for producer in self.producer_threads:
            producer.join()
        self.task_queue.join()
        self.stop_event.set()
        print("[系统] 所有任务完成")

    def print_report(self):
        """Print aggregate stats plus a small sample of results."""
        stats = self.worker_pool.get_stats()
        results = self.worker_pool.get_results()
        print(f"\n{'='*60}")
        print(f"系统报告:")
        print(f" 完成任务数: {stats.get('completed', 0)}")
        print(f" 失败任务数: {stats.get('failed', 0)}")
        print(f" 总结果数: {len(results)}")
        print(f"{'='*60}")
        print("\n部分结果:")
        for task_id, result in list(results.items())[:5]:
            print(f" 任务 #{task_id}: {result}")
        print(f"{'='*60}")
# 生产者函数
def compute_producer(task_queue, num_tasks):
    """Enqueue *num_tasks* random compute tasks with short pauses in between."""
    for _ in range(num_tasks):
        payload = {"type": "compute", "value": random.randint(100, 1000)}
        task_queue.add_task(payload)
        time.sleep(random.uniform(0.05, 0.2))
def transform_producer(task_queue, num_tasks):
    """Enqueue *num_tasks* transform tasks using a fixed word pool."""
    texts = ["hello", "world", "python", "programming", "concurrency"]
    for _ in range(num_tasks):
        payload = {"type": "transform", "text": random.choice(texts)}
        task_queue.add_task(payload)
        time.sleep(random.uniform(0.05, 0.2))
# 使用示例
def main():
    """Wire up the system: start workers, feed two producers, then report."""
    system = ProducerConsumerSystem(num_workers=4, queue_size=50)
    system.start()
    # Two producers feeding different task types concurrently.
    system.add_producer(compute_producer, num_tasks=10)
    system.add_producer(transform_producer, num_tasks=10)
    system.wait_for_completion()
    system.print_report()

if __name__ == "__main__":
    main()
小结
| 概念 | 说明 | 使用场景 |
|---|---|---|
| 线程 | 轻量级执行单元 | I/O 密集型任务 |
| 进程 | 独立执行单元 | CPU 密集型任务 |
| Lock | 互斥锁 | 保护共享资源 |
| Condition | 条件变量 | 生产者-消费者 |
| Queue | 线程/进程安全队列 | 任务分发 |
| Event | 事件标志 | 线程间信号 |
| Semaphore | 信号量 | 限制并发数 |
| 线程池 | 复用线程 | 大量短任务 |
| 进程池 | 复用进程 | CPU 密集任务 |
| GIL | 全局解释器锁 | CPython 限制 |
核心要点:
- I/O 密集型用线程,CPU 密集型用进程
- 始终注意线程安全问题
- 优先使用线程池/进程池
- 合理使用同步原语
- 理解 GIL 的限制
- 避免死锁和竞态条件
- 善用 Queue 进行通信
掌握并发编程将帮助你编写高性能的 Python 程序!