第7章: 并发与异步 — GIL的现实与突破
Java/Kotlin 开发者对并发的理解建立在"线程 = OS线程"这个基本事实上:Thread.start() 直接映射 pthread_create,synchronized 直接映射 futex,JVM 的内存模型(JMM)虽然复杂,但底层模型是清晰的。Python 的并发世界则被一个名为 GIL(Global Interpreter Lock,全局解释器锁) 的东西彻底改变——它让多线程在 CPU 密集型场景下几乎毫无用处,迫使你转向多进程或异步协程。这不是 Python 的"缺陷",而是一个有意识的设计取舍。理解 GIL 是理解 Python 并发的一切起点。
7.1 GIL 详解: 什么是GIL,为什么存在,如何影响你
Java/Kotlin 对比
public class CpuBound {
public static void main(String[] args) throws InterruptedException {
int numThreads = 4;
Thread[] threads = new Thread[numThreads];
for (int i = 0; i < numThreads; i++) {
threads[i] = new Thread(() -> {
long sum = 0;
for (int j = 0; j < 100_000_000; j++) {
sum += j;
}
System.out.println(sum);
});
threads[i].start();
}
for (Thread t : threads) t.join();
}
}
import kotlinx.coroutines.*
fun main() = runBlocking {
val jobs = List(4) {
launch(Dispatchers.Default) {
var sum = 0L
repeat(100_000_000) { sum += it }
println(sum)
}
}
jobs.joinAll()
}
Python 实现
import threading
import time
def cpu_bound(n: int) -> int:
    """Sum the integers 0..n-1 in pure Python; the GIL is held the whole time."""
    running_total = 0
    index = 0
    while index < n:
        running_total += index
        index += 1
    return running_total
def run_sequential(n: int, workers: int) -> float:
    """Run cpu_bound `workers` times back to back.

    Returns the total wall-clock time in seconds.
    """
    begin = time.perf_counter()
    remaining = workers
    while remaining > 0:
        cpu_bound(n)
        remaining -= 1
    return time.perf_counter() - begin
def run_threads(n: int, workers: int) -> float:
    """Run cpu_bound on `workers` threads — the GIL keeps this from speeding up.

    Returns wall-clock seconds from before the first start() to after the
    last join().
    """
    begin = time.perf_counter()
    pool = [threading.Thread(target=cpu_bound, args=(n,)) for _ in range(workers)]
    for th in pool:
        th.start()
    for th in pool:
        th.join()
    return time.perf_counter() - begin
if __name__ == "__main__":
N = 20_000_000
WORKERS = 4
seq_time = run_sequential(N, WORKERS)
thread_time = run_threads(N, WORKERS)
print(f"顺序执行: {seq_time:.3f}s")
print(f"多线程执行: {thread_time:.3f}s")
print(f"加速比: {seq_time / thread_time:.2f}x")
import threading
import time
import urllib.request
def fetch_url(url: str) -> str:
    """Fetch up to 100 bytes from *url* and return at most 50 decoded chars.

    Blocking network I/O releases the GIL, so threads overlap here.
    Decoding uses errors="replace" because a fixed 100-byte read can cut a
    multi-byte UTF-8 character in half, which would raise UnicodeDecodeError
    under the default strict mode.
    """
    with urllib.request.urlopen(url, timeout=5) as resp:
        return resp.read(100).decode(errors="replace")[:50]
def run_sequential_io(urls: list[str]) -> float:
    """Fetch each URL one after another; return the elapsed seconds."""
    begin = time.perf_counter()
    for target in urls:
        fetch_url(target)
    return time.perf_counter() - begin
def run_threads_io(urls: list[str]) -> float:
    """Fetch every URL on its own thread; I/O releases the GIL so they overlap.

    Returns wall-clock seconds for the whole batch.
    """
    begin = time.perf_counter()
    workers = []
    for target in urls:
        th = threading.Thread(target=fetch_url, args=(target,))
        workers.append(th)
        th.start()
    for th in workers:
        th.join()
    return time.perf_counter() - begin
if __name__ == "__main__":
urls = [
"https://httpbin.org/delay/1",
"https://httpbin.org/delay/1",
"https://httpbin.org/delay/1",
]
seq = run_sequential_io(urls)
thr = run_threads_io(urls)
print(f"顺序: {seq:.2f}s, 多线程: {thr:.2f}s")
核心差异
| 维度 | Java/Kotlin | Python (CPython) |
|---|
| 线程模型 | 1:1(线程 = OS线程) | 1:1,但受 GIL 约束 |
| CPU 并行 | 多线程即可 | 必须用多进程 |
| I/O 并发 | 多线程/协程 | 多线程/协程均可 |
| 内存共享 | 天然共享,需同步 | 同进程内共享,跨进程需 IPC |
| 锁粒度 | 对象级(synchronized) | 全局(GIL)+ 对象级(Lock) |
GIL 何时释放:
- I/O 操作:
socket.read()、urllib、文件读写等
time.sleep():显式让出 GIL
- C 扩展:NumPy、Pandas 等在计算前显式释放 GIL(
Py_BEGIN_ALLOW_THREADS)
- 切换间隔:解释器默认约每 5 毫秒检查一次是否需要切换线程(
sys.setswitchinterval() 可调;"每执行 100 条字节码切换"是 Python 2 的旧机制,自 3.2 起已改为基于时间片)
常见陷阱
def bad_cpu_parallelism():
threads = [threading.Thread(target=cpu_bound, args=(10_000_000,)) for _ in range(8)]
for t in threads: t.start()
for t in threads: t.join()
import sys
print(sys.getswitchinterval())
lock = threading.Lock()
def increment():
with lock:
global counter
counter += 1
何时使用
- 需要 CPU 并行 →
multiprocessing(7.3节)或 ProcessPoolExecutor(7.4节)
- I/O 并发且代码简单 →
threading(7.2节)
- I/O 并发且追求高吞吐 →
asyncio(7.5节)
- 混合型 → CPU 部分用进程池,I/O 部分用线程/协程
7.2 threading 模块: I/O 密集型的利器
Java/Kotlin 对比
Thread t = new Thread(() -> {
System.out.println("Running in: " + Thread.currentThread().getName());
});
t.setDaemon(true);
t.start();
t.join();
ReentrantLock lock = new ReentrantLock();
lock.lock();
try {
} finally {
lock.unlock();
}
CountDownLatch latch = new CountDownLatch(1);
latch.countDown();
latch.await();
Semaphore sem = new Semaphore(3);
sem.acquire();
try { } finally { sem.release(); }
import kotlin.concurrent.thread
thread(name = "worker") {
println("Running in: ${Thread.currentThread().name}")
}
import kotlinx.coroutines.*
Python 实现
import threading
import time
import random
def worker(name: str, seconds: int):
    """Print start/finish markers around a blocking sleep of `seconds`.

    current_thread().name shows which thread runs the body — the name=
    passed to Thread, or an auto-generated "Thread-N".
    """
    print(f"[{threading.current_thread().name}] {name} 开始工作")
    time.sleep(seconds)
    print(f"[{threading.current_thread().name}] {name} 完成")
t1 = threading.Thread(target=worker, args=("Alice", 2), name="worker-1")
t1.start()
t1.join()
class MyWorker(threading.Thread):
    """Subclass-style thread: override run() instead of passing target=."""

    def __init__(self, name: str, seconds: int):
        # name= is forwarded to Thread so it appears in current_thread().name.
        super().__init__(name=name)
        self.seconds = seconds

    def run(self):
        # Executed on the new thread once start() is called.
        worker(self.name, self.seconds)
t2 = MyWorker("Bob", 1)
t2.start()
t2.join()
def background_task():
for i in range(10):
print(f"background: {i}")
time.sleep(0.5)
daemon = threading.Thread(target=background_task, daemon=True)
daemon.start()
time.sleep(1.2)
print("主线程结束")
counter = 0
lock = threading.Lock()
def safe_increment(n: int):
    """Add 1 to the module-global `counter` n times, guarded by `lock`.

    `counter += 1` is a read-modify-write sequence, so without the lock two
    threads could interleave between the read and the write and lose updates
    — the GIL does not make multi-bytecode operations atomic.
    """
    global counter
    for _ in range(n):
        with lock:
            counter += 1
threads = [threading.Thread(target=safe_increment, args=(100_000,)) for _ in range(4)]
for t in threads: t.start()
for t in threads: t.join()
print(f"counter = {counter}")
rlock = threading.RLock()
def outer():
with rlock:
print("outer acquired")
inner()
def inner():
with rlock:
print("inner acquired")
outer()
event = threading.Event()
def waiter(name: str):
print(f"{name}: 等待信号...")
event.wait()
print(f"{name}: 收到信号,继续执行")
def setter():
time.sleep(1)
print("发送信号")
event.set()
threading.Thread(target=waiter, args=("W1",)).start()
threading.Thread(target=waiter, args=("W2",)).start()
threading.Thread(target=setter).start()
condition = threading.Condition()
items: list[str] = []
def producer():
for i in range(3):
time.sleep(random.random())
with condition:
items.append(f"item-{i}")
print(f"生产: item-{i}")
condition.notify()
def consumer(name: str):
with condition:
while not items:
condition.wait()
item = items.pop(0)
print(f"{name} 消费: {item}")
threading.Thread(target=producer).start()
threading.Thread(target=consumer, args=("C1",)).start()
threading.Thread(target=consumer, args=("C2",)).start()
semaphore = threading.Semaphore(2)
def limited_worker(name: str):
with semaphore:
print(f"{name}: 获得许可")
time.sleep(2)
print(f"{name}: 释放许可")
for i in range(5):
threading.Thread(target=limited_worker, args=(f"W{i}",)).start()
核心差异
| 概念 | Java | Python |
|---|
| 创建线程 | new Thread(runnable) | threading.Thread(target=fn) |
| 守护线程 | setDaemon(true) | daemon=True |
| 互斥锁 | ReentrantLock | threading.Lock() / RLock() |
| 等待/通知 | wait()/notify() | Condition.wait()/notify() |
| 信号量 | Semaphore | threading.Semaphore() |
| 倒计时门栓 | CountDownLatch | threading.Event()(简化版) |
关键差异:Python 的 threading 模块没有 CountDownLatch、ReadWriteLock 等高级同步原语(threading.Barrier 自 3.2 起大致对应 CyclicBarrier)。Event 是最接近 CountDownLatch(1) 的替代,但功能更简单。需要更复杂的协调时,通常直接用 asyncio。
常见陷阱
threads = [threading.Thread(target=worker, args=("x", 1)) for _ in range(5)]
for t in threads: t.start()
lock = threading.Lock()
def recursive():
with lock:
recursive()
condition = threading.Condition()
shared_list = []
def append_item():
shared_list.append(1)
何时使用
- 网络请求、文件读写等 I/O 密集型任务 →
threading 是最简单的选择
- 需要与同步 C 扩展交互 →
threading(C 扩展可能释放 GIL)
- CPU 密集型 → 不要用
threading,用 multiprocessing
- 高并发 I/O(数千连接) → 用
asyncio,线程开销太大
7.3 multiprocessing: CPU 密集型的解决方案
Java/Kotlin 对比
ProcessBuilder pb = new ProcessBuilder("java", "-jar", "worker.jar");
Process process = pb.start();
int exitCode = process.waitFor();
Python 实现
import multiprocessing
import time
def cpu_worker(name: str, n: int) -> int:
    """Sum 0..n-1 in a child process — true parallelism, no GIL contention."""
    total = 0
    step = 0
    while step < n:
        total += step
        step += 1
    print(f"[{name}] 完成, 结果: {total}")
    return total
def demo_process():
    """Launch four CPU-bound worker processes and wait for each to exit.

    Each Process runs cpu_worker in its own interpreter, so the four sums
    proceed in parallel on separate cores, unconstrained by the GIL.
    """
    processes = []
    for i in range(4):
        p = multiprocessing.Process(
            target=cpu_worker,
            args=(f"Worker-{i}", 20_000_000),
        )
        processes.append(p)
        p.start()
    for p in processes:
        p.join()
        # exitcode is 0 on clean exit, -N if the process was killed by signal N.
        print(f"{p.name} exit code: {p.exitcode}")
if __name__ == "__main__":
demo_process()
def producer(queue: multiprocessing.Queue):
for i in range(5):
queue.put(f"message-{i}")
print(f"生产: message-{i}")
queue.put(None)
def consumer(queue: multiprocessing.Queue, name: str):
while True:
msg = queue.get()
if msg is None:
print(f"{name}: 收到结束信号")
queue.put(None)
break
print(f"{name}: 消费 {msg}")
def demo_queue():
    """One producer, two consumers over a multiprocessing.Queue.

    The producer enqueues five messages then a single None sentinel; each
    consumer re-enqueues the sentinel before exiting so the other consumer
    also sees it and terminates.
    """
    q = multiprocessing.Queue()
    p1 = multiprocessing.Process(target=producer, args=(q,))
    p2 = multiprocessing.Process(target=consumer, args=(q, "C1"))
    p3 = multiprocessing.Process(target=consumer, args=(q, "C2"))
    # Start consumers first so they are already blocked on get() when
    # messages begin to arrive.
    p2.start()
    p3.start()
    p1.start()
    p1.join()
    p2.join()
    p3.join()
def _pipe_child(conn):
    """Child-process body for pipe_demo: send two messages, then close."""
    conn.send("来自子进程的问候")
    conn.send(42)
    conn.close()


def pipe_demo():
    """Demonstrate Pipe IPC between the parent and one child process.

    The child body must be a module-level function: under the "spawn" start
    method (default on Windows, and on macOS since 3.8) Process targets are
    pickled, and nested functions are not picklable — the original nested
    `child` fails there.
    """
    parent_conn, child_conn = multiprocessing.Pipe()
    p = multiprocessing.Process(target=_pipe_child, args=(child_conn,))
    p.start()
    print(parent_conn.recv())
    print(parent_conn.recv())
    p.join()
def _sm_increment(value, n):
    """Child-process body: bump the shared Value n times under its lock."""
    for _ in range(n):
        with value.get_lock():
            value.value += 1


def shared_memory_demo():
    """Demonstrate shared memory (Value/Array) across four processes.

    The increment worker lives at module level because Process targets are
    pickled under the "spawn" start method (default on Windows/macOS);
    the original nested definition is not picklable and fails there.
    """
    counter = multiprocessing.Value("i", 0)
    # Array is shown for the API; this demo never writes to it.
    arr = multiprocessing.Array("d", [0.0, 0.0, 0.0])
    processes = [
        multiprocessing.Process(target=_sm_increment, args=(counter, 100_000))
        for _ in range(4)
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(f"共享计数器: {counter.value}")
def _mgr_worker(d, lst, idx):
    """Child-process body: write one entry into the managed dict and list."""
    d[f"key-{idx}"] = f"value-{idx}"
    lst.append(f"item-{idx}")


def manager_demo():
    """Share a dict and a list across processes via a Manager server.

    Manager proxies are flexible (any picklable value) but slower than
    Value/Array: every access is a round-trip to the manager process.
    The worker is module-level so it survives spawn-mode pickling — the
    original nested definition cannot be pickled as a Process target.
    """
    with multiprocessing.Manager() as manager:
        shared_dict = manager.dict()
        shared_list = manager.list()
        processes = [
            multiprocessing.Process(target=_mgr_worker, args=(shared_dict, shared_list, i))
            for i in range(4)
        ]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        print(f"共享字典: {dict(shared_dict)}")
        print(f"共享列表: {list(shared_list)}")
def _square(n: int) -> int:
    """Module-level helper so Pool can pickle it under the spawn start method."""
    return n * n


def pool_demo():
    """Tour of the Pool APIs: map / map_async / apply / imap.

    Pool pickles the callable and sends it to the workers; a function nested
    inside pool_demo (as in the original) is not picklable and breaks under
    the "spawn" start method, so the helper lives at module level.
    """
    with multiprocessing.Pool(processes=4) as pool:
        # map: blocks until all results are in, preserves input order.
        results = pool.map(_square, range(10))
        print(f"map 结果: {results}")
        # map_async: returns immediately; .get() blocks for the results.
        async_result = pool.map_async(_square, range(10))
        results = async_result.get()
        print(f"map_async 结果: {results}")
        # apply: run one call in some worker, synchronously.
        result = pool.apply(_square, (5,))
        print(f"apply 结果: {result}")
        # imap: lazy iterator over results, still in input order.
        for r in pool.imap(_square, range(5)):
            print(f"imap: {r}", end=" ")
        print()
核心差异
| 维度 | Java ProcessBuilder | Python multiprocessing |
|---|
| 创建开销 | 启动新 JVM(秒级) | fork() 或 spawn(毫秒级) |
| IPC 机制 | socket/文件/外部 MQ | Queue/Pipe/Value/Array/Manager |
| 共享内存 | 无内置支持 | Value/Array(高效)、Manager(灵活) |
| 进程池 | 无内置(需线程池或框架) | Pool 内置 |
| 序列化 | 无需(独立进程) | pickle 序列化参数和返回值 |
关键差异:Python 的 multiprocessing 比 Java 的 ProcessBuilder 高级得多——内置了进程池、共享内存、多种 IPC 机制。这是因为 Python 需要 multiprocessing 来弥补 GIL 的不足,所以标准库提供了完整的多进程支持。
常见陷阱
import multiprocessing
def work(q):
pass
pool = multiprocessing.Pool(4)
何时使用
- CPU 密集型并行计算 →
multiprocessing.Pool 是首选
- 需要绕过 GIL 的计算任务 →
multiprocessing.Process
- 需要跨进程共享状态 →
Value/Array(简单)、Manager(灵活但慢)
- I/O 密集型 → 不需要
multiprocessing,threading 或 asyncio 更轻量
7.4 concurrent.futures: 统一的执行器接口
Java/Kotlin 对比
ExecutorService executor = Executors.newFixedThreadPool(4);
Future<Integer> future = executor.submit(() -> {
Thread.sleep(1000);
return 42;
});
int result = future.get();
boolean done = future.isDone();
CompletableFuture.supplyAsync(() -> fetchData())
.thenApply(data -> process(data))
.thenAccept(result -> save(result))
.exceptionally(ex -> { handleError(ex); return null; });
List<Future<?>> futures = ...;
executor.invokeAll(futures);
val result = withContext(Dispatchers.Default) {
heavyComputation()
}
val deferred = async { fetchData() }
val result = deferred.await()
Python 实现
import concurrent.futures
import time
import urllib.request
def fetch_url(url: str) -> tuple[str, int]:
    """Return (url, HTTP status), or (url, -1) on any failure.

    Errors are deliberately collapsed to a -1 sentinel so callers fanning
    out over many URLs never have to unwind a worker exception. The
    exception object was previously bound to an unused variable; it is no
    longer captured.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return url, resp.status
    except Exception:
        return url, -1
def thread_pool_demo():
    """Fan six HTTP requests across a 3-thread pool, two ways.

    First with submit() + as_completed() (results arrive in completion
    order), then with executor.map() (results come back in submission
    order).
    """
    urls = [f"https://httpbin.org/get?id={i}" for i in range(6)]
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # Map each Future back to its URL so completion-order results can
        # still be labelled.
        future_to_url = {
            executor.submit(fetch_url, url): url
            for url in urls
        }
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                _, status = future.result()
                print(f"{url} → {status}")
            except Exception as e:
                print(f"{url} → 错误: {e}")
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # map() keeps input order and re-raises worker exceptions lazily
        # when the corresponding result is consumed.
        results = executor.map(fetch_url, urls)
        for url, status in results:
            print(f"{url} → {status}")
def cpu_task(n: int) -> int:
    """CPU-heavy sum of 0..n-1, used to load the process pool."""
    acc = 0
    for v in range(n):
        acc = acc + v
    return acc
def process_pool_demo():
    """Distribute four CPU-bound sums over a 4-process pool.

    cpu_task must be defined at module level so it can be pickled for the
    worker processes.
    """
    numbers = [10_000_000, 20_000_000, 15_000_000, 25_000_000]
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(cpu_task, n) for n in numbers]
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            print(f"结果: {result}")
        # map() returns results in input order, unlike as_completed().
        results = list(executor.map(cpu_task, numbers))
        print(f"map 结果: {results}")
def future_demo():
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
future = executor.submit(lambda: time.sleep(1) or "done")
print(f"是否完成: {future.done()}")
print(f"是否取消: {future.cancelled()}")
result = future.result(timeout=5)
print(f"结果: {result}")
def wait_demo():
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
futures = {
executor.submit(time.sleep, i): f"task-{i}"
for i in [1, 2, 3, 0.5]
}
done, not_done = concurrent.futures.wait(
futures,
timeout=2,
return_when=concurrent.futures.FIRST_COMPLETED,
)
print(f"已完成: {len(done)}, 未完成: {len(not_done)}")
if __name__ == "__main__":
print("=== ThreadPoolExecutor ===")
thread_pool_demo()
print("\n=== ProcessPoolExecutor ===")
process_pool_demo()
核心差异
| 概念 | Java | Python |
|---|
| 执行器接口 | ExecutorService | Executor(抽象基类) |
| 线程池 | Executors.newFixedThreadPool() | ThreadPoolExecutor |
| 进程池 | 无内置 | ProcessPoolExecutor |
| Future | Future<T> / CompletableFuture | Future |
| 链式回调 | thenApply/thenCompose | add_done_callback(无链式) |
| 等待多个 | invokeAll() | wait() / as_completed() |
关键差异:Python 的 concurrent.futures API 是直接从 Java 的 ExecutorService 借鉴来的。最大的缺失是没有 CompletableFuture 那样的链式组合能力——Python 用 asyncio 来填补这个位置。
常见陷阱
executor = concurrent.futures.ThreadPoolExecutor(4)
def callback(future):
pass
何时使用
- 需要简洁的线程/进程池 API →
concurrent.futures 是最佳选择
- I/O 密集型批量任务 →
ThreadPoolExecutor
- CPU 密集型批量任务 →
ProcessPoolExecutor
- 需要链式异步组合 →
asyncio(concurrent.futures 不支持)
- 简单并行 map →
executor.map() 最简洁
7.5 asyncio: 事件循环与协程
Java/Kotlin 对比
import kotlinx.coroutines.*
suspend fun fetchData(): String {
delay(1000)
return "data"
}
fun main() = runBlocking {
launch {
val data = fetchData()
println(data)
}
val deferred = async { fetchData() }
println(deferred.await())
val results = awaitAll(
async { fetchData() },
async { fetchData() },
)
}
try (var executor = Executors.newVirtualThreadPerTaskExecutor()) {
Future<String> f = executor.submit(() -> {
Thread.sleep(1000);
return "data";
});
f.get();
}
Python 实现
import asyncio
import time
async def main():
print("hello")
await asyncio.sleep(1)
print("world")
async def fetch_data(name: str, delay_sec: float) -> str:
    """Simulate an async I/O call: suspend for delay_sec, then return a tag.

    While this coroutine sits in asyncio.sleep, the event loop is free to
    run other coroutines — that is what the concurrency demos below exploit.
    """
    print(f"{name}: 开始获取数据")
    await asyncio.sleep(delay_sec)
    print(f"{name}: 数据获取完成")
    return f"{name}-result"
async def basic_demo():
r1 = await fetch_data("A", 1)
r2 = await fetch_data("B", 1)
print(f"顺序耗时: ~2s, 结果: {r1}, {r2}")
async def task_demo():
task1 = asyncio.create_task(fetch_data("A", 1))
task2 = asyncio.create_task(fetch_data("B", 1))
r1 = await task1
r2 = await task2
print(f"并发耗时: ~1s, 结果: {r1}, {r2}")
async def gather_demo():
results = await asyncio.gather(
fetch_data("A", 1),
fetch_data("B", 1),
fetch_data("C", 1),
)
print(f"并发耗时: ~1s, 结果: {results}")
async def sleep_comparison():
    """Measure blocking time.sleep against cooperative asyncio.sleep.

    The original printed a time.sleep header but only measured
    asyncio.sleep; the blocking half of the comparison is restored here.
    time.sleep(1) stalls the entire event loop for a second, while
    asyncio.sleep(1) suspends only this coroutine.
    """
    print("--- time.sleep (阻塞事件循环) ---")
    start = time.perf_counter()
    time.sleep(1)  # blocks the loop — shown deliberately for contrast
    print(f"time.sleep 耗时: {time.perf_counter() - start:.2f}s")
    start = time.perf_counter()
    await asyncio.sleep(1)
    print(f"asyncio.sleep 耗时: {time.perf_counter() - start:.2f}s")
async def wrong_sleep_demo():
    """Demonstrate why time.sleep is harmful inside a coroutine.

    bad_task's blocking time.sleep(2) freezes the entire event loop, so
    good_task cannot resume even after its own 1-second timer has expired;
    both only finish once the blocking call returns.
    """
    async def good_task():
        await asyncio.sleep(1)
        return "good"

    async def bad_task():
        # Intentionally wrong: blocks the loop instead of yielding to it.
        time.sleep(2)
        return "bad"

    start = time.perf_counter()
    results = await asyncio.gather(good_task(), bad_task())
    elapsed = time.perf_counter() - start
    print(f"耗时: {elapsed:.2f}s")
async def full_demo():
print("=== 基础协程 ===")
await basic_demo()
print("\n=== Task 并发 ===")
await task_demo()
print("\n=== gather 并发 ===")
await gather_demo()
print("\n=== sleep 对比 ===")
await sleep_comparison()
if __name__ == "__main__":
asyncio.run(full_demo())
asyncio 同步原语 vs Java 对应物
import asyncio
async def producer(queue: asyncio.Queue):
for i in range(5):
await queue.put(f"item-{i}")
print(f"生产: item-{i}")
await asyncio.sleep(0.01)
async def consumer(queue: asyncio.Queue):
while True:
item = await queue.get()
print(f"消费: {item}")
queue.task_done()
if item == "item-4":
break
async def queue_demo():
q = asyncio.Queue(maxsize=3)
await asyncio.gather(producer(q), consumer(q))
asyncio.run(queue_demo())
async def worker(event: asyncio.Event, worker_id: int):
print(f"Worker-{worker_id}: 等待信号...")
await event.wait()
print(f"Worker-{worker_id}: 开始工作!")
async def event_demo():
event = asyncio.Event()
workers = [asyncio.create_task(worker(event, i)) for i in range(3)]
await asyncio.sleep(0.1)
event.set()
await asyncio.gather(*workers)
asyncio.run(event_demo())
async def api_call(sem: asyncio.Semaphore, url: str):
async with sem:
print(f"请求: {url}")
await asyncio.sleep(0.1)
return f"response from {url}"
async def semaphore_demo():
sem = asyncio.Semaphore(3)
urls = [f"/api/{i}" for i in range(10)]
results = await asyncio.gather(*[api_call(sem, url) for url in urls])
print(f"完成 {len(results)} 个请求")
asyncio.run(semaphore_demo())
| 原语 | Python asyncio | Java | Kotlin |
|---|
| 互斥锁 | asyncio.Lock | ReentrantLock | Mutex |
| 信号量 | asyncio.Semaphore | Semaphore | Semaphore |
| 事件 | asyncio.Event | CountDownLatch | CompletableDeferred |
| 队列 | asyncio.Queue | BlockingQueue | Channel |
| 条件 | asyncio.Condition | Condition | Condition |
核心差异
| 维度 | Kotlin 协程 | Python asyncio |
|---|
| 定义 | suspend fun | async def |
| 挂起 | 隐式(调用 suspend 函数) | 显式(await) |
| 启动 | launch { } / async { } | asyncio.create_task() |
| 等待多个 | awaitAll() | asyncio.gather() |
| 事件循环 | 隐式(Dispatchers) | 显式(asyncio.run()) |
| 取消 | Job.cancel() | Task.cancel() |
| 结构化并发 | 语言级保证 | 3.11+ TaskGroup |
关键差异:
- Kotlin 的
suspend 是隐式的——调用 suspend 函数自动挂起。Python 的 await 是显式的——你必须写 await,否则协程不会执行。
- Kotlin 有结构化并发的语言级保证——
coroutineScope 自动等待子协程。Python 3.11 之前没有这个保证(TaskGroup 弥补了这一点)。
- Python 的事件循环是显式的——
asyncio.run() 创建并管理循环。Kotlin 的调度器更抽象。
常见陷阱
async def fetch():
    """Well-formed coroutine: await it to obtain "data"."""
    await asyncio.sleep(1)
    return "data"


async def bad():
    # Pitfall: calling a coroutine function without await only creates the
    # coroutine object — nothing runs. This prints a repr like
    # "<coroutine object fetch at 0x...>" and triggers a
    # "coroutine ... was never awaited" RuntimeWarning.
    result = fetch()
    print(result)
何时使用
- 高并发 I/O(HTTP 请求、数据库查询、WebSocket) →
asyncio 是最佳选择
- 需要数千甚至数万并发连接 →
asyncio(线程模型做不到)
- 已有同步代码库 →
threading 或 concurrent.futures(改造成本低)
- CPU 密集型 →
multiprocessing(asyncio 无法绕过 GIL)
7.6 async/await 深入
Java/Kotlin 对比
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.sync.*
val channel = Channel<String>()
val mutex = Mutex()
mutex.withLock { }
val semaphore = Semaphore(3)
semaphore.acquire()
try { } finally { semaphore.release() }
flow {
for (i in 1..10) {
delay(100)
emit(i)
}
}.collect { println(it) }
Python 实现
import asyncio
async def async_range(n: int):
    """Async generator: yield 0..n-1, suspending 0.1 s before each value."""
    current = 0
    while current < n:
        await asyncio.sleep(0.1)
        yield current
        current += 1
async def async_for_demo():
print("--- async for ---")
async for value in async_range(5):
print(f" 收到: {value}")
class AsyncResource:
"""模拟异步资源(如数据库连接、HTTP 会话)"""
async def __aenter__(self):
print(" 打开资源(异步)")
await asyncio.sleep(0.1)
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
print(" 关闭资源(异步)")
await asyncio.sleep(0.1)
return False
async def async_with_demo():
print("--- async with ---")
async with AsyncResource() as resource:
print(" 使用资源中...")
await asyncio.sleep(0.2)
async def queue_demo():
    """Two producers and two consumers over a bounded asyncio.Queue.

    maxsize=3 applies back-pressure: put() suspends while the queue is
    full. The consumers loop forever, so after the producers finish we wait
    for queue.join() (every item marked task_done) and only then cancel the
    consumer tasks.
    """
    print("--- asyncio.Queue ---")
    queue: asyncio.Queue[str] = asyncio.Queue(maxsize=3)
    async def producer():
        for i in range(5):
            await asyncio.sleep(0.1)
            await queue.put(f"item-{i}")
            print(f" 生产: item-{i}")
    async def consumer(name: str):
        while True:
            item = await queue.get()
            print(f" {name} 消费: {item}")
            queue.task_done()
    producers = [asyncio.create_task(producer()) for _ in range(2)]
    consumers = [asyncio.create_task(consumer(f"C{i}")) for i in range(2)]
    await asyncio.gather(*producers)
    # Wait until every produced item has been consumed and acknowledged ...
    await queue.join()
    # ... then stop the infinite consumer loops.
    for c in consumers:
        c.cancel()
async def lock_demo():
    """Show that even single-threaded coroutines need a lock across awaits.

    The read → await → write sequence in increment() is a critical section:
    without the lock, every task would read the same stale value during the
    sleep and most increments would be lost.
    """
    print("--- asyncio.Lock ---")
    lock = asyncio.Lock()
    shared_counter = 0
    async def increment():
        nonlocal shared_counter
        async with lock:
            temp = shared_counter
            # The await here is what makes the race possible without a lock.
            await asyncio.sleep(0.001)
            shared_counter = temp + 1
    tasks = [asyncio.create_task(increment()) for _ in range(100)]
    await asyncio.gather(*tasks)
    print(f" 计数器: {shared_counter}")
async def semaphore_demo():
print("--- asyncio.Semaphore ---")
sem = asyncio.Semaphore(2)
async def limited_task(name: str):
async with sem:
print(f" {name}: 开始")
await asyncio.sleep(1)
print(f" {name}: 完成")
tasks = [asyncio.create_task(limited_task(f"T{i}")) for i in range(5)]
await asyncio.gather(*tasks)
async def full_demo():
await async_for_demo()
await async_with_demo()
await queue_demo()
await lock_demo()
await semaphore_demo()
if __name__ == "__main__":
asyncio.run(full_demo())
核心差异
| 概念 | Kotlin | Python asyncio |
|---|
| 异步迭代 | Flow.collect {} | async for |
| 异步资源管理 | use {}(扩展函数) | async with |
| 异步队列 | Channel | asyncio.Queue |
| 异步锁 | Mutex.withLock {} | async with asyncio.Lock() |
| 异步信号量 | Semaphore.acquire() | async with asyncio.Semaphore() |
| 异步生成器 | flow { emit() } | async def + yield |
关键差异:Python 的异步原语(async for、async with)是语言级语法。Kotlin 的 Flow、Channel 是库级抽象。Python 的方式更直接,Kotlin 的方式更灵活(可以组合各种操作符)。
常见陷阱
何时使用
- 流式数据处理 →
async for + 异步生成器
- 数据库连接、HTTP 会话管理 →
async with
- 生产者-消费者模式 →
asyncio.Queue
- 协程间共享资源保护 →
asyncio.Lock
- 限制并发数(如限流) →
asyncio.Semaphore
7.7 TaskGroup (3.11+) 与结构化并发
Java/Kotlin 对比
suspend fun structuredConcurrency() = coroutineScope {
launch {
delay(1000)
println("子任务1完成")
}
launch {
delay(500)
println("子任务2完成")
}
}
try (var scope = new StructuredTaskScope.ShutdownOnFailure()) {
Subtask<String> task1 = scope.fork(() -> fetchUser());
Subtask<String> task2 = scope.fork(() -> fetchOrder());
scope.join();
scope.throwIfFailed();
}
Python 实现
import asyncio
async def taskgroup_demo():
    """TaskGroup basics: the async with block waits for all three children.

    On scope exit every child has completed, so calling task.result()
    afterwards is safe.
    """
    print("=== TaskGroup 基础 ===")
    async def worker(name: str, delay_sec: float) -> str:
        print(f" {name}: 开始")
        await asyncio.sleep(delay_sec)
        print(f" {name}: 完成")
        return f"{name}-result"
    async with asyncio.TaskGroup() as tg:
        task1 = tg.create_task(worker("A", 1))
        task2 = tg.create_task(worker("B", 0.5))
        task3 = tg.create_task(worker("C", 0.8))
    # All tasks are guaranteed finished here.
    print(f" 结果: {task1.result()}, {task2.result()}, {task3.result()}")
async def taskgroup_error_demo():
print("\n=== TaskGroup 异常处理 ===")
async def may_fail(name: str, fail: bool, delay: float):
await asyncio.sleep(delay)
if fail:
raise ValueError(f"{name} 失败了!")
print(f" {name}: 成功")
return f"{name}-ok"
try:
async with asyncio.TaskGroup() as tg:
tg.create_task(may_fail("A", False, 0.5))
tg.create_task(may_fail("B", True, 0.3))
tg.create_task(may_fail("C", False, 1.0))
except ValueError* as eg:
print(f" 捕获异常组: {eg}")
for exc in eg.exceptions:
print(f" - {exc}")
async def manual_approach():
    """Pre-3.11 hand-rolled task management — shown as the error-prone way.

    NOTE(review): with return_exceptions=True, gather() reports child
    exceptions inside the result list instead of raising, so this except
    branch is effectively dead — exactly the kind of subtle mistake that
    TaskGroup removes.
    """
    tasks = []
    try:
        tasks.append(asyncio.create_task(asyncio.sleep(0.5)))
        tasks.append(asyncio.create_task(asyncio.sleep(0.3)))
        results = await asyncio.gather(*tasks, return_exceptions=True)
    except Exception:
        for t in tasks:
            t.cancel()
        raise
async def taskgroup_approach():
"""3.11+ TaskGroup — 自动管理"""
async with asyncio.TaskGroup() as tg:
tg.create_task(asyncio.sleep(0.5))
tg.create_task(asyncio.sleep(0.3))
async def taskgroup_results():
    """Collect completion order via add_done_callback on each task.

    The idx=i default argument pins the loop variable at definition time —
    without it every callback would see the final value of i (late-binding
    closure pitfall).
    """
    print("\n=== TaskGroup 收集结果 ===")
    results: list[str] = []
    async with asyncio.TaskGroup() as tg:
        for i in range(5):
            task = tg.create_task(asyncio.sleep(0.1 * i))
            task.add_done_callback(
                lambda t, idx=i: results.append(f"task-{idx}")
            )
    print(f" 完成顺序: {results}")
if __name__ == "__main__":
asyncio.run(taskgroup_demo())
asyncio.run(taskgroup_error_demo())
asyncio.run(taskgroup_results())
核心差异
| 维度 | Kotlin coroutineScope | Python TaskGroup |
|---|
| 结构化保证 | 语言级 | 库级(3.11+) |
| 异常传播 | 取消所有子协程 | 取消所有子任务 |
| 结果收集 | List<Deferred> | add_done_callback 或外部列表 |
| 超时 | withTimeout() | asyncio.timeout() [3.11+] |
| 嵌套 | 天然支持 | 天然支持(嵌套 async with) |
关键差异:Python 的 TaskGroup 是对 Kotlin coroutineScope 的直接借鉴,但实现更简单。Kotlin 的结构化并发是编译器保证的,Python 的 TaskGroup 是运行时保证的。
常见陷阱
async def bad():
async with asyncio.TaskGroup() as tg:
task = tg.create_task(asyncio.sleep(1))
async def tricky():
result = None
try:
async with asyncio.TaskGroup() as tg:
result = await tg.create_task(some_task())
except* Exception:
if result is None:
print("任务未完成")
何时使用
- Python 3.11+ 的所有新代码 → 优先使用
TaskGroup 替代手动任务管理
- 需要"全部成功或全部回滚"的语义 →
TaskGroup
- 嵌套并发操作 →
TaskGroup 天然支持嵌套
- Python 3.10 → 用
asyncio.gather(return_exceptions=True) + 手动取消
7.8 ExceptionGroup 与 except* (3.11+, PEP 654)
Java/Kotlin 对比
try {
} catch (Exception e) {
}
Python 实现
import asyncio
async def exceptiongroup_basic():
print("=== ExceptionGroup 基础 ===")
async def failing_task(name: str):
await asyncio.sleep(0.1)
raise ValueError(f"{name} 失败")
try:
async with asyncio.TaskGroup() as tg:
tg.create_task(failing_task("A"))
tg.create_task(failing_task("B"))
tg.create_task(failing_task("C"))
except* ValueError as eg:
print(f" 捕获到 {len(eg.exceptions)} 个 ValueError:")
for exc in eg.exceptions:
print(f" - {exc}")
async def except_star_demo():
    """Route mixed failures to type-specific except* handlers.

    Each except* clause receives a sub-group containing only its matching
    exceptions. NOTE(review): the RuntimeError from task D matches neither
    clause, so it propagates out of this coroutine inside an
    ExceptionGroup — presumably intentional to show partial handling;
    confirm against the chapter's intent.
    """
    print("\n=== except* 选择性处理 ===")
    async def mixed_task(name: str, error_type: str):
        await asyncio.sleep(0.1)
        if error_type == "value":
            raise ValueError(f"{name}: ValueError")
        elif error_type == "type":
            raise TypeError(f"{name}: TypeError")
        elif error_type == "runtime":
            raise RuntimeError(f"{name}: RuntimeError")
    try:
        async with asyncio.TaskGroup() as tg:
            tg.create_task(mixed_task("A", "value"))
            tg.create_task(mixed_task("B", "type"))
            tg.create_task(mixed_task("C", "value"))
            tg.create_task(mixed_task("D", "runtime"))
    except* ValueError as eg:
        print(f" 处理 ValueError: {len(eg.exceptions)} 个")
        for exc in eg.exceptions:
            print(f" - {exc}")
    except* TypeError as eg:
        print(f" 处理 TypeError: {len(eg.exceptions)} 个")
        for exc in eg.exceptions:
            print(f" - {exc}")
def manual_exceptiongroup():
print("\n=== 手动创建 ExceptionGroup ===")
errors = [
ValueError("错误1"),
TypeError("错误2"),
RuntimeError("错误3"),
]
eg = ExceptionGroup("多个错误", errors)
print(f" ExceptionGroup: {eg}")
print(f" 包含 {len(eg.exceptions)} 个异常")
try:
raise eg
except* ValueError:
print(" 处理了 ValueError")
except* (TypeError, RuntimeError):
print(" 处理了 TypeError 和 RuntimeError")
async def nested_exceptiongroup():
    """Inner TaskGroups handle their own errors; only the outer one escapes.

    outer_task raises a ValueError inside its own TaskGroup scope and
    catches it locally with except*, so the outer group only ever sees the
    RuntimeError from failing_outer.
    """
    print("\n=== 嵌套 ExceptionGroup ===")
    async def outer_task(name: str):
        try:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(asyncio.sleep(0.1))
                # Raising inside the async with body aborts this inner group.
                raise ValueError(f"{name}: 内层错误")
        except* ValueError as eg:
            print(f" {name}: 内层处理了 {len(eg.exceptions)} 个异常")
    async def failing_outer():
        await asyncio.sleep(0.05)
        raise RuntimeError("外层错误")
    try:
        async with asyncio.TaskGroup() as tg:
            tg.create_task(outer_task("A"))
            tg.create_task(outer_task("B"))
            tg.create_task(failing_outer())
    except* RuntimeError as eg:
        print(f" 外层捕获 RuntimeError: {eg.exceptions}")
async def batch_operation(urls: list[str]) -> dict[str, str]:
"""模拟批量请求,部分失败不影响其他请求"""
results: dict[str, str] = {}
errors: list[Exception] = []
async def fetch(url: str):
await asyncio.sleep(0.1)
if "bad" in url:
raise ConnectionError(f"无法连接 {url}")
return f"{url} 的内容"
try:
async with asyncio.TaskGroup() as tg:
tasks = {tg.create_task(fetch(url)): url for url in urls}
except* ConnectionError as eg:
errors.extend(eg.exceptions)
for task, url in tasks.items():
if task.done() and not task.cancelled():
try:
results[url] = task.result()
except Exception:
pass
print(f" 成功: {len(results)}, 失败: {len(errors)}")
return results
async def batch_demo():
print("\n=== 批量操作部分失败 ===")
urls = ["url-1", "url-2-bad", "url-3", "url-4-bad", "url-5"]
results = await batch_operation(urls)
print(f" 结果: {results}")
if __name__ == "__main__":
asyncio.run(exceptiongroup_basic())
asyncio.run(except_star_demo())
manual_exceptiongroup()
asyncio.run(nested_exceptiongroup())
asyncio.run(batch_demo())
核心差异
| 维度 | Java/Kotlin | Python 3.11+ |
|---|
| 多异常聚合 | 无原生支持 | ExceptionGroup |
| 选择性处理 | 无 | except* |
| 嵌套异常组 | 无 | 天然支持 |
| 与并发集成 | 无 | TaskGroup 自动创建 |
关键差异:ExceptionGroup + except* 是 Python 独有的特性,Java/Kotlin 没有等价物。这不是一个"对比"关系,而是 Python 在并发异常处理方面的创新。它解决了"多个并发任务同时失败时如何优雅处理"这个普遍问题。
常见陷阱
try:
raise ExceptionGroup("errors", [ValueError("a"), TypeError("b")])
except Exception as e:
print(type(e))
try:
raise ExceptionGroup("errors", [ValueError("a"), TypeError("b")])
except* ValueError as eg:
print(f"ValueError: {eg.exceptions}")
except* TypeError as eg:
print(f"TypeError: {eg.exceptions}")
try:
raise ExceptionGroup("errors", [ValueError("a")])
except* ValueError:
raise RuntimeError("处理失败")
何时使用
- 并发任务可能部分失败 →
except* 精确处理不同类型的异常
- 批量操作(如批量 API 调用) →
ExceptionGroup 收集所有失败
- 需要区分"已处理"和"未处理"的异常 →
except* 的选择性处理
- Python 3.10 及以下 → 用
return_exceptions=True + 手动检查
7.9 自由线程实验: PEP 703 (3.13)
Java/Kotlin 对比
Python 实现
import threading
import time
def cpu_bound_work(n: int) -> int:
    """Pure-Python summation of 0..n-1; holds the GIL for its entire run."""
    acc = 0
    for v in range(n):
        acc = acc + v
    return acc
def demo_gil_limitation():
    """Time sequential vs threaded CPU work to show the GIL's effect.

    On a standard (GIL) build the threaded run is no faster than the
    sequential one — only one thread executes bytecode at a time — so the
    printed speed-up hovers around 1x.
    """
    N = 10_000_000
    WORKERS = 4
    start = time.perf_counter()
    for _ in range(WORKERS):
        cpu_bound_work(N)
    sequential_time = time.perf_counter() - start
    start = time.perf_counter()
    threads = [
        threading.Thread(target=cpu_bound_work, args=(N,))
        for _ in range(WORKERS)
    ]
    for t in threads: t.start()
    for t in threads: t.join()
    threaded_time = time.perf_counter() - start
    print(f"顺序执行: {sequential_time:.2f}s")
    print(f"多线程(GIL): {threaded_time:.2f}s")
    print(f"加速比: {sequential_time / threaded_time:.2f}x")
def demo_thread_safety_concerns():
    """Sketch of a counter race that free-threaded (no-GIL) Python exposes.

    NOTE(review): increment is defined but never invoked here — the
    function is illustrative only. `counter += 1` is a read-modify-write,
    so with the GIL removed, concurrent threads running it could interleave
    and drop updates unless a lock is added.
    """
    counter = 0

    def increment():
        nonlocal counter
        for _ in range(100_000):
            counter += 1
"""
no-GIL 模式对 C 扩展有重大影响:
1. 依赖 GIL 的 C 扩展需要修改:
- 移除 Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS(不再需要释放 GIL)
- 添加自己的线程安全机制
2. 使用引用计数的 C 代码需要修改:
- Py_INCREF / Py_DECREF 不再是线程安全的
- 需要使用新的线程安全引用计数 API
3. ABI 不兼容:
- no-GIL 模式的 Python 有不同的 ABI
- C 扩展需要为 no-GIL 模式单独编译
- pip install 的预编译包可能不可用
4. 已适配的包:
- NumPy, pandas 等核心科学计算库正在适配
- 适配进度: https://github.com/python/cpython/issues/116165
"""
"""
# 方法1: 使用 python3.13t 可执行文件
python3.13t my_script.py
# 方法2: 编译时禁用 GIL
./configure --disable-gil
make
# 方法3: 运行时检查
"""
import sys
def check_gil_status():
    """Report whether this interpreter is running with the GIL enabled."""
    # Guard clause: builds before 3.13 cannot disable the GIL at all.
    if sys.version_info < (3, 13):
        print(f"GIL 状态: 启用(Python {sys.version_info.major}.{sys.version_info.minor} 不支持禁用 GIL)")
        return
    try:
        gil_enabled = sys._is_gil_enabled()
    except AttributeError:
        # 3.13+, but a standard (non free-threaded) build.
        print("GIL 状态: 启用(标准构建)")
    else:
        print(f"GIL 状态: {'启用' if gil_enabled else '禁用'}")
"""
import interpreters # 3.12+ 实验性模块
# 创建独立的子解释器(有自己的 GIL)
interp = interpreters.create()
# 在子解释器中运行代码
interp.run('''
import threading
# 这个解释器有自己的 GIL,不影响主解释器
''')
# 这与 multiprocessing 不同:
# - 子解释器共享同一个进程(更轻量)
# - 但有独立的 GIL(不互相阻塞)
# - 目前仍有很多限制(不能共享大多数对象)
"""
if __name__ == "__main__":
print("=== GIL 限制演示 ===")
demo_gil_limitation()
print("\n=== GIL 状态检查 ===")
check_gil_status()
核心差异
| 维度 | Java/Kotlin | Python (有 GIL) | Python (no-GIL, 3.13t) |
|---|
| CPU 并行 | 天然支持 | 需要多进程 | 多线程即可 |
| 线程安全 | 开发者负责 | GIL 保证 | 开发者负责 |
| 同步需求 | 必须 | 大部分不需要 | 必须 |
| C 扩展 | 无影响 | 无影响 | 需要重新适配 |
| 生态成熟度 | 成熟 | 成熟 | 实验性 |
关键差异:no-GIL Python 让 Python 的线程模型向 Java/Kotlin 靠拢——获得了 CPU 并行能力,但也失去了 GIL 提供的"免费线程安全"。这是一个权衡,不是纯粹的升级。
常见陷阱
何时使用
- 当前生产环境 → 不要使用 no-GIL,用
multiprocessing 绕过 GIL
- 实验和评估 → 可以用
python3.13t 测试你的工作负载
- CPU 密集型且进程间通信开销大 → no-GIL 是未来的解决方案
- 依赖大量 C 扩展 → 等待生态系统适配(关注 NumPy、pandas 等的适配进度)
本章总结: Python 并发决策树
你的任务是什么类型?
│
├─ I/O 密集型(网络请求、文件、数据库)
│ ├─ 少量并发(< 100)→ threading 或 ThreadPoolExecutor
│ ├─ 大量并发(> 100)→ asyncio
│ └─ 已有同步代码库 → ThreadPoolExecutor(改造成本最低)
│
├─ CPU 密集型(计算、图像处理、机器学习)
│ ├─ 简单并行 map → ProcessPoolExecutor
│ ├─ 需要共享状态 → multiprocessing.Value/Array
│ └─ 大数据量 → 考虑 Dask、Ray 等分布式框架
│
├─ 混合型(CPU + I/O)
│ ├─ asyncio + ProcessPoolExecutor(推荐)
│ └─ threading + multiprocessing(更简单但不够优雅)
│
└─ 未来(Python 3.15+ no-GIL 稳定后)
└─ 可能不再需要区分 CPU/I/O,多线程通吃
与 Java/Kotlin 的心智模型对照
| 你在 Java/Kotlin 中做的 | 在 Python 中应该做的 |
|---|
new Thread() / thread {} | threading.Thread()(I/O)或 multiprocessing.Process()(CPU) |
ExecutorService | concurrent.futures.ThreadPoolExecutor(I/O)或 ProcessPoolExecutor(CPU) |
CompletableFuture / async/await | asyncio + async/await |
coroutineScope | asyncio.TaskGroup [3.11+] |
synchronized / ReentrantLock | threading.Lock()(线程)或 asyncio.Lock()(协程) |
volatile / AtomicInteger | 通常不需要(GIL 保护),no-GIL 后需要 |
CountDownLatch | threading.Event() |
Semaphore | threading.Semaphore() 或 asyncio.Semaphore() |
BlockingQueue | queue.Queue(线程)或 asyncio.Queue(协程) |
| 多异常处理 | ExceptionGroup + except* [3.11+](Python 独有) |