===
引言
提供Python处理Claude API并发请求与速率限制的完整代码示例,配合jiekou.ai中转服务,国内免翻墙直接使用,支持并发控制、自动重试,按量计费低成本开发。
本文将详细介绍:
-
Claude API并发请求处理方式
-
Anthropic速率限制规则与各层级对比
-
自动重试与限流处理代码
-
国内用户使用jiekou.ai的优势
一、Anthropic的速率限制
限制维度
Anthropic从三个维度限制API调用:
-
RPM (Requests Per Minute):每分钟请求次数
-
TPM (Tokens Per Minute):每分钟处理的token数
-
TPD (Tokens Per Day):每天处理的token总量
不同层级的限制
Free Tier: RPM 5 / TPM 25,000 / TPD 300,000
Build Tier 1: RPM 50 / TPM 50,000 / TPD 1,000,000
Build Tier 2: RPM 1,000 / TPM 100,000 / TPD 2,500,000
Build Tier 3: RPM 2,000 / TPM 200,000 / TPD 5,000,000
注:以上数据来自官方文档,具体限制以官方最新说明为准。
速率限制响应
触发速率限制时,API返回 429 Too Many Requests:
{
"type": "error",
"error": {
"type": "rate_limit_error",
"message": "Rate limit exceeded. Please retry after 30 seconds."
}
}
响应头中包含重试等待时间:
retry-after: 30
x-ratelimit-limit-requests: 50
x-ratelimit-remaining-requests: 0
x-ratelimit-reset-requests: 2024-01-01T00:01:00Z
二、并发请求处理
基础并发模式
使用 asyncio 实现并发请求:
import asyncio
import anthropic
# NOTE: the original `from typing import list` was removed — `typing` exports
# `List`, not `list`, so that line raises ImportError. The annotations below
# use the builtin generic `list[str]` (Python 3.9+), which needs no import.


async def process_single(client: anthropic.AsyncAnthropic, prompt: str) -> str:
    """Send one prompt to the Claude API and return the reply text."""
    message = await client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )
    return message.content[0].text


async def process_batch(prompts: list[str], max_concurrent: int = 5) -> list[str]:
    """Process *prompts* concurrently with at most *max_concurrent* in flight.

    Returns one entry per prompt, in input order. Because of
    ``return_exceptions=True``, a failed request yields the exception
    object in place of a string rather than aborting the whole batch.
    """
    client = anthropic.AsyncAnthropic(
        api_key="your-api-key",
        base_url="https://api.jiekou.ai/v1"
    )
    # The semaphore caps how many API calls run simultaneously.
    semaphore = asyncio.Semaphore(max_concurrent)

    async def process_with_semaphore(prompt: str) -> str:
        async with semaphore:
            return await process_single(client, prompt)

    tasks = [process_with_semaphore(p) for p in prompts]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    await client.close()
    return results


# 使用示例
prompts = [f"问题{i}:Python有什么优势?" for i in range(20)]
results = asyncio.run(process_batch(prompts, max_concurrent=5))
带速率限制的并发控制
import asyncio
import time
from collections import deque
class RateLimiter:
    """Sliding-window rate limiter for asyncio code.

    Allows at most *max_requests* acquisitions per *window_seconds*;
    callers that exceed the budget sleep until the oldest timestamp
    ages out of the window.
    """

    def __init__(self, max_requests: int, window_seconds: int = 60):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self.requests = deque()      # timestamps of recent acquisitions, oldest first
        self.lock = asyncio.Lock()   # serializes bookkeeping across tasks

    async def acquire(self):
        """Block until a request slot is free, then consume it."""
        async with self.lock:
            now = time.time()
            # Evict timestamps that have fallen out of the window.
            while self.requests and self.requests[0] < now - self.window_seconds:
                self.requests.popleft()
            if len(self.requests) >= self.max_requests:
                wait_time = self.window_seconds - (now - self.requests[0])
                if wait_time > 0:
                    # Sleeping while holding the lock intentionally makes
                    # waiters wake one at a time, in FIFO order.
                    await asyncio.sleep(wait_time)
                # Fix: the slot we just waited out has expired — remove it
                # so the deque length cannot creep past max_requests and
                # skew the next waiter's wait computation.
                if self.requests:
                    self.requests.popleft()
            self.requests.append(time.time())
# Shared limiter: at most 40 requests per rolling 60-second window.
rate_limiter = RateLimiter(max_requests=40, window_seconds=60)


async def process_with_rate_limit(client, prompt: str) -> str:
    """Wait for a rate-limit slot, then send *prompt* to Claude and return the reply text."""
    await rate_limiter.acquire()
    response = await client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=512,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text
三、自动重试机制
使用tenacity库
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type
)
import anthropic


# Retry policy: up to 5 attempts with exponential backoff (4s..60s),
# triggered only by rate-limit (429) errors.
@retry(
    retry=retry_if_exception_type(anthropic.RateLimitError),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=60)
)
def call_claude_with_retry(client: anthropic.Anthropic, prompt: str) -> str:
    """Send *prompt* to Claude, transparently retrying when rate-limited."""
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.content[0].text


client = anthropic.Anthropic(
    api_key="your-api-key",
    base_url="https://api.jiekou.ai/v1"
)

try:
    result = call_claude_with_retry(client, "解释一下量子计算")
    print(result)
except anthropic.RateLimitError:
    # All retry attempts were exhausted.
    print("多次重试后仍然失败,请稍后再试")
手动指数退避
import time
import anthropic

def call_with_exponential_backoff(
    client: anthropic.Anthropic,
    prompt: str,
    max_retries: int = 5
) -> str:
    """Call Claude with manual retry logic.

    Retries on 429 (honouring the retry-after header when parseable) and
    on server-side failures with plain exponential backoff. Re-raises the
    final RateLimitError when attempts are exhausted; other client errors
    propagate immediately.
    """
    for attempt in range(max_retries):
        try:
            message = client.messages.create(
                model="claude-3-5-sonnet-20241022",
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}]
            )
            return message.content[0].text
        except anthropic.RateLimitError as e:
            if attempt == max_retries - 1:
                raise
            # Fix: the header may be absent or non-integer (e.g. "30.5");
            # fall back to exponential backoff instead of crashing on int().
            header = e.response.headers.get("retry-after")
            try:
                retry_after = int(float(header)) if header is not None else 2 ** attempt
            except (TypeError, ValueError):
                retry_after = 2 ** attempt
            print(f"速率限制,{retry_after}秒后重试(第{attempt + 1}次)")
            time.sleep(retry_after)
        except anthropic.APIError as e:
            # Fix: not every APIError carries status_code (connection errors
            # don't) — accessing it unconditionally raised AttributeError.
            # Treat status-less errors as transient, like 5xx.
            status = getattr(e, "status_code", None)
            if status is None or status >= 500:
                wait = 2 ** attempt
                print(f"服务器错误,{wait}秒后重试")
                time.sleep(wait)
            else:
                raise
    raise RuntimeError("超过最大重试次数")
四、批量任务处理
生产者-消费者模式
import asyncio
from asyncio import Queue
import anthropic
# Number of concurrent consumer tasks.
NUM_WORKERS = 5


async def producer(queue: Queue, prompts: list):
    """Feed (index, prompt) pairs into *queue*, then one shutdown sentinel per worker."""
    for idx, text in enumerate(prompts):
        await queue.put((idx, text))
    # One None per worker: each consumer exits when it pulls a sentinel.
    for _ in range(NUM_WORKERS):
        await queue.put(None)
async def consumer(worker_id: int, queue: Queue, results: dict, client: anthropic.AsyncAnthropic):
    """Worker loop: pull (index, prompt) items off *queue* until a None sentinel.

    Stores each reply (or an error string) in *results* keyed by the
    prompt's original index so callers can restore input order.
    """
    while True:
        item = await queue.get()
        if item is None:
            # Fix: mark the sentinel as processed too; without this,
            # Queue.join() would never unblock for callers that use it.
            queue.task_done()
            break
        idx, prompt = item
        try:
            message = await client.messages.create(
                model="claude-3-5-haiku-20241022",
                max_tokens=512,
                messages=[{"role": "user", "content": prompt}]
            )
            results[idx] = message.content[0].text
        except Exception as e:
            # Best-effort: record the failure in place of a reply so one
            # bad prompt does not abort the whole batch.
            results[idx] = f"错误: {str(e)}"
        queue.task_done()
async def batch_process(prompts: list) -> list:
    """Run all *prompts* through Claude via a worker pool; return replies in input order."""
    client = anthropic.AsyncAnthropic(
        api_key="your-api-key",
        base_url="https://api.jiekou.ai/v1"
    )
    results = {}
    # Bounded queue so the producer applies backpressure to slow workers.
    queue = Queue(maxsize=20)
    workers = [
        asyncio.create_task(consumer(i, queue, results, client))
        for i in range(NUM_WORKERS)
    ]
    await producer(queue, prompts)
    await asyncio.gather(*workers)
    await client.close()
    # Reassemble replies in the original prompt order.
    return [results[i] for i in range(len(prompts))]
五、使用jiekou.ai的优势
对于国内开发者,使用 jiekou.ai 作为Claude API中转有以下优势:
-
国内访问:官方需要代理,jiekou.ai 直连稳定
-
速率限制:官方按账号层级限制,jiekou.ai 弹性扩容
-
并发支持:官方受账号等级限制,jiekou.ai 按需调整
-
计费方式:官方美元需信用卡,jiekou.ai 人民币支持支付宝/微信
jiekou.ai提供更高的默认并发限制,适合批量处理任务。
六、监控与统计
统计请求成功率
import time
from dataclasses import dataclass, field
@dataclass
class APIStats:
    """Mutable counters for monitoring Claude API usage.

    Counters accumulate from *start_time*; call ``report()`` for a
    human-readable summary.
    """
    total_requests: int = 0    # all requests attempted
    success_count: int = 0     # requests that completed normally
    rate_limit_count: int = 0  # 429 responses observed
    error_count: int = 0       # non-rate-limit failures
    total_tokens: int = 0      # cumulative token consumption
    start_time: float = field(default_factory=time.time)

    @property
    def success_rate(self) -> float:
        """Percentage of successful requests; 0.0 when nothing was sent."""
        if self.total_requests == 0:
            return 0.0  # fix: honour the float return annotation
        return self.success_count / self.total_requests * 100

    @property
    def rpm(self) -> float:
        """Average requests per minute since start_time."""
        elapsed = (time.time() - self.start_time) / 60
        # fix: guard against zero AND negative elapsed (clock adjustments),
        # and return a float to match the annotation.
        if elapsed <= 0:
            return 0.0
        return self.total_requests / elapsed

    def report(self):
        """Print a summary of all tracked metrics."""
        print(f"总请求数: {self.total_requests}")
        print(f"成功率: {self.success_rate:.1f}%")
        print(f"速率限制次数: {self.rate_limit_count}")
        print(f"平均RPM: {self.rpm:.1f}")
        print(f"总Token消耗: {self.total_tokens:,}")
总结
处理Claude API并发和速率限制的核心原则:
-
控制并发数:使用信号量或令牌桶算法,不要盲目并发
-
指数退避重试:遇到429错误时,按指数增长等待时间
-
读取响应头:利用 retry-after 头获取精确等待时间
-
监控关键指标:实时跟踪成功率、RPM、token消耗
-
选对模型:批量任务用haiku,复杂任务用sonnet
对于国内开发者,推荐使用 jiekou.ai 作为Claude API接入点,不仅解决网络问题,还提供更灵活的并发支持。
🔗 立即注册 jiekou.ai:jiekou.ai
API接入地址:api.jiekou.ai/v1(完全兼容Anthropic SDK)