第4部分:异步任务处理 - 解放主请求线程
4.1 为什么需要异步任务队列
FastAPI的BackgroundTasks适合轻量级操作,但对于耗时任务(发邮件、生成报表、图片处理等),应该使用专业的任务队列:
from fastapi import BackgroundTasks
import asyncio
# ❌ 不适用于耗时任务
# ❌ Anti-example: a 30-second synchronous job queued via BackgroundTasks.
# BackgroundTasks runs after the response is sent, but the work still occupies
# this worker process — long jobs belong in a real task queue (Celery/RQ).
@app.post("/generate-report")
async def generate_report(bg: BackgroundTasks):
    def generate_report_sync():
        time.sleep(30)  # simulated 30s of work; NOTE: `time` must be imported
        return "报告生成完成"
    bg.add_task(generate_report_sync)  # ties up this worker process!
    return {"status": "processing"}
# ✅ 正确做法:使用Celery或RQ
4.2 集成Celery异步任务队列
创建 app/celery_app.py:
from celery import Celery
from celery.schedules import crontab
import asyncio
from app.database import AsyncSessionLocal
from app.models import Order, User
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
# 创建Celery应用
# Celery application: Redis DB 1 as broker, DB 2 as result backend.
# NOTE(review): broker/backend URLs are hard-coded to localhost, while the
# docker-compose setup passes a REDIS_URL env var — confirm these are read
# from the environment in real deployments.
celery_app = Celery(
    'fastapi_tasks',
    broker='redis://localhost:6379/1',   # Redis as the message broker
    backend='redis://localhost:6379/2',  # Redis as the result backend
    include=['app.tasks']  # modules imported so their tasks get registered
)
# Celery runtime configuration.
celery_app.conf.update(
    task_serializer='json',
    accept_content=['json'],
    result_serializer='json',
    timezone='Asia/Shanghai',
    enable_utc=True,
    task_track_started=True,          # report STARTED, not just PENDING/SUCCESS
    task_time_limit=30 * 60,          # hard kill after 30 minutes
    task_soft_time_limit=25 * 60,     # soft limit raised at 25 minutes
    worker_prefetch_multiplier=1,     # fetch one task at a time (fair for long tasks)
    worker_max_tasks_per_child=1000,  # recycle workers to contain memory growth
    beat_schedule={
        # Daily cache cleanup at 01:00.
        'clean_expired_cache': {
            'task': 'app.tasks.clean_expired_cache',
            'schedule': crontab(hour=1, minute=0),
        },
        # Sync user activity every 5 minutes.
        'sync_user_activity': {
            'task': 'app.tasks.sync_user_activity',
            'schedule': 300.0,  # seconds
        },
    }
)
# Async-capable Celery task base class.
class AsyncTask(celery_app.Task):
    """Celery task base class for tasks that need async database access.

    Fix: a class passed as ``base=`` to ``@celery_app.task`` must derive from
    ``celery.Task`` (here via ``celery_app.Task``); the original plain class
    would break task-class creation.
    """

    @staticmethod
    async def get_db_session():
        """Yield an ``AsyncSession``; commit on success, roll back on error.

        This is an async *generator* (consume with ``async for`` or a
        dependency-style wrapper), not a plain coroutine.
        """
        async with AsyncSessionLocal() as session:
            try:
                yield session
                await session.commit()
            except Exception:
                await session.rollback()
                raise
# Using the async database inside a Celery task.
@celery_app.task(bind=True, base=AsyncTask, name="process_order")
def process_order_task(self, order_id: int):
    """Mark order ``order_id`` as processed.

    Returns a status dict; a missing order yields a "failed" dict, not an error.
    """
    # Fix: ``select`` is missing from this module's top-level imports.
    from sqlalchemy import select

    async def async_process():
        async with AsyncSessionLocal() as session:
            result = await session.execute(
                select(Order).filter(Order.id == order_id)
            )
            order = result.scalar_one_or_none()
            if order:
                order.status = 'processed'
                order.processed_at = datetime.now()
                await session.commit()
                logger.info(f"✅ 订单处理完成: {order_id}")
                return {"status": "success", "order_id": order_id}
            return {"status": "failed", "reason": "订单不存在"}

    # Celery workers are synchronous — drive the coroutine to completion here.
    return asyncio.run(async_process())
创建 app/tasks.py:
import asyncio
from datetime import datetime, timedelta
from typing import List, Dict, Any
import logging
from app.celery_app import celery_app
from app.database import AsyncSessionLocal
from app.models import User, Order, Product
from app.cache import cache
logger = logging.getLogger(__name__)
@celery_app.task
def send_welcome_email(user_id: int):
    """Send a welcome email to a newly registered user (Celery task)."""
    # Simulated send — the real SMTP/provider call would go here.
    logger.info(f"📧 正在给用户 {user_id} 发送欢迎邮件...")
    outcome = {
        "status": "sent",
        "user_id": user_id,
        "timestamp": datetime.now().isoformat(),
    }
    return outcome
@celery_app.task
def generate_sales_report(start_date: str, end_date: str):
    """Build a sales summary for [start_date, end_date] and cache it for 1h.

    Returns a dict with totals, the top-10 products by order count, and a
    generation timestamp.
    """
    # Fix: ``select`` is missing from this module's top-level imports.
    from sqlalchemy import select

    async def async_generate():
        async with AsyncSessionLocal() as session:
            # Fetch every order in the requested period.
            result = await session.execute(
                select(Order)
                .filter(Order.created_at >= start_date)
                .filter(Order.created_at <= end_date)
            )
            orders = result.scalars().all()
            # Aggregate in Python; for large volumes push this into SQL.
            total_sales = sum(order.total_amount for order in orders)
            total_orders = len(orders)
            top_products = {}
            for order in orders:
                # Simplified: assumes one product per order (order.product_id).
                if order.product_id not in top_products:
                    top_products[order.product_id] = 0
                top_products[order.product_id] += 1
            report = {
                "period": f"{start_date} 至 {end_date}",
                "total_sales": total_sales,
                "total_orders": total_orders,
                "top_products": sorted(
                    top_products.items(),
                    key=lambda x: x[1],
                    reverse=True
                )[:10],
                "generated_at": datetime.now().isoformat()
            }
            # Cache so repeated report requests skip the DB entirely.
            cache_key = f"sales_report:{start_date}:{end_date}"
            await cache.set(cache_key, report, ttl=3600)
            return report

    return asyncio.run(async_generate())
@celery_app.task
def update_product_popularity():
    """Periodic task: recompute product popularity from the last 24h of orders."""
    # Fix: select/func/update are missing from this module's top-level imports.
    from sqlalchemy import select, func, update

    async def async_update():
        async with AsyncSessionLocal() as session:
            # Per-product order counts over the last 24 hours.
            yesterday = datetime.now() - timedelta(days=1)
            result = await session.execute(
                select(Order.product_id, func.count(Order.id).label('order_count'))
                .filter(Order.created_at >= yesterday)
                .group_by(Order.product_id)
            )
            product_stats = result.all()
            # One UPDATE per product; a single bulk statement would be cheaper.
            for product_id, order_count in product_stats:
                await session.execute(
                    update(Product)
                    .where(Product.id == product_id)
                    .values(
                        popularity=order_count,
                        last_updated=datetime.now()
                    )
                )
            await session.commit()
            # TODO: pattern-based cache invalidation (e.g. Redis SCAN + DELETE)
            # is not implemented — these patterns are currently ignored.
            cache_keys = ["hot_products:*", "product:*"]
            for pattern in cache_keys:
                pass
            logger.info(f"✅ 商品热度更新完成,处理了 {len(product_stats)} 个商品")
            return {"status": "success", "count": len(product_stats)}

    return asyncio.run(async_update())
第5部分:部署架构优化 - 从单机到高可用集群
5.1 Uvicorn + Gunicorn 多进程架构
创建 gunicorn_conf.py 配置文件:
import multiprocessing
import os
# --- Server binding ---
host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "8000")
bind = f"{host}:{port}"
# --- Worker processes ---
# Rule of thumb: 2*CPU+1 workers; override via the WORKERS env var.
workers = int(os.getenv("WORKERS", multiprocessing.cpu_count() * 2 + 1))
worker_class = "uvicorn.workers.UvicornWorker"  # run the ASGI app under uvicorn workers
worker_connections = 1000
# --- Process lifecycle ---
max_requests = 1000        # recycle a worker after 1000 requests (leak containment)
max_requests_jitter = 50   # stagger recycling so workers don't restart together
timeout = 120
keepalive = 2
# --- Logging ---
accesslog = "-"   # access log to stdout
errorlog = "-"    # error log to stderr
loglevel = "info"
# --- Performance ---
preload_app = True  # import the app once in the master, then fork
reload = False      # never hot-reload in production
# --- Monitoring ---
# NOTE(review): 9125 is the statsd_exporter convention (plain statsd is 8125)
# — confirm which collector is deployed.
statsd_host = os.getenv("STATSD_HOST", "localhost:9125")
生产环境启动命令:
# 使用Gunicorn管理Uvicorn workers
gunicorn -c gunicorn_conf.py app.main:app
# 或者直接使用uvicorn(适合容器化部署)
uvicorn app.main:app \
--host 0.0.0.0 \
--port 8000 \
--workers 8 \
--loop uvloop \
--http httptools \
--log-level info \
--no-access-log # 生产环境建议关闭访问日志,避免性能影响
5.2 Nginx负载均衡配置
创建 nginx.conf 配置文件:
upstream fastapi_backend {
    # Load-balancing strategy: route to the backend with the fewest active connections.
    least_conn;
    # Backend pool (add servers here to scale out).
    server 127.0.0.1:8000 weight=10 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:8001 weight=10 max_fails=3 fail_timeout=30s;
    server 127.0.0.1:8002 weight=10 max_fails=3 fail_timeout=30s;
    # Idle upstream connections cached per worker for reuse.
    keepalive 32;
}
server {
    listen 80;
    server_name api.example.com;
    # Static files served by Nginx directly (keeps FastAPI out of the hot path).
    location /static/ {
        alias /var/www/static/;
        expires 30d;
        add_header Cache-Control "public, immutable";
    }
    # Everything else proxies to the FastAPI pool.
    location / {
        proxy_pass http://fastapi_backend;
        proxy_http_version 1.1;
        # NOTE(review): forcing `Connection "upgrade"` on every request defeats
        # the upstream `keepalive` above; the usual fix is a `map $http_upgrade`
        # that yields "" for non-websocket requests — confirm before production.
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Timeouts
        proxy_connect_timeout 60s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
        # Response buffering
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
        proxy_busy_buffers_size 8k;
    }
    # Lightweight health endpoint answered by Nginx itself.
    location /health {
        access_log off;
        return 200 "healthy\n";
        add_header Content-Type text/plain;
    }
}
5.3 Docker容器化部署
创建 Dockerfile:
# Official slim Python base image.
FROM python:3.10-slim
# All subsequent paths are relative to /app.
WORKDIR /app
# System packages: gcc for building wheels, postgresql-client for pg tooling.
RUN apt-get update && apt-get install -y \
    gcc \
    postgresql-client \
    && rm -rf /var/lib/apt/lists/*
# Copy the dependency manifest first so this layer caches across code changes.
COPY requirements.txt .
# Install Python dependencies without keeping the pip cache in the image.
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code.
COPY . .
# Run as a non-root user.
RUN useradd -m -u 1000 fastapi && chown -R fastapi:fastapi /app
USER fastapi
# Application port.
EXPOSE 8000
# Container-level health probe against the app's /health endpoint.
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
# Default start command (gunicorn managing uvicorn workers).
CMD ["gunicorn", "-c", "gunicorn_conf.py", "app.main:app"]
创建 docker-compose.yml 用于本地开发和测试:
version: '3.8'
services:
  postgres:
    image: postgres:15
    environment:
      POSTGRES_USER: fastapi
      POSTGRES_PASSWORD: fastapi123
      POSTGRES_DB: fastapi_db
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U fastapi"]
      interval: 10s
      timeout: 5s
      retries: 5
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5
  fastapi:
    build: .
    ports:
      - "8000:8000"
    environment:
      DATABASE_URL: postgresql+asyncpg://fastapi:fastapi123@postgres:5432/fastapi_db
      REDIS_URL: redis://redis:6379/0
      WORKERS: 4
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./app:/app/app
      - ./static:/var/www/static
  celery:
    build: .
    command: celery -A app.celery_app worker --loglevel=info --concurrency=4
    environment:
      DATABASE_URL: postgresql+asyncpg://fastapi:fastapi123@postgres:5432/fastapi_db
      REDIS_URL: redis://redis:6379/0
    depends_on:
      - redis
      - postgres
  celery-beat:
    build: .
    command: celery -A app.celery_app beat --loglevel=info
    environment:
      DATABASE_URL: postgresql+asyncpg://fastapi:fastapi123@postgres:5432/fastapi_db
      # Fix: must point at the `redis` service, not localhost — inside this
      # container "localhost" is the container itself, not the Redis service.
      REDIS_URL: redis://redis:6379/0
    depends_on:
      - redis
      - postgres
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./static:/var/www/static
    depends_on:
      - fastapi
volumes:
  postgres_data:
  redis_data:
第6部分:监控与告警 - 可观测性体系
6.1 Prometheus + Grafana 监控方案
创建 app/monitoring.py:
import time
import asyncio
from typing import Dict, Any, Optional
from datetime import datetime
import logging
import psutil
import os
from prometheus_client import Counter, Histogram, Gauge, generate_latest, REGISTRY
from fastapi import Request, Response
from contextlib import asynccontextmanager
# 配置日志
logger = logging.getLogger(__name__)
# ==================== Prometheus metric definitions ====================
# Request metrics
REQUEST_COUNT = Counter(
    'fastapi_requests_total',
    '总请求数',
    ['method', 'endpoint', 'status']
)
REQUEST_LATENCY = Histogram(
    'fastapi_request_duration_seconds',
    '请求延迟',
    ['method', 'endpoint'],
    # 1ms .. 5s buckets, tuned for API-style latencies.
    buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0)
)
ACTIVE_REQUESTS = Gauge(
    'fastapi_active_requests',
    '活跃请求数'
)
# Cache metrics (label cache_layer: 'local' | 'redis')
CACHE_HITS_TOTAL = Counter(
    'cache_hits_total',
    '缓存命中数',
    ['cache_layer']
)
CACHE_MISSES_TOTAL = Counter(
    'cache_misses_total',
    '缓存未命中数',
    ['cache_layer']
)
CACHE_SIZE_ITEMS = Gauge(
    'cache_size_items',
    '缓存条目数',
    ['cache_layer']
)
# Database connection-pool metrics
DB_CONNECTIONS_IN_USE = Gauge(
    'db_connections_in_use',
    '正在使用的数据库连接数'
)
DB_CONNECTIONS_IDLE = Gauge(
    'db_connections_idle',
    '空闲的数据库连接数'
)
DB_CONNECTION_WAIT_SECONDS = Histogram(
    'db_connection_wait_seconds',
    '获取数据库连接的等待时间',
    buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0)
)
# Host / process metrics
SYSTEM_CPU_PERCENT = Gauge(
    'system_cpu_usage_percent',
    '系统CPU使用率'
)
SYSTEM_MEMORY_PERCENT = Gauge(
    'system_memory_usage_percent',
    '系统内存使用率'
)
PROCESS_MEMORY_USAGE_BYTES = Gauge(
    'process_memory_usage_bytes',
    '进程内存使用量'
)
# Business metrics
# NOTE(review): a per-product_id label creates one time series per product —
# unbounded cardinality unless the product set is small. Confirm before scaling.
PRODUCT_VIEWS_TOTAL = Counter(
    'product_views_total',
    '商品查看总数',
    ['product_id']
)
ORDER_CREATED_TOTAL = Counter(
    'orders_created_total',
    '订单创建总数'
)
# ==================== Monitoring middleware ====================
async def monitor_request(request: Request, call_next) -> Response:
    """
    Monitoring middleware: records per-request metrics.

    Responsibilities:
    1. request latency (histogram)
    2. request counts by method/endpoint/status
    3. active-request gauge (balanced via the finally block)
    4. a per-request tracking id, exposed through the X-Request-ID header
    """
    start_time = time.time()
    # Incremented here, decremented in finally — stays balanced even on errors.
    ACTIVE_REQUESTS.inc()
    # Request id: start timestamp (ms) + worker pid.
    # NOTE(review): not collision-proof for concurrent requests within the
    # same millisecond in one process — confirm whether uniqueness matters.
    request_id = f"req_{int(start_time * 1000)}_{os.getpid()}"
    request.state.request_id = request_id
    try:
        # Run the downstream handler.
        response = await call_next(request)
        latency = time.time() - start_time
        # Count by method/path/status.
        # NOTE(review): labelling with the raw path can explode metric
        # cardinality for parameterized routes (e.g. /users/123).
        REQUEST_COUNT.labels(
            method=request.method,
            endpoint=request.url.path,
            status=response.status_code
        ).inc()
        # Record latency.
        REQUEST_LATENCY.labels(
            method=request.method,
            endpoint=request.url.path
        ).observe(latency)
        # Expose tracing/performance info to the client.
        response.headers["X-Request-ID"] = request_id
        response.headers["X-Response-Time"] = f"{latency:.3f}s"
        # Flag slow requests (>1s) in the log.
        if latency > 1.0:
            logger.warning(f"🚨 慢请求检测: {request.method} {request.url.path} - {latency:.3f}s")
        return response
    except Exception as e:
        # Handler raised: count it as a 500 and re-raise for FastAPI to handle.
        latency = time.time() - start_time
        REQUEST_COUNT.labels(
            method=request.method,
            endpoint=request.url.path,
            status=500
        ).inc()
        logger.error(f"❌ 请求处理异常: {request.method} {request.url.path} - {str(e)}")
        raise
    finally:
        # Always release the active-request slot.
        ACTIVE_REQUESTS.dec()
# ==================== System monitoring ====================
async def update_system_metrics():
    """Refresh the system-level gauges: CPU %, memory %, and process RSS."""
    try:
        # Non-blocking CPU sample (interval=None uses the delta since last call).
        cpu_now = psutil.cpu_percent(interval=None)
        SYSTEM_CPU_PERCENT.set(cpu_now)
        vm = psutil.virtual_memory()
        SYSTEM_MEMORY_PERCENT.set(vm.percent)
        current_process = psutil.Process(os.getpid())
        PROCESS_MEMORY_USAGE_BYTES.set(current_process.memory_info().rss)
        # Possible extensions: disk I/O, network I/O, open files, thread count.
    except Exception as e:
        logger.error(f"❌ 更新系统指标失败: {e}")
# ==================== Database monitoring ====================
async def monitor_database_pool():
    """Refresh DB connection-pool gauges and warn when the pool runs hot."""
    try:
        # Local import avoids a circular dependency at module import time.
        from app.database import get_pool_stats
        stats = await get_pool_stats()
        if stats:
            DB_CONNECTIONS_IN_USE.set(stats.get('connections_in_use', 0))
            DB_CONNECTIONS_IDLE.set(stats.get('connections_idle', 0))
            # Warn above 80% utilisation of the configured pool size.
            total = stats.get('connections_in_use', 0) + stats.get('connections_idle', 0)
            pool_size = stats.get('pool_size', 20)
            if total > 0 and pool_size > 0:
                usage_rate = stats.get('connections_in_use', 0) / pool_size
                if usage_rate > 0.8:
                    logger.warning(f"⚠️ 数据库连接池使用率过高: {usage_rate:.1%}")
    except Exception as e:
        logger.error(f"❌ 数据库连接池监控失败: {e}")
# ==================== Cache monitoring ====================
async def monitor_cache_stats():
    """Push cache hit/miss/size statistics into the Prometheus metrics.

    NOTE(review): ``inc()`` is called on every refresh with values from
    ``cache.get_stats()`` — if those are *cumulative* totals rather than
    deltas, each refresh double-counts. Confirm get_stats() semantics.
    """
    try:
        from app.cache import cache
        stats = cache.get_stats()
        # In-process (local) cache layer.
        local_stats = stats.get('local_cache', {})
        CACHE_HITS_TOTAL.labels(cache_layer='local').inc(local_stats.get('hits', 0))
        CACHE_MISSES_TOTAL.labels(cache_layer='local').inc(local_stats.get('misses', 0))
        CACHE_SIZE_ITEMS.labels(cache_layer='local').set(local_stats.get('size', 0))
        # Redis layer.
        redis_stats = stats.get('redis_cache', {})
        CACHE_HITS_TOTAL.labels(cache_layer='redis').inc(redis_stats.get('hits', 0))
        CACHE_MISSES_TOTAL.labels(cache_layer='redis').inc(redis_stats.get('misses', 0))
    except Exception as e:
        logger.error(f"❌ 缓存监控失败: {e}")
# ==================== Aggregate monitoring service ====================
class MonitoringService:
    """Background service that periodically refreshes all monitoring metrics."""

    def __init__(self):
        # Handle to the background refresh task; None until start() is called.
        self.metrics_update_task = None
        self.is_running = False

    async def start(self):
        """Start the periodic metrics refresher (idempotent)."""
        if self.is_running:
            return
        self.is_running = True
        self.metrics_update_task = asyncio.create_task(self._periodic_update())
        logger.info("✅ 监控服务已启动")

    async def stop(self):
        """Stop the refresher and wait for its cancellation to complete."""
        self.is_running = False
        if self.metrics_update_task:
            self.metrics_update_task.cancel()
            try:
                await self.metrics_update_task
            except asyncio.CancelledError:
                pass
        logger.info("✅ 监控服务已停止")

    async def _periodic_update(self):
        """Refresh system / DB-pool / cache metrics every 30 seconds."""
        while self.is_running:
            try:
                await update_system_metrics()
                await monitor_database_pool()
                await monitor_cache_stats()
                await self._record_app_metrics()
                await asyncio.sleep(30)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"❌ 周期性监控更新失败: {e}")
                await asyncio.sleep(60)  # back off after a failure

    async def _record_app_metrics(self):
        """Record application-specific metrics (mostly placeholders for now)."""
        try:
            # Force a GC pass so memory figures reflect reachable objects only.
            import gc
            gc.collect()
            # Fix: the original computed uptime, a pending-task list, and a
            # (deprecated) asyncio.get_event_loop() handle into unused locals.
            # Kept here as documentation until they are exported as metrics:
            #   uptime  = time.time() - psutil.Process(os.getpid()).create_time()
            #   pending = [t for t in asyncio.all_tasks() if not t.done()]
        except Exception as e:
            logger.error(f"❌ 记录应用指标失败: {e}")

    async def get_system_info(self) -> Dict[str, Any]:
        """Return a snapshot of host, memory, disk and process statistics."""
        try:
            import platform
            # Sample each psutil source once so the snapshot is self-consistent.
            proc = psutil.Process(os.getpid())
            vm = psutil.virtual_memory()
            disk = psutil.disk_usage("/")
            return {
                "timestamp": datetime.now().isoformat(),
                "system": {
                    "platform": platform.platform(),
                    "python_version": platform.python_version(),
                    "cpu_count": psutil.cpu_count(),
                    "cpu_physical_cores": psutil.cpu_count(logical=False),
                },
                "memory": {
                    "total": vm.total,
                    "available": vm.available,
                    "percent": vm.percent,
                },
                "disk": {
                    "total": disk.total,
                    "used": disk.used,
                    "free": disk.free,
                    "percent": disk.percent,
                },
                "process": {
                    "pid": os.getpid(),
                    "memory_rss": proc.memory_info().rss,
                    "cpu_percent": proc.cpu_percent(),
                }
            }
        except Exception as e:
            logger.error(f"❌ 获取系统信息失败: {e}")
            return {"error": str(e)}
# ==================== FastAPI endpoints ====================
def setup_monitoring_endpoints(app):
    """Register the /metrics, /health, /system/info and /system/stats routes.

    Fix: /system/stats previously called ``self._calculate_request_rate()``
    inside a plain function, which raised NameError at request time — it now
    calls the module-level helper.
    """

    @app.get("/metrics")
    async def metrics():
        """Prometheus scrape endpoint: refresh gauges, then dump the registry."""
        try:
            await update_system_metrics()
            await monitor_database_pool()
            await monitor_cache_stats()
            return Response(
                content=generate_latest(REGISTRY),
                media_type="text/plain"
            )
        except Exception as e:
            logger.error(f"❌ 生成指标失败: {e}")
            return Response(
                content=f"Error generating metrics: {str(e)}",
                status_code=500,
                media_type="text/plain"
            )

    @app.get("/health")
    async def health_check():
        """Aggregate health of DB, cache and host resources (CPU/mem < 90%)."""
        try:
            # Imported lazily to avoid import cycles at startup.
            from app.database import check_database_health
            db_health = await check_database_health()
            from app.cache import check_cache_health
            cache_health = await check_cache_health()
            cpu_percent = psutil.cpu_percent(interval=0.1)
            memory_percent = psutil.virtual_memory().percent
            # Unhealthy if any dependency is down or the host is saturated.
            overall_status = "healthy"
            if db_health.get("status") != "healthy":
                overall_status = "unhealthy"
            elif cache_health.get("status") != "healthy":
                overall_status = "unhealthy"
            elif cpu_percent > 90:
                overall_status = "unhealthy"
            elif memory_percent > 90:
                overall_status = "unhealthy"
            return {
                "status": overall_status,
                "timestamp": datetime.now().isoformat(),
                "database": db_health,
                "cache": cache_health,
                "system": {
                    "cpu_percent": cpu_percent,
                    "memory_percent": memory_percent,
                },
                "uptime": time.time() - psutil.Process(os.getpid()).create_time(),
            }
        except Exception as e:
            logger.error(f"❌ 健康检查失败: {e}")
            return {
                "status": "unhealthy",
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
            }

    @app.get("/system/info")
    async def system_info():
        """Host/process snapshot via MonitoringService.get_system_info()."""
        try:
            monitoring_service = MonitoringService()
            info = await monitoring_service.get_system_info()
            return info
        except Exception as e:
            logger.error(f"❌ 获取系统信息失败: {e}")
            return {"error": str(e)}

    @app.get("/system/stats")
    async def system_stats():
        """Combined DB-pool / cache / system / application statistics."""
        try:
            from app.database import get_pool_stats
            db_stats = await get_pool_stats()
            from app.cache import cache
            cache_stats = cache.get_stats()
            import psutil
            return {
                "database": db_stats,
                "cache": cache_stats,
                "system": {
                    "cpu_count": psutil.cpu_count(),
                    "cpu_percent": psutil.cpu_percent(interval=0.1),
                    "memory_total": psutil.virtual_memory().total,
                    "memory_used": psutil.virtual_memory().used,
                    "memory_percent": psutil.virtual_memory().percent,
                    "disk_total": psutil.disk_usage("/").total,
                    "disk_used": psutil.disk_usage("/").used,
                    "disk_percent": psutil.disk_usage("/").percent,
                },
                "application": {
                    # NOTE: _value is a private prometheus_client attribute;
                    # acceptable only for an internal stats page.
                    "active_requests": ACTIVE_REQUESTS._value.get(),
                    # Fix: module-level helper; original used undefined ``self``.
                    "request_rate": _calculate_request_rate(),
                }
            }
        except Exception as e:
            logger.error(f"❌ 获取系统统计失败: {e}")
            return {"error": str(e)}
# ==================== 工具函数 ====================
def _calculate_request_rate():
"""计算请求率(简化实现)"""
# 实际项目中可以从日志或专门的计数器中计算
return 0
def setup_request_logging():
    """Install a JSON-formatting stream handler on the root logger."""
    import json
    import logging

    class RequestLogFormatter(logging.Formatter):
        """Serialize log records (plus request metadata, when present) as JSON."""

        def format(self, record):
            payload = {
                "timestamp": self.formatTime(record),
                "level": record.levelname,
                "message": record.getMessage(),
                "module": record.module,
                "funcName": record.funcName,
                "lineno": record.lineno,
            }
            # Optional request-scoped fields attached by the middleware.
            if hasattr(record, 'request_id'):
                payload['request_id'] = record.request_id
            if hasattr(record, 'method'):
                payload['method'] = record.method
                payload['endpoint'] = record.endpoint
                payload['status'] = record.status
                payload['latency'] = record.latency
            return json.dumps(payload, ensure_ascii=False)

    json_handler = logging.StreamHandler()
    json_handler.setFormatter(RequestLogFormatter())
    logging.getLogger().addHandler(json_handler)
    logger.info("✅ 请求日志配置完成")
# Module-level singleton used by the lifecycle hooks below.
monitoring_service = MonitoringService()
# Startup hook: call from the app's startup event / lifespan.
async def start_monitoring():
    """Start the global monitoring service."""
    await monitoring_service.start()
# Shutdown hook: call from the app's shutdown event / lifespan.
async def stop_monitoring():
    """Stop the global monitoring service.

    Fix: the original awaited ``monitoringitoring_service.stop()`` (typo),
    which raised NameError on shutdown.
    """
    await monitoring_service.stop()
第7部分:高级优化技巧 - 超越基础配置
7.1 连接池深度调优
除了基本的连接池配置,还有一些高级优化手段:
# Advanced connection-pool configuration for the async engine.
# NOTE(review): ``create_async_engine`` must come from sqlalchemy.ext.asyncio —
# the import is not shown in this snippet.
engine = create_async_engine(
    DATABASE_URL,
    # Base pool sizing
    pool_size=20,
    max_overflow=30,
    # Pool behaviour
    pool_timeout=30,       # max seconds to wait for a free connection
    pool_recycle=1800,     # recycle connections before server-side idle timeouts
    pool_pre_ping=True,    # validate connections before use (drops stale ones)
    pool_use_lifo=True,    # LIFO checkout keeps recently-used connections warm
    # Logging / diagnostics
    echo=False,            # no SQL echo in production
    echo_pool=False,       # no pool logging
    hide_parameters=True,  # keep bound parameters out of production logs
    # Driver-level settings (asyncpg)
    connect_args={
        "server_settings": {
            "jit": "off",                   # disable PostgreSQL JIT for short OLTP queries
            "effective_cache_size": "8GB",  # planner hint about available cache
        },
        "command_timeout": 30,  # per-statement timeout, in seconds
    }
)
7.2 查询优化策略
批量查询优化:
# ❌ Inefficient: one DB round-trip per id (N queries for N ids).
async def get_users(ids: List[int], db: AsyncSession):
    """Anti-example: fetch users one query at a time."""
    users = []
    for user_id in ids:
        result = await db.execute(select(User).filter(User.id == user_id))
        user = result.scalar_one_or_none()
        users.append(user)
    return users
# ✅ Efficient: a single IN (...) query for the whole batch.
async def get_users_batch(ids: List[int], db: AsyncSession):
    """Fetch all users whose ids are in ``ids`` with one query."""
    result = await db.execute(select(User).filter(User.id.in_(ids)))
    return result.scalars().all()
懒加载与预加载:
# Lazy loading: related rows are fetched only on attribute access.
async def get_user_with_orders_lazy(user_id: int, db: AsyncSession):
    """Load a user without their orders.

    NOTE(review): with SQLAlchemy's asyncio API, touching ``user.orders``
    later triggers lazy IO, which raises MissingGreenlet outside a proper
    async context — confirm callers eager-load or refresh instead.
    """
    result = await db.execute(select(User).filter(User.id == user_id))
    user = result.scalar_one()
    # Orders would be queried only when user.orders is accessed.
    return user
# Eager loading: user + orders fetched in one JOINed query.
async def get_user_with_orders_eager(user_id: int, db: AsyncSession):
    """Load a user and their orders in a single query via joinedload."""
    result = await db.execute(
        select(User).options(joinedload(User.orders)).filter(User.id == user_id)
    )
    # unique() is required when joinedload targets a collection relationship.
    return result.unique().scalar_one()
7.3 缓存高级策略
热点数据识别与预热:
class HotSpotDetector:
    """Track per-key access counts to identify hot cache keys.

    Fix: ``last_reset_time`` was recorded but never used and the counters grew
    without bound; ``reset()`` now clears them and stamps the reset time.
    """

    def __init__(self):
        # key -> access count since the last reset
        self.access_counts = {}
        self.last_reset_time = time.time()

    def record_access(self, key: str):
        """Count one access to ``key``."""
        self.access_counts[key] = self.access_counts.get(key, 0) + 1

    def get_hot_keys(self, threshold: int = 100) -> List[str]:
        """Return the keys accessed at least ``threshold`` times."""
        return [
            key for key, count in self.access_counts.items()
            if count >= threshold
        ]

    def reset(self):
        """Clear all counters (call periodically so stats reflect recent traffic)."""
        self.access_counts.clear()
        self.last_reset_time = time.time()

    async def preload_hot_data(self, db: "AsyncSession"):
        """Warm the cache for hot ``product:<id>`` keys from the database."""
        hot_keys = self.get_hot_keys()
        for key in hot_keys:
            # Only product keys are preloadable in this simplified version.
            if key.startswith('product:'):
                product_id = int(key.split(':')[1])
                result = await db.execute(
                    select(Product).filter(Product.id == product_id)
                )
                product = result.scalar_one_or_none()
                if product:
                    await cache.set(key, product, ttl=3600)
缓存雪崩防护:
class CacheAvalancheProtection:
    """Cache-aside loader with stampede and avalanche protection.

    A per-key distributed mutex ensures only one caller rebuilds a missing
    entry, and the TTL is jittered so many keys don't expire simultaneously.

    Fix: the original retried lock contention by *recursing*, which could grow
    the call stack without bound under sustained contention; this version loops.
    """

    def __init__(self, cache_instance):
        self.cache = cache_instance
        self.mutexes = {}  # kept for interface compatibility; currently unused

    async def get_with_protection(self, key: str, loader_func: "Callable", ttl: int = 300) -> "Any":
        """Return ``key`` from cache, loading it under a mutex on a miss."""
        mutex_key = f"mutex:{key}"
        while True:
            # Fast path: value already cached.
            cached_data = await self.cache.get(key)
            if cached_data is not None:
                return cached_data
            mutex_acquired = False
            try:
                mutex_acquired = await self._acquire_mutex(mutex_key)
                if mutex_acquired:
                    # Double-checked: another worker may have filled it already.
                    cached_data = await self.cache.get(key)
                    if cached_data is not None:
                        return cached_data
                    # Load from the source (sync or async loader supported).
                    if asyncio.iscoroutinefunction(loader_func):
                        data = await loader_func()
                    else:
                        data = loader_func()
                    # Jitter the TTL so entries don't all expire together.
                    randomized_ttl = ttl + random.randint(-60, 60)
                    await self.cache.set(key, data, randomized_ttl)
                    return data
                # Lock held by someone else: back off briefly, then retry.
                await asyncio.sleep(0.01)
            finally:
                if mutex_acquired:
                    await self._release_mutex(mutex_key)

    async def _acquire_mutex(self, key: str, timeout: int = 10) -> bool:
        """Try to take the distributed lock; best-effort (False on any error)."""
        try:
            result = await self.cache.set(
                key,
                f"locked_{int(time.time())}",
                ex=timeout,
                nx=True  # set only if the key does not already exist
            )
            return result
        except Exception:
            return False

    async def _release_mutex(self, key: str):
        """Release the lock; errors are ignored (the TTL will reclaim it)."""
        try:
            await self.cache.delete(key)
        except Exception:
            pass
第8部分:实战案例分析 - 大流量场景应对
8.1 秒杀场景优化
挑战:
- 瞬时超高并发
- 库存一致性要求
- 防止超卖
解决方案:
@router.post("/seckill/{product_id}")
async def seckill_product(
    product_id: int,
    background_tasks: BackgroundTasks,  # fix: was used below but never declared
    user_id: int = Depends(get_current_user),
    db: AsyncSession = Depends(get_db)
):
    """Flash-sale ("seckill") endpoint.

    Rate-limits the caller, checks cached stock, atomically decrements it in
    Redis under a distributed lock, and queues order creation in the
    background. Raises HTTP 429 (rate limit), 400 (sold out / no stock),
    409 (lock busy).
    """
    # 1. Per-user/product rate limit: at most 3 attempts per minute.
    redis = await cache.connect()
    key = f"seckill:rate:{user_id}:{product_id}"
    attempts = await redis.incr(key)
    if attempts == 1:
        await redis.expire(key, 60)  # window starts on the first attempt
    if attempts > 3:
        raise HTTPException(status_code=429, detail="请求过于频繁")
    # 2. Stock pre-check via a short-lived Redis mirror of the DB stock.
    stock_key = f"seckill:stock:{product_id}"
    stock = await redis.get(stock_key)
    if not stock:
        result = await db.execute(
            select(Product.stock).filter(Product.id == product_id)
        )
        db_stock = result.scalar() or 0
        await redis.setex(stock_key, 5, db_stock)  # 5s TTL keeps it fresh
        stock = db_stock
    else:
        stock = int(stock)
    if stock <= 0:
        raise HTTPException(status_code=400, detail="商品已售罄")
    # 3. Distributed lock serialises stock mutation per product.
    lock_key = f"seckill:lock:{product_id}"
    lock_token = f"{user_id}_{int(time.time()*1000)}"
    try:
        lock_acquired = await redis.set(
            lock_key,
            lock_token,
            ex=5,     # auto-expire so a crashed holder can't deadlock others
            nx=True   # only set when not already held
        )
        if not lock_acquired:
            raise HTTPException(status_code=409, detail="系统繁忙,请重试")
        # 4. Atomic decrement; negative means oversold — roll it back.
        remaining = await redis.decr(stock_key)
        if remaining < 0:
            await redis.incr(stock_key)
            raise HTTPException(status_code=400, detail="库存不足")
        # 5. Persist the order off the hot path.
        background_tasks.add_task(
            create_seckill_order,
            product_id,
            user_id
        )
        return {
            "status": "success",
            "message": "秒杀成功",
            "remaining_stock": remaining
        }
    finally:
        # Release only our own lock. NOTE(review): GET + DEL is not atomic (a
        # Lua script would be), and redis.get may return bytes while lock_token
        # is str — confirm decode_responses=True on this client.
        current_token = await redis.get(lock_key)
        if current_token == lock_token:
            await redis.delete(lock_key)
8.2 高并发查询优化
挑战:
- 数据库连接池压力
- 查询响应时间波动
- 缓存命中率不稳定
解决方案:
class QueryOptimizer:
    """Cache-aware query wrapper that tracks and logs slow queries."""

    def __init__(self):
        self.query_stats = {}            # per-query aggregates for slow queries
        self.slow_query_threshold = 1.0  # seconds

    async def optimized_query(self, query_func: Callable, cache_key: str = None) -> Any:
        """Run ``query_func`` with cache-first lookup and adaptive-TTL caching."""
        start_time = time.time()
        # Serve straight from cache when possible.
        if cache_key:
            cached_data = await cache.get(cache_key)
            if cached_data is not None:
                return cached_data
        try:
            # Execute the actual query.
            result = await query_func()
            query_time = time.time() - start_time
            # Track anything slower than the configured threshold.
            if query_time > self.slow_query_threshold:
                self._log_slow_query(query_func.__name__, query_time)
            # Slower queries earn longer cache lifetimes.
            if cache_key:
                await cache.set(cache_key, result, self._calculate_ttl(query_time))
            return result
        except Exception as e:
            query_time = time.time() - start_time
            logger.error(f"❌ 查询失败: {query_func.__name__}, 耗时{query_time:.3f}s, 错误{str(e)}")
            raise

    def _calculate_ttl(self, query_time: float) -> int:
        """Map query duration to a cache TTL (slower query -> longer TTL)."""
        for lower_bound, ttl in ((2.0, 3600), (1.0, 1800), (0.5, 900)):
            if query_time > lower_bound:
                return ttl
        return 300

    def _log_slow_query(self, query_name: str, query_time: float):
        """Log a slow query and fold it into the per-query statistics."""
        logger.warning(f"⚠️ 慢查询: {query_name}, 耗时{query_time:.3f}s")
        entry = self.query_stats.setdefault(query_name, {
            "count": 0,
            "total_time": 0,
            "max_time": 0,
        })
        entry["count"] += 1
        entry["total_time"] += query_time
        entry["max_time"] = max(entry["max_time"], query_time)
第9部分:总结与最佳实践
9.1 核心优化要点总结
经过完整的性能优化实践,我们总结出以下关键要点:
-
异步架构是基础
- 使用异步数据库驱动(asyncpg/aiomysql)
- 避免在异步函数中调用同步代码
- 合理使用asyncio任务和协程
-
连接池优化是关键
- 设置合理的pool_size和max_overflow
- 启用pool_pre_ping防止失效连接
- 监控连接池使用情况
-
缓存策略是核武器
- 实施多级缓存(进程内+Redis)
- 防缓存穿透和缓存击穿
- 合理的缓存失效策略
-
部署架构是保障
- 使用Gunicorn管理Uvicorn workers
- Nginx负载均衡和静态文件服务
- 容器化部署和自动扩缩容
-
可观测性是眼睛
- 全面的监控指标
- 及时的告警机制
- 性能基准测试
9.2 持续优化建议
性能优化不是一次性工作,而是一个持续的过程:
-
定期性能评估
- 每周review关键指标
- 每月分析瓶颈和优化机会
- 每季度架构评估
-
渐进式优化
- 一次只优化一个方面
- 每次优化都要有数据支撑
- 避免过度优化
-
技术栈更新
- 关注FastAPI和依赖库更新
- 及时应用性能改进
- 测试新特性的影响
-
容量规划
- 根据业务增长预测资源需求
- 提前规划扩容方案
- 成本效益分析
9.3 行动号召
现在,轮到你动手实践了:
-
诊断现有应用
- 使用本文提供的工具分析你的FastAPI应用
- 识别性能瓶颈和优化机会
-
逐步实施优化
- 从连接池优化开始
- 然后添加缓存层
- 最后优化部署架构
-
建立监控体系
- 部署Prometheus和Grafana
- 设置关键指标告警
- 建立性能基准
-
持续学习和改进
- 关注FastAPI社区和最佳实践
- 定期review和优化你的应用
- 分享你的经验和成果
记住:性能优化是一场马拉松,不是短跑。通过系统化的方法、持续的改进和正确的工具,你的FastAPI应用也能轻松应对千万级并发访问!
👋 我是扣子,一名专注于Python后端技术的开发者。如果你有任何问题或想要深入讨论,欢迎在评论区留言!
🚀 让我们一起构建高性能、高可用的后端系统!