第9章 MCP客户端开发
前言
如果MCP服务器是"知识提供者",那么客户端就是"知识消费者"。本章讲解如何构建高效、可靠的MCP客户端,让LLM能够充分利用MCP服务提供的所有能力。
9.1 客户端的核心职责
9.1.1 客户端的四大职责
graph TB
A["MCP客户端"] --> B["连接管理"]
A --> C["服务发现"]
A --> D["功能调用"]
A --> E["LLM集成"]
B --> B1["建立连接"]
B --> B2["保活心跳"]
B --> B3["异常恢复"]
B --> B4["连接池"]
C --> C1["工具发现"]
C --> C2["资源发现"]
C --> C3["提示发现"]
C --> C4["动态注册"]
D --> D1["工具调用"]
D --> D2["资源读取"]
D --> D3["提示渲染"]
D --> D4["结果缓存"]
E --> E1["工具绑定"]
E --> E2["上下文构建"]
E --> E3["调用序列"]
E --> E4["错误处理"]
9.1.2 客户端架构
graph LR
subgraph LLM["LLM层"]
LLM1["语言模型"]
LLM2["提示词生成"]
end
subgraph Client["客户端层"]
Gateway["网关"]
ConnMgr["连接管理"]
DiscoveryMgr["服务发现"]
ToolExecutor["工具执行"]
ResourceLoader["资源加载"]
Cache["缓存层"]
end
subgraph Server["服务器层"]
Server1["MCP Server 1"]
Server2["MCP Server 2"]
ServerN["MCP Server N"]
end
LLM1 --> LLM2
LLM2 --> Gateway
Gateway --> ConnMgr
Gateway --> DiscoveryMgr
Gateway --> ToolExecutor
Gateway --> ResourceLoader
ToolExecutor --> Cache
ResourceLoader --> Cache
ConnMgr --> Server1
DiscoveryMgr --> Server1
ToolExecutor --> Server1
ResourceLoader --> Server1
ToolExecutor --> Server2
ResourceLoader --> Server2
ToolExecutor --> ServerN
ResourceLoader --> ServerN
9.2 MCP客户端开发框架
9.2.1 基础连接管理
import asyncio
import json
import logging
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional

# Module-level logger: the classes below log via `logger`, which was
# previously undefined in this file.
logger = logging.getLogger(__name__)
class ConnectionState(Enum):
    """Lifecycle states of an MCP server connection."""
    DISCONNECTED = "disconnected"  # no active link
    CONNECTING = "connecting"      # initial connect in progress
    CONNECTED = "connected"        # link established and healthy
    RECONNECTING = "reconnecting"  # recovering after a failure
    FAILED = "failed"              # all connection attempts exhausted
@dataclass
class ServerConfig:
    """Connection settings for a single MCP server.

    Covers where to connect (host/port), how long to wait per request
    (timeout), and the retry policy (max_retries/retry_delay).
    """
    name: str             # logical server identifier
    host: str             # server hostname or IP address
    port: int             # server TCP port
    timeout: int = 30     # per-request timeout, seconds
    max_retries: int = 3  # connection attempts before giving up
    retry_delay: int = 5  # pause between attempts, seconds
class MCPConnection:
    """Manages one client connection to an MCP server.

    Owns the socket, a heartbeat task that keeps the link alive, and a
    listener task that routes server responses to waiting callers via
    per-request futures.
    """

    def __init__(self, config: ServerConfig):
        self.config = config
        self.state = ConnectionState.DISCONNECTED
        self.socket = None
        self.heartbeat_task = None
        self.listen_task = None  # tracked so disconnect() can cancel it
        self.message_queue = asyncio.Queue()
        # request id -> future resolved when the matching response arrives
        self.response_handlers: Dict[str, asyncio.Future] = {}
        self.connection_time = None
        self.last_activity = None

    async def connect(self) -> bool:
        """Connect to the MCP server with bounded retries.

        Returns:
            True if the connection was established, False otherwise.
        """
        self.state = ConnectionState.CONNECTING
        for attempt in range(self.config.max_retries):
            try:
                # WebSocket transport (another protocol could be swapped in)
                import websockets
                uri = f"ws://{self.config.host}:{self.config.port}"
                self.socket = await asyncio.wait_for(
                    websockets.connect(uri),
                    timeout=self.config.timeout
                )
                self.state = ConnectionState.CONNECTED
                self.connection_time = datetime.now()
                self.last_activity = datetime.now()
                # Start keepalive and inbound message routing
                self.heartbeat_task = asyncio.create_task(self._heartbeat())
                self.listen_task = asyncio.create_task(self._listen())
                logger.info(f"Connected to {self.config.name}")
                return True
            except asyncio.TimeoutError:
                logger.warning(
                    f"Connection timeout (attempt {attempt + 1}/{self.config.max_retries})"
                )
            except Exception as e:
                logger.error(f"Connection failed: {e}")
            # Only pause when another attempt remains (the original slept
            # pointlessly after the final failure too)
            if attempt + 1 < self.config.max_retries:
                await asyncio.sleep(self.config.retry_delay)
        self.state = ConnectionState.FAILED
        return False

    async def disconnect(self):
        """Tear down the connection, cancel background tasks, fail waiters."""
        if self.heartbeat_task:
            self.heartbeat_task.cancel()
        if self.listen_task:
            self.listen_task.cancel()
        # Callers still awaiting a response would otherwise hang forever
        for future in self.response_handlers.values():
            if not future.done():
                future.set_exception(ConnectionError("Connection closed"))
        self.response_handlers.clear()
        if self.socket:
            await self.socket.close()
        self.state = ConnectionState.DISCONNECTED
        logger.info(f"Disconnected from {self.config.name}")

    async def _heartbeat(self):
        """Send a JSON-RPC ping every 30s; trigger reconnect on failure."""
        while self.state == ConnectionState.CONNECTED:
            try:
                await asyncio.sleep(30)  # heartbeat interval
                await self._send_message({
                    "jsonrpc": "2.0",
                    "method": "ping",
                    "id": str(datetime.now().timestamp())
                })
                self.last_activity = datetime.now()
            except asyncio.CancelledError:
                raise  # disconnect() cancelled us; don't treat as a failure
            except Exception as e:
                logger.error(f"Heartbeat failed: {e}")
                await self._reconnect()

    async def _listen(self):
        """Route inbound messages: responses to futures, notifications on."""
        try:
            while self.state == ConnectionState.CONNECTED:
                message = await self.socket.recv()
                data = json.loads(message)
                if "id" in data:
                    # Response: resolve the future of the matching request
                    future = self.response_handlers.pop(data["id"], None)
                    if future is not None and not future.done():
                        future.set_result(data)
                elif "method" in data:
                    # Server-initiated notification (no id per JSON-RPC)
                    await self._handle_notification(data)
                self.last_activity = datetime.now()
        except asyncio.CancelledError:
            raise  # normal shutdown path
        except Exception as e:
            logger.error(f"Message listening failed: {e}")
            await self._reconnect()

    async def _send_message(self, message: Dict) -> Dict:
        """Send a JSON-RPC request and await its response.

        Raises:
            RuntimeError: if the connection is not established.
            TimeoutError: if no response arrives within the configured timeout.
        """
        if self.state != ConnectionState.CONNECTED:
            raise RuntimeError(f"Not connected (state: {self.state})")
        message_id = message.get("id", str(datetime.now().timestamp()))
        # BUG FIX: put the generated id on the wire, otherwise the server
        # cannot correlate its response with this request
        message["id"] = message_id
        future = asyncio.Future()
        self.response_handlers[message_id] = future
        try:
            await self.socket.send(json.dumps(message))
            response = await asyncio.wait_for(future, timeout=self.config.timeout)
            return response
        except asyncio.TimeoutError:
            raise TimeoutError(f"Request {message_id} timeout")
        finally:
            self.response_handlers.pop(message_id, None)

    async def _handle_notification(self, message: Dict):
        """Dispatch server notifications by method name."""
        method = message.get("method")
        if method == "resource/updated":
            uri = message.get("params", {}).get("uri")
            logger.info(f"Resource updated: {uri}")
        elif method == "tool/registered":
            tool_name = message.get("params", {}).get("name")
            logger.info(f"Tool registered: {tool_name}")

    async def _reconnect(self):
        """Wait one retry_delay, then attempt a fresh connection."""
        self.state = ConnectionState.RECONNECTING
        await asyncio.sleep(self.config.retry_delay)
        await self.connect()
class ConnectionPool:
    """Registry of live MCPConnection objects, keyed by server name."""

    def __init__(self):
        self.connections: Dict[str, MCPConnection] = {}

    async def add_server(self, config: ServerConfig) -> bool:
        """Connect to a server and register it on success.

        Returns:
            True if the connection was established and registered.
        """
        conn = MCPConnection(config)
        if await conn.connect():
            self.connections[config.name] = conn
            return True
        return False

    async def remove_server(self, name: str):
        """Disconnect and drop the named server; no-op if unknown."""
        # pop() instead of check-then-delete: single lookup, no race window
        conn = self.connections.pop(name, None)
        if conn is not None:
            await conn.disconnect()

    def get_connection(self, name: str) -> Optional[MCPConnection]:
        """Return the live connection for *name*, or None if absent."""
        return self.connections.get(name)

    async def close_all(self):
        """Disconnect every server and empty the pool."""
        for conn in self.connections.values():
            await conn.disconnect()
        # BUG FIX: the original left disconnected connections registered,
        # so get_connection() would keep handing out dead objects
        self.connections.clear()
9.2.2 服务发现
from typing import List
class ServiceDiscovery:
    """Discovers and caches the tools, resources and prompts of a server."""

    def __init__(self, connection: MCPConnection):
        self.connection = connection
        # name/uri -> raw definition dicts as returned by the server
        self.tools_cache: Dict[str, Dict] = {}
        self.resources_cache: Dict[str, Dict] = {}
        self.prompts_cache: Dict[str, Dict] = {}

    async def discover_tools(self, force_refresh: bool = False) -> List[Dict]:
        """List tools exposed by the server.

        Args:
            force_refresh: bypass the local cache and re-query the server.

        Returns:
            Tool definition dicts (empty list on failure).
        """
        if not force_refresh and self.tools_cache:
            return list(self.tools_cache.values())
        try:
            response = await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "tools/list",
                "id": "tools_discovery",
                "params": {}
            })
            if "error" in response:
                raise RuntimeError(f"Discovery failed: {response['error']}")
            tools = response.get("result", {}).get("tools", [])
            for tool in tools:
                self.tools_cache[tool["name"]] = tool
            logger.info(f"Discovered {len(tools)} tools")
            return tools
        except Exception as e:
            logger.error(f"Tool discovery failed: {e}")
            return []

    async def discover_resources(self, uri_template: Optional[str] = None,
                                 force_refresh: bool = False) -> List[Dict]:
        """List resources exposed by the server.

        Args:
            uri_template: optional URI pattern to filter by.
            force_refresh: bypass the local cache and re-query the server.

        Returns:
            Resource definition dicts (empty list on failure).
        """
        # Serve unfiltered repeats from cache, mirroring discover_tools
        # (the original always hit the server despite maintaining a cache)
        if not force_refresh and uri_template is None and self.resources_cache:
            return list(self.resources_cache.values())
        try:
            params = {}
            if uri_template:
                params["uri"] = uri_template
            response = await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "resources/list",
                "id": "resources_discovery",
                "params": params
            })
            if "error" in response:
                raise RuntimeError(f"Discovery failed: {response['error']}")
            resources = response.get("result", {}).get("resources", [])
            for resource in resources:
                self.resources_cache[resource["uri"]] = resource
            logger.info(f"Discovered {len(resources)} resources")
            return resources
        except Exception as e:
            logger.error(f"Resource discovery failed: {e}")
            return []

    async def discover_prompts(self, force_refresh: bool = False) -> List[Dict]:
        """List prompt templates exposed by the server.

        Args:
            force_refresh: bypass the local cache and re-query the server.
        """
        if not force_refresh and self.prompts_cache:
            return list(self.prompts_cache.values())
        try:
            response = await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "prompts/list",
                "id": "prompts_discovery",
                "params": {}
            })
            if "error" in response:
                raise RuntimeError(f"Discovery failed: {response['error']}")
            prompts = response.get("result", {}).get("prompts", [])
            for prompt in prompts:
                self.prompts_cache[prompt["name"]] = prompt
            logger.info(f"Discovered {len(prompts)} prompts")
            return prompts
        except Exception as e:
            logger.error(f"Prompt discovery failed: {e}")
            return []

    def get_tool(self, name: str) -> Optional[Dict]:
        """Return a cached tool definition, or None."""
        return self.tools_cache.get(name)

    def get_resource(self, uri: str) -> Optional[Dict]:
        """Return a cached resource definition, or None."""
        return self.resources_cache.get(uri)

    def get_prompt(self, name: str) -> Optional[Dict]:
        """Return a cached prompt definition, or None."""
        return self.prompts_cache.get(name)
9.3 工具调用与资源访问
9.3.1 工具执行器
class ToolExecutor:
    """Executes MCP tools over a connection, with history and result cache."""

    def __init__(self, connection: MCPConnection):
        self.connection = connection
        self.execution_history: List[Dict] = []
        # cache_key -> (stored_at_timestamp, result) so TTL can be enforced
        self.result_cache: Dict[str, tuple] = {}

    async def call_tool(self, tool_name: str, arguments: Dict) -> Dict:
        """Invoke a tool and return its result.

        Args:
            tool_name: name of the tool to call.
            arguments: tool arguments.

        Returns:
            The "result" payload of the JSON-RPC response.

        Raises:
            ToolExecutionError: if the server reports an error.
        """
        try:
            execution_start = datetime.now()
            response = await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "tools/call",
                "id": f"tool_{tool_name}_{datetime.now().timestamp()}",
                "params": {
                    "name": tool_name,
                    "arguments": arguments
                }
            })
            execution_time = (datetime.now() - execution_start).total_seconds()
            self.execution_history.append({
                "tool": tool_name,
                "arguments": arguments,
                "response": response,
                "execution_time": execution_time,
                "timestamp": execution_start.isoformat()
            })
            # BUG FIX: check for failure BEFORE caching — the original
            # cached error responses and only raised afterwards
            if "error" in response:
                raise ToolExecutionError(
                    f"Tool execution failed: {response['error']['message']}"
                )
            result = response.get("result", {})
            cache_key = self._generate_cache_key(tool_name, arguments)
            self.result_cache[cache_key] = (datetime.now().timestamp(), result)
            logger.info(f"Tool {tool_name} executed in {execution_time:.2f}s")
            return result
        except Exception as e:
            logger.error(f"Tool execution failed: {e}")
            raise

    def _generate_cache_key(self, tool_name: str, arguments: Dict) -> str:
        """Build a stable cache key from the tool name and argument values."""
        import hashlib
        arg_str = json.dumps(arguments, sort_keys=True)
        hash_val = hashlib.md5(arg_str.encode()).hexdigest()
        return f"{tool_name}_{hash_val}"

    async def call_tool_with_cache(self, tool_name: str, arguments: Dict,
                                   cache_ttl: int = 3600) -> Dict:
        """Invoke a tool, reusing a cached result while it is still fresh.

        Args:
            tool_name: name of the tool to call.
            arguments: tool arguments.
            cache_ttl: maximum age of a cached result, in seconds.

        Returns:
            The (possibly cached) tool result.
        """
        cache_key = self._generate_cache_key(tool_name, arguments)
        cached = self.result_cache.get(cache_key)
        if cached is not None:
            stored_at, result = cached
            # BUG FIX: the original ignored cache_ttl entirely and served
            # stale entries forever
            if datetime.now().timestamp() - stored_at <= cache_ttl:
                logger.info(f"Using cached result for {tool_name}")
                return result
            del self.result_cache[cache_key]
        return await self.call_tool(tool_name, arguments)

    def get_execution_history(self, tool_name: str = None) -> List[Dict]:
        """Return execution records, optionally filtered by tool name."""
        if tool_name:
            return [e for e in self.execution_history if e["tool"] == tool_name]
        return self.execution_history
class ToolExecutionError(Exception):
    """Raised when an MCP tool call returns a server-side error."""
class ResourceLoader:
    """Reads and caches MCP resources; records subscription callbacks."""

    def __init__(self, connection: MCPConnection):
        self.connection = connection
        # uri -> previously fetched content
        self.resource_cache: Dict[str, str] = {}
        # uri -> callback registered via subscribe_resource
        self.subscription_callbacks: Dict[str, object] = {}

    async def read_resource(self, uri: str) -> str:
        """Read a resource, serving repeat reads from the cache.

        Args:
            uri: resource URI.

        Returns:
            The resource content.

        Raises:
            ResourceNotFoundError: if the server reports an error for the URI.
        """
        if uri in self.resource_cache:
            logger.info(f"Using cached resource: {uri}")
            return self.resource_cache[uri]
        try:
            response = await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "resources/read",
                "id": f"resource_{uri}",
                "params": {"uri": uri}
            })
            if "error" in response:
                # Include the server's error detail instead of discarding it
                raise ResourceNotFoundError(
                    f"Resource not found: {uri} ({response['error']})"
                )
            content = response.get("result", {}).get("contents", "")
            self.resource_cache[uri] = content
            logger.info(f"Loaded resource: {uri}")
            return content
        except Exception as e:
            logger.error(f"Resource loading failed: {e}")
            raise

    async def subscribe_resource(self, uri: str, callback):
        """Subscribe to change notifications for a resource.

        Args:
            uri: resource URI.
            callback: invoked when the resource changes.  NOTE(review): the
                callback is only recorded here; routing server
                "resource/updated" notifications to it must be wired up in
                the connection's notification handler.
        """
        try:
            await self.connection._send_message({
                "jsonrpc": "2.0",
                "method": "resources/subscribe",
                "id": f"subscribe_{uri}",
                "params": {"uri": uri}
            })
            # BUG FIX: the original accepted `callback` and silently
            # dropped it; keep it so a dispatcher can find it
            self.subscription_callbacks[uri] = callback
            logger.info(f"Subscribed to resource: {uri}")
        except Exception as e:
            logger.error(f"Subscription failed: {e}")
class ResourceNotFoundError(Exception):
    """Raised when a requested MCP resource cannot be read."""
9.4 与LLM集成
9.4.1 工具绑定
class LLMToolBinding:
    """Bridges MCP tool definitions into the tool format an LLM expects."""

    def __init__(self, llm_client, tool_executor: "ToolExecutor"):
        self.llm_client = llm_client  # e.g. Claude or GPT client
        self.tool_executor = tool_executor
        # tool name -> converted LLM-format spec
        self.bound_tools: Dict[str, Dict] = {}

    async def bind_tool(self, tool_definition: Dict):
        """Register an MCP tool so the LLM can call it.

        Args:
            tool_definition: MCP tool definition dict.
        """
        tool_name = tool_definition["name"]
        llm_tool_spec = self._convert_to_llm_format(tool_definition)
        self.bound_tools[tool_name] = llm_tool_spec
        logger.info(f"Tool {tool_name} bound to LLM")

    def _convert_to_llm_format(self, tool_def: Dict) -> Dict:
        """Convert an MCP tool definition to an LLM tool spec (Claude-style).

        MCP's inputSchema follows JSON Schema: "properties" maps argument
        name -> spec, and "required" is a schema-level list of names.
        """
        schema = tool_def.get("inputSchema", {})
        properties = schema.get("properties", {})
        return {
            "name": tool_def["name"],
            "description": tool_def["description"],
            "input_schema": {
                "type": "object",
                "properties": {
                    # BUG FIX: items() yields (name, spec) tuples; the
                    # original then indexed the tuple with arg["name"],
                    # which raised TypeError at runtime
                    name: {
                        "type": spec.get("type", "string"),
                        "description": spec.get("description", "")
                    }
                    for name, spec in properties.items()
                },
                # BUG FIX: JSON Schema keeps "required" as a top-level name
                # list, not a per-property boolean flag
                "required": list(schema.get("required", []))
            }
        }

    async def execute_llm_tool_call(self, tool_name: str,
                                    arguments: Dict) -> str:
        """Run a tool the LLM requested and stringify the outcome.

        Returns:
            JSON-encoded tool result, or an "Error: ..." string on failure.
        """
        try:
            result = await self.tool_executor.call_tool(tool_name, arguments)
            return json.dumps(result, ensure_ascii=False)
        except Exception as e:
            return f"Error: {str(e)}"

    def get_llm_tools(self) -> List[Dict]:
        """Return all bound tool specs in LLM format."""
        return list(self.bound_tools.values())
9.4.2 上下文构建与调用序列
class MCPAgentContext:
    """Wires the connection pool, service discovery and an LLM into an
    agentic tool-use loop."""

    def __init__(self, connection_pool: ConnectionPool,
                 llm_client, discovery: ServiceDiscovery):
        self.connection_pool = connection_pool
        self.llm_client = llm_client
        self.discovery = discovery
        # NOTE(review): assumes a server named "primary" is registered in
        # the pool; get_connection returns None otherwise — confirm callers
        # always add it before constructing this context.
        self.tool_binding = LLMToolBinding(
            llm_client,
            ToolExecutor(connection_pool.get_connection("primary"))
        )
        self.conversation_history: List[Dict] = []

    async def prepare_context(self):
        """Discover all available tools and bind them to the LLM."""
        tools = await self.discovery.discover_tools()
        for tool in tools:
            await self.tool_binding.bind_tool(tool)
        logger.info(f"Prepared {len(tools)} tools for LLM")

    async def agentic_loop(self, user_query: str) -> str:
        """Agent loop: let the LLM think, run requested tools, repeat.

        Args:
            user_query: the user's question.

        Returns:
            The final text response, or a bail-out message when the
            iteration cap is reached.
        """
        self.conversation_history.append({
            "role": "user",
            "content": user_query
        })
        max_iterations = 10  # hard cap against runaway tool loops
        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            # 1. Let the LLM reason with the bound tools available
            llm_response = await self.llm_client.chat(
                messages=self.conversation_history,
                tools=self.tool_binding.get_llm_tools()
            )
            self.conversation_history.append({
                "role": "assistant",
                "content": llm_response
            })
            # 2. No tool use -> the answer is final
            if not self._has_tool_use(llm_response):
                return self._extract_response(llm_response)
            # 3. Execute every requested tool call
            tool_calls = self._parse_tool_calls(llm_response)
            if not tool_calls:
                # BUG FIX: the marker fired but parsing found no calls; the
                # original would burn all remaining iterations feeding the
                # LLM empty result messages — bail out with what we have
                return self._extract_response(llm_response)
            tool_results = []
            for tool_call in tool_calls:
                tool_name = tool_call["name"]
                arguments = tool_call["arguments"]
                try:
                    result = await self.tool_binding.execute_llm_tool_call(
                        tool_name, arguments
                    )
                    tool_results.append({
                        "tool": tool_name,
                        "result": result
                    })
                except Exception as e:
                    tool_results.append({
                        "tool": tool_name,
                        "error": str(e)
                    })
            # 4. Feed tool output back to the LLM as a user turn
            self.conversation_history.append({
                "role": "user",
                "content": self._format_tool_results(tool_results)
            })
            logger.info(f"Iteration {iteration}: Called {len(tool_calls)} tools")
        return "Maximum iterations reached"

    def _has_tool_use(self, response: str) -> bool:
        """Heuristic check for tool-call markers in the raw LLM response."""
        return "<tool_use>" in response or "function_calls" in response

    def _parse_tool_calls(self, response: str) -> List[Dict]:
        """Parse tool calls out of the LLM response.

        TODO: implement per-LLM parsing; currently returns no calls.
        """
        tool_calls = []
        # ... parsing depends on the concrete LLM's response format
        return tool_calls

    def _extract_response(self, llm_response: str) -> str:
        """Strip tool-call markup and return the leading text."""
        return llm_response.split("<tool_use>")[0].strip()

    def _format_tool_results(self, tool_results: List[Dict]) -> str:
        """Render tool results as a plain-text block for the LLM."""
        formatted = "Tool Results:\n"
        for item in tool_results:
            if "error" in item:
                formatted += f"- {item['tool']}: ERROR - {item['error']}\n"
            else:
                formatted += f"- {item['tool']}: {item['result']}\n"
        return formatted
9.5 客户端性能与可靠性
9.5.1 错误处理与重试
class RetryStrategy:
    """Retries transient failures with exponential backoff."""

    def __init__(self, max_retries: int = 3, base_delay: int = 1):
        self.max_retries = max_retries  # total attempts, not extra retries
        self.base_delay = base_delay    # seconds; doubled per attempt

    async def execute_with_retry(self, func, *args, **kwargs):
        """Run *func*, retrying transient errors with exponential backoff.

        Only TimeoutError and ConnectionError are treated as retryable;
        any other exception propagates immediately.

        Args:
            func: async callable to execute.
            *args, **kwargs: forwarded to func.

        Returns:
            func's result.

        Raises:
            The last retryable error once attempts are exhausted, or
            RuntimeError if max_retries < 1 (func never ran).
        """
        last_error = None
        for attempt in range(self.max_retries):
            try:
                return await func(*args, **kwargs)
            except (TimeoutError, ConnectionError) as e:
                last_error = e
                # BUG FIX: the original also slept after the final failed
                # attempt, wasting one full backoff delay before raising
                if attempt + 1 < self.max_retries:
                    delay = self.base_delay * (2 ** attempt)
                    logger.warning(
                        f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}"
                    )
                    await asyncio.sleep(delay)
        if last_error is None:
            # BUG FIX: with max_retries < 1 the original did `raise None`
            raise RuntimeError("max_retries must be at least 1")
        raise last_error
class ErrorHandler:
    """Maps exceptions to user-facing error payloads."""

    @staticmethod
    async def handle_error(error: Exception, context: str = "") -> Dict:
        """Classify *error* and build a user-friendly error dict.

        Args:
            error: the exception to classify.
            context: extra context appended to the message.

        Returns:
            Dict with "error" (category), "message" and "retry" keys.
        """
        # Classify first, build the payload once at the end.  The isinstance
        # checks stay in a lazy if/elif chain so each branch is only
        # evaluated when reached.
        if isinstance(error, TimeoutError):
            category, message, retryable = "timeout", f"请求超时。{context}", True
        elif isinstance(error, ConnectionError):
            category, message, retryable = "connection", f"连接错误。{context}", True
        elif isinstance(error, ToolExecutionError):
            category, message, retryable = "tool_execution", f"工具执行失败。{context}", False
        elif isinstance(error, ResourceNotFoundError):
            category, message, retryable = "not_found", f"资源未找到。{context}", False
        else:
            category, message, retryable = "unknown", f"未知错误。{context}", False
        return {"error": category, "message": message, "retry": retryable}
9.5.2 性能优化
class PerformanceOptimizer:
    """Collects per-operation timing metrics."""

    def __init__(self):
        # operation name -> list of observed durations (seconds)
        self.metrics: Dict[str, List[float]] = {}

    async def measure_performance(self, operation_name: str,
                                  coro) -> tuple:
        """Await *coro* and record how long it took.

        Args:
            operation_name: metric bucket name.
            coro: awaitable to measure.

        Returns:
            (result, duration_seconds)
        """
        import time
        # perf_counter is monotonic; time.time() can jump backwards with
        # wall-clock adjustments and corrupt duration measurements
        start = time.perf_counter()
        result = await coro
        duration = time.perf_counter() - start
        self.metrics.setdefault(operation_name, []).append(duration)
        return result, duration

    def get_performance_stats(self, operation_name: str) -> Dict:
        """Summarise recorded durations; {} if the operation is unknown."""
        if operation_name not in self.metrics:
            return {}
        times = self.metrics[operation_name]
        return {
            "count": len(times),
            "avg": sum(times) / len(times),
            "min": min(times),
            "max": max(times),
            "total": sum(times)
        }

    def get_all_stats(self) -> Dict[str, Dict]:
        """Return summaries for every recorded operation."""
        return {
            op: self.get_performance_stats(op)
            for op in self.metrics
        }
本章总结
| 核心概念 | 关键点 |
|---|---|
| 连接管理 | 状态机、心跳、异常恢复、连接池 |
| 服务发现 | 工具、资源、提示的发现和缓存 |
| 工具调用 | 同步执行、缓存机制、执行历史 |
| 资源加载 | 流式读取、缓存、订阅通知 |
| LLM集成 | 工具绑定、上下文构建、代理循环 |
| 错误处理 | 重试策略、错误分类、恢复机制 |
| 性能优化 | 缓存、异步处理、性能监控 |
| 可靠性 | 断路器、降级方案、监控告警 |
常见问题
Q1: 客户端如何处理服务器故障? A: 通过自动重连、降级方案、缓存结果等方式。心跳检测故障,自动启动重连流程。
Q2: 能否连接多个MCP服务器? A: 是的。连接池支持管理多个服务器连接,每个连接独立。
Q3: 如何优化工具调用性能? A: 使用结果缓存、批量调用、异步处理。根据场景选择合适的缓存策略。
Q4: LLM调用工具时如何处理权限? A: 在服务器端实施权限控制。客户端只展示有权限的工具。
Q5: 如何监控客户端性能? A: 使用性能优化器记录指标,定期分析执行时间、成功率等。
实战要点
✅ 推荐做法
- 使用连接池管理多个服务器
- 实施服务发现缓存
- 为工具调用结果缓存
- 使用指数退避重试策略
- 监控和记录性能指标
- 定期检测连接健康状态
- 实施断路器保护
❌ 避免的做法
- 不要忽视连接失败
- 不要无限重试
- 不要过度缓存
- 不要忽视权限检查
- 不要缺乏性能监控
- 不要阻塞式等待
- 不要忘记超时设置
延伸阅读
- MCP客户端规范:spec.modelcontextprotocol.io/client
- 异步编程模式:docs.python.org/3/library/asyncio.html
- 分布式系统设计:martin.kleppmann.com/ddia.html
- 可靠性工程:sre.google/books/
下一章预告:第10章将讲述如何进行MCP系统的监控与调试——包括日志、追踪、性能分析等关键内容。