Chapter 12: File System and Document Management MCP Applications
Introduction
Documents are an enterprise's knowledge assets. This chapter shows how to build an intelligent document management system with MCP, so that an LLM can understand, search, classify, and process enterprise documents, and assist with code development and review.
12.1 Case 1: Intelligent Document Management System
12.1.1 Application Scenario
graph TB
    A["Document management needs"] --> B["Diverse document types"]
    A --> C["Hard to find documents"]
    A --> D["Inconsistent classification"]
    A --> E["Complex version management"]
    B --> B1["Word/PDF/Excel/images"]
    C --> C1["Full-text search"]
    D --> D1["Automatic classification/tagging"]
    E --> E1["Version tracking"]
    F["MCP solution"] --> F1["Document parsing"]
    F --> F2["Intelligent search"]
    F --> F3["Automatic classification"]
    F --> F4["Version management"]
    F --> F5["Access control"]
12.1.2 Implementation Architecture
import hashlib
import logging
import mimetypes
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


@dataclass
class Document:
    """Document object"""
    path: str
    name: str
    type: str
    size: int
    created_at: datetime
    modified_at: datetime
    content: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    category: Optional[str] = None
    hash: Optional[str] = None
class DocumentParser:
    """Document parser"""

    @staticmethod
    async def parse_document(file_path: str) -> Document:
        """
        Parse a document file.

        Args:
            file_path: path to the file

        Returns:
            Document object
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # Collect file metadata
        stat = path.stat()
        mime_type, _ = mimetypes.guess_type(str(path))

        # Extract the text content
        content = await DocumentParser._extract_content(file_path, mime_type)

        # Compute the content hash
        file_hash = DocumentParser._calculate_hash(file_path)

        return Document(
            path=str(path),
            name=path.name,
            type=mime_type or "unknown",
            size=stat.st_size,
            created_at=datetime.fromtimestamp(stat.st_ctime),
            modified_at=datetime.fromtimestamp(stat.st_mtime),
            content=content,
            hash=file_hash
        )

    @staticmethod
    async def _extract_content(file_path: str, mime_type: Optional[str]) -> str:
        """
        Extract document content.

        Args:
            file_path: path to the file
            mime_type: MIME type

        Returns:
            Document content as text
        """
        if mime_type == "application/pdf":
            return await DocumentParser._extract_pdf(file_path)
        elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            return await DocumentParser._extract_docx(file_path)
        elif mime_type == "text/plain":
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        elif mime_type and mime_type.startswith("text"):
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()
        else:
            return ""

    @staticmethod
    async def _extract_pdf(file_path: str) -> str:
        """Extract text from a PDF file."""
        try:
            import PyPDF2
            text = []
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    text.append(page.extract_text())
            return '\n'.join(text)
        except Exception as e:
            logger.error(f"PDF extraction failed: {e}")
            return ""

    @staticmethod
    async def _extract_docx(file_path: str) -> str:
        """Extract text from a DOCX file."""
        try:
            from docx import Document as DocxDocument
            doc = DocxDocument(file_path)
            text = []
            for paragraph in doc.paragraphs:
                text.append(paragraph.text)
            return '\n'.join(text)
        except Exception as e:
            logger.error(f"DOCX extraction failed: {e}")
            return ""

    @staticmethod
    def _calculate_hash(file_path: str) -> str:
        """Compute the SHA-256 hash of a file."""
        hash_obj = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                hash_obj.update(chunk)
        return hash_obj.hexdigest()
class DocumentClassifier:
    """Document classifier"""

    def __init__(self):
        # Keyword lists keep the original Chinese terms so that Chinese-language
        # documents can still be matched alongside English ones.
        self.categories = {
            "contract": ["合同", "协议", "agreement"],
            "report": ["报告", "report", "analysis"],
            "procedure": ["规程", "procedure", "guideline"],
            "legal": ["法律", "legal", "law"],
            "technical": ["技术", "技术规范", "technical", "api"]
        }

    def classify_document(self, document: Document) -> Dict[str, Any]:
        """
        Classify a document.

        Args:
            document: document object

        Returns:
            Classification result
        """
        if not document.content:
            return {"category": "unknown", "confidence": 0.0, "tags": []}

        # Normalize to lower case; the file name is included in the match text
        content_lower = (document.content + document.name).lower()

        # Score each category by keyword hits
        scores = {}
        for category, keywords in self.categories.items():
            score = sum(1 for keyword in keywords if keyword in content_lower)
            scores[category] = score

        # Pick the highest-scoring category
        best_category = max(scores, key=scores.get) if scores else "unknown"
        confidence = scores[best_category] / len(self.categories[best_category]) if scores.get(best_category) else 0.0
        if confidence == 0.0:
            best_category = "unknown"  # no keyword matched at all

        # Collect matched keywords as tags
        tags = []
        for category, keywords in self.categories.items():
            for keyword in keywords:
                if keyword in content_lower:
                    tags.append(keyword)

        return {
            "category": best_category,
            "confidence": min(confidence, 1.0),
            "tags": list(set(tags))
        }
class DocumentSearchEngine:
    """Document search engine"""

    def __init__(self):
        self.index: Dict[str, List[int]] = {}  # simple inverted index: term -> document IDs
        self.documents: Dict[int, Document] = {}

    def index_document(self, doc_id: int, document: Document):
        """
        Add a document to the index.

        Args:
            doc_id: document ID
            document: document object
        """
        self.documents[doc_id] = document
        if not document.content:
            return

        # Tokenize and update the inverted index
        words = self._tokenize(document.content)
        for word in set(words):
            if word not in self.index:
                self.index[word] = []
            self.index[word].append(doc_id)

    def search(self, query: str, limit: int = 10) -> List[Dict]:
        """
        Search indexed documents.

        Args:
            query: query string
            limit: maximum number of results

        Returns:
            Search results
        """
        query_words = self._tokenize(query)

        # Collect IDs of documents that contain any query term
        matching_docs = set()
        for word in query_words:
            if word in self.index:
                matching_docs.update(self.index[word])

        # Score each match by raw term frequency (case-insensitive)
        results = []
        for doc_id in matching_docs:
            doc = self.documents[doc_id]
            content_lower = (doc.content or "").lower()
            score = sum(content_lower.count(word) for word in query_words)
            results.append({
                "document": {
                    "path": doc.path,
                    "name": doc.name,
                    "type": doc.type,
                    "size": doc.size
                },
                "relevance_score": score,
                "matched_terms": [w for w in query_words if w in content_lower]
            })

        # Sort by relevance
        results.sort(key=lambda x: x['relevance_score'], reverse=True)
        return results[:limit]

    def _tokenize(self, text: str) -> List[str]:
        """Naive whitespace tokenizer; use a proper tokenizer (e.g. jieba) in production."""
        return [word.lower() for word in text.split() if len(word) > 2]
class DocumentManagementMCP:
    """Document management MCP service"""

    def __init__(self, root_path: str):
        self.root = Path(root_path)
        self.parser = DocumentParser()
        self.classifier = DocumentClassifier()
        self.search_engine = DocumentSearchEngine()
        self.documents: Dict[str, Document] = {}

    async def index_directory(self, path: str = None) -> Dict:
        """
        Index every document under a directory.

        Args:
            path: directory path (defaults to the configured root)

        Returns:
            Index statistics
        """
        scan_path = Path(path) if path else self.root
        indexed_count = 0
        error_count = 0

        for file_path in scan_path.rglob("*"):
            if file_path.is_file():
                try:
                    doc = await self.parser.parse_document(str(file_path))
                    classification = self.classifier.classify_document(doc)
                    doc.category = classification["category"]
                    doc.tags = classification["tags"]

                    self.documents[str(file_path)] = doc
                    doc_id = len(self.documents)
                    self.search_engine.index_document(doc_id, doc)
                    indexed_count += 1
                except Exception as e:
                    logger.error(f"Failed to index {file_path}: {e}")
                    error_count += 1

        return {
            "indexed": indexed_count,
            "errors": error_count,
            "total": indexed_count + error_count
        }

    async def search_documents(self, query: str, limit: int = 10) -> List[Dict]:
        """Search documents."""
        return self.search_engine.search(query, limit)

    async def get_document_info(self, file_path: str) -> Dict:
        """Get detailed information about a document."""
        if file_path not in self.documents:
            raise FileNotFoundError(f"Document not found: {file_path}")

        doc = self.documents[file_path]
        return {
            "name": doc.name,
            "path": doc.path,
            "type": doc.type,
            "size": doc.size,
            "created_at": doc.created_at.isoformat(),
            "modified_at": doc.modified_at.isoformat(),
            "category": doc.category,
            "tags": doc.tags,
            "hash": doc.hash,
            "content_length": len(doc.content) if doc.content else 0
        }
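The classes above can be exercised locally without the MCP transport layer. The following is a minimal sketch, assuming a local ./docs folder and an illustrative query string; it simply indexes the directory and runs a search against the freshly built inverted index.

import asyncio

async def demo():
    manager = DocumentManagementMCP("./docs")   # assumed local document folder
    stats = await manager.index_directory()
    print(f"Indexed {stats['indexed']} files, {stats['errors']} errors")

    hits = await manager.search_documents("quarterly report", limit=5)
    for hit in hits:
        print(hit["document"]["name"], hit["relevance_score"])

asyncio.run(demo())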
12.2 Case 2: Code Repository Management and Development Assistant
12.2.1 Code Review Tool
class CodeReviewTool:
    """Code review tool"""

    def __init__(self):
        self.rules = {
            "style": self._check_style,
            "security": self._check_security,
            "performance": self._check_performance,
            "complexity": self._check_complexity
        }

    async def review_code(self, code: str, language: str = "python") -> Dict:
        """
        Review a piece of code.

        Args:
            code: source code
            language: programming language

        Returns:
            Review result
        """
        issues = []

        # Run every registered check
        for check_type, check_func in self.rules.items():
            findings = check_func(code, language)
            issues.extend([{
                "type": check_type,
                "severity": finding["severity"],
                "line": finding.get("line", -1),
                "message": finding["message"],
                "suggestion": finding.get("suggestion", "")
            } for finding in findings])

        # Sort by severity, errors first
        issues.sort(key=lambda x: {"error": 3, "warning": 2, "info": 1}.get(x["severity"], 0), reverse=True)

        return {
            "language": language,
            "total_lines": len(code.split('\n')),
            "issues_count": len(issues),
            "issues": issues,
            "summary": self._generate_summary(issues)
        }

    def _check_style(self, code: str, language: str) -> List[Dict]:
        """Check code style."""
        issues = []
        if language == "python":
            lines = code.split('\n')
            for i, line in enumerate(lines, 1):
                # Check line length
                if len(line) > 100:
                    issues.append({
                        "line": i,
                        "severity": "warning",
                        "message": "Line too long ({}>{})".format(len(line), 100),
                        "suggestion": "Break into multiple lines"
                    })
                # Check indentation: leading spaces should be a multiple of 4
                indent = len(line) - len(line.lstrip(' '))
                if line.startswith(' ') and indent % 4 != 0:
                    issues.append({
                        "line": i,
                        "severity": "warning",
                        "message": "Inconsistent indentation",
                        "suggestion": "Use 4 spaces per indentation level"
                    })
        return issues

    def _check_security(self, code: str, language: str) -> List[Dict]:
        """Check for security problems."""
        issues = []

        # Hard-coded credentials
        if "password" in code.lower() or "secret" in code.lower():
            if any(char in code for char in ['"', "'"]):
                issues.append({
                    "severity": "error",
                    "message": "Possible hardcoded credentials detected",
                    "suggestion": "Use environment variables or secrets management"
                })

        # Possible SQL injection
        if "execute" in code.lower() and "%" in code:
            issues.append({
                "severity": "warning",
                "message": "Possible SQL injection vulnerability",
                "suggestion": "Use parameterized queries"
            })

        return issues

    def _check_performance(self, code: str, language: str) -> List[Dict]:
        """Check for performance problems."""
        issues = []

        # List append inside a loop
        if "for" in code and ".append" in code:
            issues.append({
                "severity": "info",
                "message": "List append in loop detected",
                "suggestion": "Consider using list comprehension for better performance"
            })

        return issues

    def _check_complexity(self, code: str, language: str) -> List[Dict]:
        """Check code complexity."""
        issues = []

        # Rough cyclomatic-complexity estimate based on branching keywords
        if_count = code.count(" if ") + code.count("\nif ")
        elif_count = code.count(" elif ") + code.count("\nelif ")
        for_count = code.count(" for ") + code.count("\nfor ")
        while_count = code.count(" while ") + code.count("\nwhile ")

        complexity = 1 + if_count + elif_count + for_count * 0.5 + while_count * 0.5

        if complexity > 10:
            issues.append({
                "severity": "warning",
                "message": f"High cyclomatic complexity ({complexity:.1f})",
                "suggestion": "Consider breaking down the function into smaller parts"
            })

        return issues

    def _generate_summary(self, issues: List[Dict]) -> Dict:
        """Build a per-severity issue summary."""
        severity_count = {}
        for issue in issues:
            sev = issue["severity"]
            severity_count[sev] = severity_count.get(sev, 0) + 1
        return severity_count
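Because checks are looked up through the rules dictionary, new rules can be registered without touching review_code. The sketch below adds a hypothetical docstring check; the rule name and heuristic are illustrative assumptions, not part of the original tool.

from typing import Dict, List

def check_docstrings(code: str, language: str) -> List[Dict]:
    """Hypothetical extra rule: flag Python functions that lack a docstring."""
    issues = []
    if language != "python":
        return issues
    lines = code.split('\n')
    for i, line in enumerate(lines, 1):
        if line.strip().startswith("def ") and i < len(lines):
            next_line = lines[i].strip()
            if not next_line.startswith(('"""', "'''")):
                issues.append({
                    "line": i,
                    "severity": "info",
                    "message": "Function without docstring",
                    "suggestion": "Add a short docstring describing the function"
                })
    return issues

reviewer = CodeReviewTool()
reviewer.rules["docstrings"] = check_docstrings   # picked up automatically by review_code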
12.3 MCP Server Integration and Tool Definitions
class DocumentManagementMCPServer:
    """MCP server integration for document management"""

    def __init__(self, doc_manager: DocumentManagementMCP):
        self.doc_manager = doc_manager
        self.tools = self._define_tools()

    def _define_tools(self) -> List[Dict]:
        """Define the MCP tools."""
        return [
            {
                "name": "index_documents",
                "description": "Index all documents under a directory",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "directory": {
                            "type": "string",
                            "description": "Directory path to index"
                        }
                    },
                    "required": ["directory"]
                }
            },
            {
                "name": "search_documents",
                "description": "Full-text search over indexed documents",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query"
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Maximum number of results"
                        }
                    },
                    "required": ["query"]
                }
            },
            {
                "name": "get_document_info",
                "description": "Get detailed information about a document",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "file_path": {
                            "type": "string",
                            "description": "File path"
                        }
                    },
                    "required": ["file_path"]
                }
            },
            {
                "name": "review_code",
                "description": "Review a piece of code",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "code": {
                            "type": "string",
                            "description": "Code to review"
                        },
                        "language": {
                            "type": "string",
                            "description": "Programming language"
                        }
                    },
                    "required": ["code", "language"]
                }
            }
        ]

    async def call_tool(self, tool_name: str, arguments: Dict) -> str:
        """Dispatch a tool call."""
        import json

        try:
            if tool_name == "index_documents":
                result = await self.doc_manager.index_directory(arguments.get("directory"))
            elif tool_name == "search_documents":
                result = await self.doc_manager.search_documents(
                    arguments["query"],
                    arguments.get("limit", 10)
                )
            elif tool_name == "get_document_info":
                result = await self.doc_manager.get_document_info(arguments["file_path"])
            elif tool_name == "review_code":
                code_review = CodeReviewTool()
                result = await code_review.review_code(
                    arguments["code"],
                    arguments.get("language", "python")
                )
            else:
                return json.dumps({"error": f"Unknown tool: {tool_name}"})

            return json.dumps(result, ensure_ascii=False, indent=2)
        except Exception as e:
            return json.dumps({"error": str(e)})
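From the host side, every request goes through call_tool and comes back as a JSON string. A minimal dispatch sketch, assuming the local ./docs folder from earlier; the arguments dict mirrors the inputSchema defined above.

import asyncio
import json

async def handle_request():
    manager = DocumentManagementMCP("./docs")   # assumed local folder
    await manager.index_directory()
    server = DocumentManagementMCPServer(manager)

    raw = await server.call_tool("search_documents", {"query": "contract", "limit": 3})
    payload = json.loads(raw)
    # Errors come back as {"error": "..."} rather than as raised exceptions.
    if isinstance(payload, dict) and "error" in payload:
        print("Tool failed:", payload["error"])
    else:
        print(f"{len(payload)} results")

asyncio.run(handle_request())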
12.4 Workflow Example
12.4.1 Intelligent Document Processing Workflow
import time


class DocumentProcessingWorkflow:
    """Document processing workflow"""

    def __init__(self, doc_manager: DocumentManagementMCP, code_review: CodeReviewTool):
        self.doc_manager = doc_manager
        self.code_review = code_review

    async def process_new_documents(self, directory: str) -> Dict:
        """
        Complete workflow for processing newly added documents.

        Args:
            directory: document directory

        Returns:
            Processing result
        """
        start_time = time.perf_counter()
        workflow_result = {
            "steps": [],
            "summary": {}
        }

        # Step 1: index the documents
        workflow_result["steps"].append({
            "name": "Index Documents",
            "status": "running"
        })
        index_result = await self.doc_manager.index_directory(directory)
        workflow_result["steps"][-1]["status"] = "completed"
        workflow_result["steps"][-1]["result"] = index_result

        # Step 2: analyze the document distribution
        workflow_result["steps"].append({
            "name": "Analyze Distribution",
            "status": "running"
        })
        stats = self.doc_manager.search_engine.documents
        distribution = self._analyze_document_distribution(stats)
        workflow_result["steps"][-1]["status"] = "completed"
        workflow_result["steps"][-1]["result"] = distribution

        # Step 3: generate the report
        workflow_result["steps"].append({
            "name": "Generate Report",
            "status": "running"
        })
        report = self._generate_processing_report(index_result, distribution)
        workflow_result["steps"][-1]["status"] = "completed"
        workflow_result["steps"][-1]["result"] = report

        workflow_result["summary"] = {
            "total_documents_processed": index_result["indexed"],
            "errors": index_result["errors"],
            "processing_time": f"{time.perf_counter() - start_time:.2f}s",
            "recommendation": "Ready for Claude integration"
        }

        return workflow_result

    def _analyze_document_distribution(self, stats: Dict) -> Dict:
        """Analyze how documents are distributed across categories."""
        categories = {}
        for doc in stats.values():
            cat = doc.category or "unknown"
            categories[cat] = categories.get(cat, 0) + 1

        return {
            "total": len(stats),
            "by_category": categories,
            "distribution_summary": "Categories balanced"
        }

    def _generate_processing_report(self, index_result: Dict,
                                    distribution: Dict) -> Dict:
        """Generate the processing report."""
        total = index_result["indexed"] + index_result["errors"]
        success_rate = (index_result["indexed"] / total * 100) if total else 0.0

        return {
            "title": "Document Processing Report",
            "date": datetime.now().isoformat(),
            "indexed_count": index_result["indexed"],
            "error_count": index_result["errors"],
            "success_rate": f"{success_rate:.1f}%",
            "distribution": distribution,
            "next_steps": [
                "Start using search functionality",
                "Configure automatic classification",
                "Set up access permissions"
            ]
        }
12.5 Deployment Architecture and Security
graph TB
    A["User request"] --> B["Claude Desktop"]
    B --> C["MCP Client"]
    C --> D["Document management server"]
    D --> E["Document parser"]
    D --> F["Classifier"]
    D --> G["Search engine"]
    E --> H["PDF handling"]
    E --> I["DOCX handling"]
    E --> J["Plain-text handling"]
    D --> K["Access control"]
    K --> L["Role check"]
    K --> M["Path validation"]
    D --> N["Cache layer"]
    N --> O["Hot-document cache"]
    N --> P["Index cache"]
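The cache layer shown in the diagram is not implemented in this chapter's code. A minimal sketch of a hot-document cache, assuming a fixed capacity and LRU eviction, could look like this:

from collections import OrderedDict
from typing import Optional

class HotDocumentCache:
    """Tiny LRU cache for frequently requested document contents (illustrative only)."""

    def __init__(self, capacity: int = 128):
        self.capacity = capacity
        self._entries: "OrderedDict[str, str]" = OrderedDict()

    def get(self, path: str) -> Optional[str]:
        if path not in self._entries:
            return None
        # Move the hit to the end so it is evicted last.
        self._entries.move_to_end(path)
        return self._entries[path]

    def put(self, path: str, content: str) -> None:
        self._entries[path] = content
        self._entries.move_to_end(path)
        if len(self._entries) > self.capacity:
            self._entries.popitem(last=False)  # evict the least recently used entry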
12.5.1 Security Considerations
class DocumentAccessControl:
    """Document access control"""

    def __init__(self):
        self.roles = {
            "admin": ["read", "write", "delete"],
            "user": ["read"],
            "guest": ["read"]
        }
        self.path_restrictions = {
            "/confidential/": ["admin"],
            "/public/": ["admin", "user", "guest"]
        }

    def check_access(self, user_role: str, file_path: str,
                     action: str) -> bool:
        """
        Check whether an action on a path is allowed.

        Args:
            user_role: user role
            file_path: file path
            action: action type

        Returns:
            True if access is allowed
        """
        # Check path restrictions
        for restricted_path, allowed_roles in self.path_restrictions.items():
            if file_path.startswith(restricted_path):
                if user_role not in allowed_roles:
                    return False

        # Check action permissions
        user_actions = self.roles.get(user_role, [])
        return action in user_actions

    def sanitize_path(self, base_path: str, requested_path: str) -> Optional[str]:
        """
        Sanitize a path to prevent directory traversal attacks.

        Args:
            base_path: base directory
            requested_path: requested path

        Returns:
            The sanitized path, or None if it is unsafe
        """
        from pathlib import Path
        try:
            base = Path(base_path).resolve()
            requested = (base / requested_path).resolve()
            # Reject anything that escapes the base directory
            requested.relative_to(base)
            return str(requested)
        except Exception:
            return None
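A short usage sketch of the two checks together, using the roles and restricted paths defined above; the expected results in the comments assume a POSIX-style file system.

acl = DocumentAccessControl()

# Role/action checks
print(acl.check_access("guest", "/public/handbook.pdf", "read"))        # True
print(acl.check_access("user", "/confidential/salaries.xlsx", "read"))  # False
print(acl.check_access("user", "/public/handbook.pdf", "delete"))       # False

# Path sanitization rejects traversal attempts
print(acl.sanitize_path("/data/documents", "reports/q3.pdf"))    # resolved path under /data/documents
print(acl.sanitize_path("/data/documents", "../../etc/passwd"))  # None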
12.6 Extended Features
12.6.1 Version Management
class DocumentVersionManager:
    """Document version manager"""

    def __init__(self):
        self.versions: Dict[str, List[Dict]] = {}

    async def create_version(self, file_path: str, content: str,
                             author: str, message: str = "") -> Dict:
        """Create a new version of a document."""
        if file_path not in self.versions:
            self.versions[file_path] = []

        version_num = len(self.versions[file_path]) + 1
        version_hash = hashlib.sha256(content.encode()).hexdigest()

        version = {
            "version": version_num,
            "timestamp": datetime.now().isoformat(),
            "author": author,
            "hash": version_hash,
            "message": message,
            "size": len(content)
        }

        self.versions[file_path].append(version)
        return version

    def get_version_history(self, file_path: str) -> List[Dict]:
        """Get the version history of a document."""
        return self.versions.get(file_path, [])

    def compare_versions(self, file_path: str, version1: int,
                         version2: int) -> Dict:
        """Compare two versions of a document."""
        if file_path not in self.versions:
            return {"error": "File not found"}

        versions = self.versions[file_path]
        if version1 > len(versions) or version2 > len(versions):
            return {"error": "Version not found"}

        v1 = versions[version1 - 1]
        v2 = versions[version2 - 1]

        return {
            "file": file_path,
            "version1": v1,
            "version2": v2,
            "differences": {
                "size_change": v2["size"] - v1["size"],
                "time_difference": f"{(datetime.fromisoformat(v2['timestamp']) - datetime.fromisoformat(v1['timestamp'])).days} days"
            }
        }
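A quick sketch of the versioning flow; the file path, authors, and contents are placeholders for illustration.

import asyncio

async def version_demo():
    mgr = DocumentVersionManager()
    await mgr.create_version("/data/documents/policy.md", "v1 draft", "alice", "initial draft")
    await mgr.create_version("/data/documents/policy.md", "v2 draft with appendix", "bob", "added appendix")

    print(mgr.get_version_history("/data/documents/policy.md"))
    diff = mgr.compare_versions("/data/documents/policy.md", 1, 2)
    print(diff["differences"])  # size_change and time_difference

asyncio.run(version_demo())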
12.7 Performance Optimization and Monitoring
class DocumentManagementMetrics:
    """Metrics for the document management system"""

    def __init__(self):
        self.search_times: List[float] = []
        self.index_times: List[float] = []
        self.cache_hits = 0
        self.cache_misses = 0

    def record_search_time(self, elapsed_ms: float):
        """Record a search duration."""
        self.search_times.append(elapsed_ms)

    def record_index_time(self, elapsed_ms: float):
        """Record an indexing duration."""
        self.index_times.append(elapsed_ms)

    def get_statistics(self) -> Dict:
        """Return aggregated statistics."""
        return {
            "search": {
                "avg_time_ms": sum(self.search_times) / len(self.search_times) if self.search_times else 0,
                "max_time_ms": max(self.search_times) if self.search_times else 0,
                "queries": len(self.search_times)
            },
            "index": {
                "avg_time_ms": sum(self.index_times) / len(self.index_times) if self.index_times else 0,
                "max_time_ms": max(self.index_times) if self.index_times else 0,
                "operations": len(self.index_times)
            },
            "cache": {
                "hits": self.cache_hits,
                "misses": self.cache_misses,
                "hit_rate": f"{(self.cache_hits / (self.cache_hits + self.cache_misses) * 100):.1f}%" if (self.cache_hits + self.cache_misses) > 0 else "N/A"
            }
        }
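The metrics object is passive: callers are expected to time operations themselves and feed the results in. A minimal sketch using the doc_manager from 12.1 (the query string is an arbitrary example):

import time

async def timed_search(doc_manager: DocumentManagementMCP,
                       metrics: DocumentManagementMetrics, query: str):
    start = time.perf_counter()
    results = await doc_manager.search_documents(query)
    # Convert seconds to milliseconds before recording.
    metrics.record_search_time((time.perf_counter() - start) * 1000)
    return results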
12.8 Complete Usage Example
import asyncio
import json


async def main():
    """Complete usage example."""
    # Initialize the system
    doc_manager = DocumentManagementMCP("/data/documents")
    code_review = CodeReviewTool()
    server = DocumentManagementMCPServer(doc_manager)
    workflow = DocumentProcessingWorkflow(doc_manager, code_review)
    version_mgr = DocumentVersionManager()
    metrics = DocumentManagementMetrics()

    # 1. Process documents
    print("🔍 Processing documents...")
    result = await workflow.process_new_documents("/data/documents")
    print(f"✅ Indexed: {result['summary']['total_documents_processed']} documents")

    # 2. Search documents
    print("🔎 Searching for documents...")
    search_results = await doc_manager.search_documents("quarterly report")
    print(f"Found {len(search_results)} results")

    # 3. Code review
    print("📝 Reviewing code...")
    test_code = '''
def calculate_total(items):
    total = 0
    for item in items:
        total = total + item["price"] * item["quantity"]
    return total
'''
    review = await code_review.review_code(test_code, "python")
    print(f"Found {review['issues_count']} issues")

    # 4. Version management
    print("📦 Creating version...")
    version = await version_mgr.create_version(
        "/data/documents/report.md",
        "Updated content",
        "admin",
        "Fixed typos"
    )
    print(f"Version {version['version']} created")

    # 5. Print metrics
    print("\n📊 System Metrics:")
    stats = metrics.get_statistics()
    print(json.dumps(stats, indent=2))


if __name__ == "__main__":
    asyncio.run(main())
Chapter Summary
| Key Point | Description |
|---|---|
| Document parsing | Supports multiple file formats |
| Intelligent classification | Keyword matching and categorization |
| Full-text search | Implemented with an inverted index |
| Code review | Multi-dimensional code analysis |
| Version management | File hashing and change tracking |
| Access control | Path-based access restrictions |
Frequently Asked Questions
Q1: How do I support additional file formats? A: Implement a parser for each format and register it with DocumentParser, as sketched below.
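A minimal sketch of that registration idea, assuming a hypothetical extractor registry keyed by MIME type; the Markdown handler is illustrative and not part of DocumentParser.

from pathlib import Path

# Hypothetical registry: MIME type -> extractor function
EXTRACTORS = {
    "text/markdown": lambda path: Path(path).read_text(encoding="utf-8"),
}

async def extract_with_registry(file_path: str, mime_type: str) -> str:
    extractor = EXTRACTORS.get(mime_type)
    if extractor is not None:
        return extractor(file_path)
    # Fall back to the built-in handlers from DocumentParser.
    return await DocumentParser._extract_content(file_path, mime_type)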
Q2: How can search performance be optimized? A: Use a dedicated search engine such as Elasticsearch, or build a distributed index.
Q3: How should large files be handled? A: Read them as a stream and process them in chunks instead of loading the entire content at once, as shown below.
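For example, a large plain-text file can be fed to the indexer chunk by chunk; the chunk size below is an arbitrary assumption.

def iter_text_chunks(file_path: str, chunk_size: int = 64 * 1024):
    """Yield a large text file in fixed-size chunks to keep memory usage flat."""
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            yield chunk

# Usage: tokenize and index each chunk instead of the whole document at once.
# for chunk in iter_text_chunks("/data/documents/large_log.txt"):
#     process(chunk)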
Q4: Which languages does code review support? A: Any programming language can be supported by implementing language-specific check rules.
Q5: How is document privacy ensured? A: Apply fine-grained access control and encrypt sensitive document content.
Next chapter preview: Chapter 13 covers MCP applications for API and external service integration.
Chapter 13: API and External Service Integration MCP Applications
Introduction
APIs are the bridge between internal and external systems. This chapter shows how to use MCP to give an LLM the ability to integrate with third-party services such as e-commerce platforms, CRMs, and payment providers.
13.1 Case 1: E-commerce Order Management System
13.1.1 Application Scenario
graph TB
    A["E-commerce order management"] --> B["Order lookup"]
    A --> C["Inventory management"]
    A --> D["Payment processing"]
    A --> E["After-sales service"]
    F["Claude"] --> F1["Intelligent queries"]
    F --> F2["Inventory alerts"]
    F --> F3["Payment suggestions"]
    F --> F4["Returns handling"]
    F1 --> B
    F2 --> C
    F3 --> D
    F4 --> E
13.1.2 E-commerce API Integration
from typing import Dict, List, Optional
from dataclasses import dataclass
from datetime import datetime

import aiohttp


@dataclass
class Order:
    """Order"""
    order_id: str
    customer_id: str
    status: str
    total_amount: float
    items: List[Dict]
    created_at: datetime
    updated_at: datetime
class EcommerceAPIClient:
    """E-commerce API client"""

    def __init__(self, api_key: str, base_url: str):
        self.api_key = api_key
        self.base_url = base_url
        self.session: Optional[aiohttp.ClientSession] = None

    async def connect(self):
        """Open the HTTP session."""
        self.session = aiohttp.ClientSession()

    async def disconnect(self):
        """Close the HTTP session."""
        if self.session:
            await self.session.close()

    async def search_orders(self, customer_id: str = None,
                            status: str = None, limit: int = 10) -> List[Order]:
        """
        Search orders.

        Args:
            customer_id: customer ID
            status: order status
            limit: maximum number of results

        Returns:
            List of orders
        """
        # aiohttp expects string-valued query parameters
        params = {"limit": str(limit)}
        if customer_id:
            params["customer_id"] = customer_id
        if status:
            params["status"] = status

        async with self.session.get(
            f"{self.base_url}/orders",
            params=params,
            headers={"Authorization": f"Bearer {self.api_key}"}
        ) as resp:
            data = await resp.json()
            orders = []
            for item in data.get("orders", []):
                order = Order(
                    order_id=item["id"],
                    customer_id=item["customer_id"],
                    status=item["status"],
                    total_amount=item["total"],
                    items=item["items"],
                    created_at=datetime.fromisoformat(item["created_at"]),
                    updated_at=datetime.fromisoformat(item["updated_at"])
                )
                orders.append(order)
            return orders

    async def get_order_details(self, order_id: str) -> Dict:
        """Get order details."""
        async with self.session.get(
            f"{self.base_url}/orders/{order_id}",
            headers={"Authorization": f"Bearer {self.api_key}"}
        ) as resp:
            return await resp.json()

    async def update_order_status(self, order_id: str, status: str) -> bool:
        """Update an order's status."""
        async with self.session.patch(
            f"{self.base_url}/orders/{order_id}",
            json={"status": status},
            headers={"Authorization": f"Bearer {self.api_key}"}
        ) as resp:
            return resp.status == 200

    async def check_inventory(self, product_id: str) -> Dict:
        """
        Check inventory.

        Args:
            product_id: product ID

        Returns:
            Inventory information
        """
        async with self.session.get(
            f"{self.base_url}/inventory/{product_id}",
            headers={"Authorization": f"Bearer {self.api_key}"}
        ) as resp:
            return await resp.json()

    async def process_refund(self, order_id: str, reason: str) -> Dict:
        """
        Process a refund.

        Args:
            order_id: order ID
            reason: refund reason

        Returns:
            Refund result
        """
        async with self.session.post(
            f"{self.base_url}/refunds",
            json={"order_id": order_id, "reason": reason},
            headers={"Authorization": f"Bearer {self.api_key}"}
        ) as resp:
            return await resp.json()
class OrderManagementMCP:
    """Order management MCP service"""

    def __init__(self, api_client: EcommerceAPIClient):
        self.api = api_client

    async def find_order(self, order_id: str) -> Dict:
        """Look up a single order."""
        details = await self.api.get_order_details(order_id)
        return {
            "found": details.get("id") is not None,
            "order": {
                "id": details.get("id"),
                "status": details.get("status"),
                "total": details.get("total"),
                "items": details.get("items"),
                "customer": details.get("customer"),
                "created_at": details.get("created_at"),
                "updated_at": details.get("updated_at")
            } if details.get("id") else None
        }

    async def search_customer_orders(self, customer_id: str) -> Dict:
        """Search a customer's orders."""
        orders = await self.api.search_orders(customer_id=customer_id, limit=20)

        # Group orders by status
        by_status = {}
        for order in orders:
            if order.status not in by_status:
                by_status[order.status] = []
            by_status[order.status].append({
                "id": order.order_id,
                "total": order.total_amount,
                "items_count": len(order.items)
            })

        return {
            "customer_id": customer_id,
            "total_orders": len(orders),
            "by_status": by_status,
            "recent_orders": [{
                "id": o.order_id,
                "total": o.total_amount,
                "status": o.status,
                "date": o.created_at.isoformat()
            } for o in orders[:5]]
        }

    async def handle_refund_request(self, order_id: str, reason: str) -> Dict:
        """Handle a refund request."""
        result = await self.api.process_refund(order_id, reason)
        return {
            "success": result.get("success"),
            "refund_id": result.get("refund_id"),
            "amount": result.get("amount"),
            "status": result.get("status"),
            "message": result.get("message")
        }

    async def check_product_availability(self, product_id: str) -> Dict:
        """Check product availability."""
        inventory = await self.api.check_inventory(product_id)
        return {
            "product_id": product_id,
            "available": inventory.get("quantity", 0) > 0,
            "quantity": inventory.get("quantity"),
            "warehouse_locations": inventory.get("locations"),
            "next_restock_date": inventory.get("next_restock")
        }
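A minimal end-to-end sketch of using the client and the MCP service layer together; the base URL, environment variable name, and customer ID are placeholders for whatever platform is being integrated.

import asyncio
import os

async def order_demo():
    client = EcommerceAPIClient(
        api_key=os.environ.get("SHOP_API_KEY", ""),  # keep credentials out of source code
        base_url="https://api.example-shop.com/v1"   # placeholder endpoint
    )
    await client.connect()
    try:
        manager = OrderManagementMCP(client)
        summary = await manager.search_customer_orders("CUST-12345")
        print(summary["total_orders"], "orders found")
    finally:
        await client.disconnect()

asyncio.run(order_demo())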
13.2 Tool Definitions and Integration
class EcommerceMCPServer:
    """E-commerce MCP server"""

    def __init__(self, order_manager: OrderManagementMCP):
        self.orders = order_manager

    def get_tools(self) -> List[Dict]:
        """Return the tool definitions."""
        return [
            {
                "name": "search_orders",
                "description": "Search for customer orders",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "customer_id": {
                            "type": "string",
                            "description": "Customer ID to search orders for"
                        },
                        "status": {
                            "type": "string",
                            "description": "Order status filter (pending/processing/shipped/delivered)",
                            "enum": ["pending", "processing", "shipped", "delivered"]
                        }
                    },
                    "required": ["customer_id"]
                }
            },
            {
                "name": "get_order_details",
                "description": "Get detailed information about an order",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "order_id": {
                            "type": "string",
                            "description": "Order ID"
                        }
                    },
                    "required": ["order_id"]
                }
            },
            {
                "name": "process_refund",
                "description": "Process a refund request",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "order_id": {
                            "type": "string",
                            "description": "Order ID"
                        },
                        "reason": {
                            "type": "string",
                            "description": "Refund reason"
                        }
                    },
                    "required": ["order_id", "reason"]
                }
            }
        ]

    async def call_tool(self, tool_name: str, arguments: Dict) -> str:
        """Dispatch a tool call."""
        import json

        try:
            if tool_name == "search_orders":
                result = await self.orders.search_customer_orders(
                    arguments["customer_id"]
                )
            elif tool_name == "get_order_details":
                result = await self.orders.find_order(arguments["order_id"])
            elif tool_name == "process_refund":
                result = await self.orders.handle_refund_request(
                    arguments["order_id"],
                    arguments["reason"]
                )
            else:
                return json.dumps({"error": f"Unknown tool: {tool_name}"})

            return json.dumps(result, ensure_ascii=False)
        except Exception as e:
            return json.dumps({"error": str(e)})
Chapter Summary
| Key Point | Description |
|---|---|
| API integration | Access third-party services via HTTP/REST |
| Asynchronous processing | Efficient async API calls with aiohttp |
| Error handling | Robust exception handling and retry mechanisms |
| Data transformation | Convert API responses into structured data |
| Credential management | API keys and authentication |
| Caching strategy | Cache hot data to improve performance |
Frequently Asked Questions
Q1: How do I handle API rate limiting? A: Use a token-bucket limiter or exponential-backoff retries, and respect the provider's published rate limits (a retry sketch follows).
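A minimal retry sketch with exponential backoff, assuming a 429 status signals throttling; adjust the status handling and delays to the provider's actual behavior.

import asyncio
import aiohttp

async def get_with_backoff(session: aiohttp.ClientSession, url: str,
                           max_retries: int = 5, base_delay: float = 0.5) -> dict:
    """Retry GET requests with exponential backoff when the API throttles us."""
    for attempt in range(max_retries):
        async with session.get(url) as resp:
            if resp.status == 429:  # throttled: wait and try again
                await asyncio.sleep(base_delay * (2 ** attempt))
                continue
            resp.raise_for_status()
            return await resp.json()
    raise RuntimeError(f"Still rate-limited after {max_retries} attempts: {url}")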
Q2: How do I protect API keys? A: Load them from environment variables or a secrets-management service; never hard-code keys.
Q3: How do I handle API timeouts? A: Set reasonable timeouts and implement automatic retries and graceful degradation (see the sketch below).
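With aiohttp, a ClientTimeout can be attached to the session; the 10-second budget below is an arbitrary example, and the endpoint is a placeholder.

import aiohttp

timeout = aiohttp.ClientTimeout(total=10)  # overall budget per request, in seconds

async def fetch_order(base_url: str, order_id: str, api_key: str) -> dict:
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.get(
            f"{base_url}/orders/{order_id}",
            headers={"Authorization": f"Bearer {api_key}"}
        ) as resp:
            return await resp.json()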
Q4: How do I support multiple API versions? A: Use an adapter pattern or version-based routing.
Q5: How do I monitor the health of the API integration? A: Run periodic health checks, record API call metrics, and set up alerts.
Next chapter preview: Chapter 14 covers MCP applications for knowledge bases and information management.