MCP × 亚马逊数据:用Model Context Protocol构建实时选品AI工作流

预计阅读时间:5分钟

[配图:亚马逊MCP数据运营:接入协议,用真实数据赢得选品攻坚战.png]

前言

MCP(Model Context Protocol)在2026年已经不再是"概念",它正在成为AI工程师构建生产级工具链的核心协议标准。本文聚焦于一个非常具体的工程场景:如何利用MCP协议,将亚马逊实时数据接入AI工作流,实现亚马逊MCP数据运营的完整闭环。

适合人群:AI应用工程师、跨境电商技术负责人、对MCP协议感兴趣的开发者。


MCP协议核心原理

MCP采用JSON-RPC 2.0作为消息格式,支持stdio和HTTP+SSE两种传输方式。其最核心的设计决策是Server-driven Tool Declaration:工具的能力描述由Server端声明,Host/Client无需预先知道可用工具的列表,而是在运行时通过tools/list请求动态发现。

这个设计让"为AI配备实时数据工具"这件事变得极度灵活。我们只需要把Pangolinfo的亚马逊数据采集能力封装成符合MCP规范的Server,AI模型就可以在任何支持MCP的Host环境中调用它。

工具暴露规范

{
  "name": "scan_bsr_opportunities",
  "description": "实时扫描亚马逊指定类目BSR机会品,基于Pangolinfo分钟级数据更新",
  "inputSchema": {
    "type": "object",
    "properties": {
      "category_id": {"type": "string", "description": "亚马逊类目节点ID"},
      "marketplace": {"type": "string", "default": "US"},
      "max_reviews": {"type": "integer", "default": 150},
      "days": {"type": "integer", "default": 7}
    },
    "required": ["category_id"]
  }
}

系统架构设计

┌─────────────────────────────────────────────────┐
│                   MCP Host                       │
│  ┌──────────────┐    ┌──────────────────────┐   │
│  │  AI Model    │◄──►│   MCP Client         │   │
│  │  (Claude)    │    │   (Session Manager)  │   │
│  └──────────────┘    └──────────┬───────────┘   │
└────────────────────────────────┼────────────────┘
                                 │ JSON-RPC (stdio/SSE)
┌────────────────────────────────▼────────────────┐
│               MCP Server                         │
│  ┌────────────────────────────────────────────┐  │
│  │         Tool Registry                      │  │
│  │  scan_bsr_opportunities()                 │  │
│  │  analyze_product_reviews()                │  │
│  │  get_ad_placement_intelligence()          │  │
│  └────────────────┬───────────────────────────┘  │
└───────────────────┼─────────────────────────────┘
                    │ HTTP REST
┌───────────────────▼─────────────────────────────┐
│          Pangolinfo Data Layer                   │
│  ┌────────────────┐  ┌──────────────────────┐   │
│  │  Scrape API    │  │  Reviews Scraper API  │   │
│  │  (BSR/榜单/   │  │  (评论/Customer Says) │   │
│  │   广告位数据)  │  └──────────────────────┘   │
│  └────────────────┘                             │
│  分钟级更新 | SP广告位98%采集率 | 结构化JSON输出 │
└─────────────────────────────────────────────────┘
         │ 亚马逊平台公开数据采集
┌────────▼────────────────────────────────────────┐
│              Amazon Marketplace                  │
│  BSR榜单 | New Releases | ABA搜索词 | SP广告位   │
└─────────────────────────────────────────────────┘

核心实现

MCP Server(工具暴露层)

# amazon_mcp_server.py
import asyncio
import json
import requests
import os
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.server.models import InitializationOptions
from mcp import types

# Low-level MCP server instance; the list_tools/call_tool handlers in this
# module register themselves on it through its decorators.
server = Server("pangolinfo-amazon")
# API key read from the environment. os.getenv returns None when the
# variable is unset.
API_KEY = os.getenv("PANGOLINFO_API_KEY")
# Base URL that the tool handlers prefix to every endpoint path.
BASE = "https://api.pangolinfo.com/v2"

@server.list_tools()
async def list_tools():
    """Declare the tools this server exposes to MCP clients.

    Returns:
        A two-element list of ``types.Tool``: ``scan_bsr_opportunities``
        followed by ``analyze_product_reviews``, each with its JSON Schema
        for the accepted arguments.
    """
    # Argument schema for the BSR scan; only the category is mandatory.
    bsr_schema = {
        "type": "object",
        "properties": {
            "category_id": {"type": "string"},
            "marketplace": {"type": "string", "default": "US"},
            "days": {"type": "integer", "default": 7},
            "max_reviews": {"type": "integer", "default": 150},
            "min_sales": {"type": "integer", "default": 300},
        },
        "required": ["category_id"],
    }

    # Argument schema for the review analysis; only the ASIN is mandatory.
    reviews_schema = {
        "type": "object",
        "properties": {
            "asin": {"type": "string"},
            "marketplace": {"type": "string", "default": "US"},
            "focus": {"type": "string", "default": "negative"},
        },
        "required": ["asin"],
    }

    bsr_tool = types.Tool(
        name="scan_bsr_opportunities",
        description="实时扫描亚马逊类目BSR机会品(Pangolinfo分钟级数据)",
        inputSchema=bsr_schema,
    )
    reviews_tool = types.Tool(
        name="analyze_product_reviews",
        description="获取ASIN差评数据并分析产品迭代优先级(含Customer Says摘要)",
        inputSchema=reviews_schema,
    )
    return [bsr_tool, reviews_tool]


def _post_json(path, payload, timeout):
    """POST ``payload`` to ``BASE + path`` and return the decoded JSON body.

    Raises:
        RuntimeError: if ``API_KEY`` is unset, instead of sending
            ``Bearer None`` to the API.
        requests.HTTPError: on a 4xx/5xx response, so an error body is never
            mistaken for a valid (empty) result.
    """
    if not API_KEY:
        raise RuntimeError("PANGOLINFO_API_KEY is not set")
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    resp = requests.post(f"{BASE}{path}", json=payload, headers=headers, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


def _short_title(title, limit=60):
    """Truncate ``title`` to ``limit`` characters, adding "..." only if cut."""
    return title if len(title) <= limit else title[:limit] + "..."


def _signed(change):
    """Render a rank change with an explicit sign, without doubling it."""
    text = str(change)
    return text if text.startswith(("+", "-")) else f"+{text}"


def _scan_bsr_opportunities(arguments):
    """Query the bestsellers endpoint and summarize the first ten results."""
    data = _post_json("/amazon/bestsellers", {
        "category_id": arguments["category_id"],
        "country": arguments.get("marketplace", "US"),
        "days": arguments.get("days", 7),
        "filters": {
            "max_reviews": arguments.get("max_reviews", 150),
            "min_monthly_sales": arguments.get("min_sales", 300),
        },
    }, timeout=30)

    products = data.get("results", [])
    return {
        "opportunities_count": len(products),
        "top_opportunities": [{
            "asin": p["asin"],
            "title": _short_title(p["title"]),
            "bsr_rank": p["bsr_current"],
            "bsr_change": _signed(p["bsr_change"]),
            "reviews": p["review_count"],
            "monthly_sales": p["monthly_sales"],
            "price": p["price"],
        } for p in products[:10]],
    }


def _analyze_product_reviews(arguments):
    """Query the reviews endpoint and extract the summary fields."""
    # NOTE(review): the declared ``focus`` argument is not forwarded; the
    # request always asks for critical reviews. Confirm whether the API has a
    # matching parameter before wiring it through.
    data = _post_json("/amazon/reviews", {
        "asin": arguments["asin"],
        "country": arguments.get("marketplace", "US"),
        "rating_filter": "critical",
        "include_customer_says": True,
        "max_pages": 10,
    }, timeout=60)

    return {
        "asin": arguments["asin"],
        "customer_says_summary": data.get("customer_says", ""),
        "negative_themes": data.get("negative_themes", [])[:5],
        "improvement_priorities": data.get("improvement_suggestions", []),
    }


@server.call_tool()
async def call_tool(name: str, arguments: dict):
    """Dispatch an MCP tool call to the matching Pangolinfo request.

    Args:
        name: Tool name; ``scan_bsr_opportunities`` or
            ``analyze_product_reviews``.
        arguments: Tool arguments as supplied by the client.

    Returns:
        A one-element list holding a ``types.TextContent`` whose text is the
        JSON-encoded result. An unknown tool name yields
        ``{"error": "Unknown tool: <name>"}``.

    Raises:
        KeyError: if a required argument or an expected result field is
            missing.
        RuntimeError: if the API key is not configured.
        requests.RequestException: if the HTTP request fails or returns an
            error status.
    """
    arguments = arguments or {}

    # requests is synchronous; running it in a worker thread keeps the event
    # loop free to service the MCP transport while the HTTP call is pending.
    if name == "scan_bsr_opportunities":
        result = await asyncio.to_thread(_scan_bsr_opportunities, arguments)
    elif name == "analyze_product_reviews":
        result = await asyncio.to_thread(_analyze_product_reviews, arguments)
    else:
        result = {"error": f"Unknown tool: {name}"}

    return [types.TextContent(type="text", text=json.dumps(result, ensure_ascii=False, indent=2))]


async def main():
    """Run the MCP server over the stdio transport."""
    # Imported locally so this function stays self-contained with respect to
    # the module's import section.
    from mcp.server import NotificationOptions

    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="pangolinfo-amazon",
                server_version="1.0.0",
                # ``capabilities`` is a required field of InitializationOptions;
                # derive it from the handlers registered on ``server``.
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )


if __name__ == "__main__":
    asyncio.run(main())

工作流客户端

# workflow_client.py
import asyncio
import json
import anthropic
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def run_selection_analysis(category_id: str, marketplace: str = "US"):
    """Run the full Amazon product-selection workflow through MCP.

    Spawns ``amazon_mcp_server.py`` as a stdio MCP server, hands its tools to
    the model, and executes requested tool calls until the model stops asking
    for tools.

    Args:
        category_id: Amazon category node ID to analyze.
        marketplace: Marketplace code inserted into the prompt.

    Returns:
        The first text block of the model's final response, or ``""`` if the
        response holds no text.
    """
    params = StdioServerParameters(
        command="python",
        args=["amazon_mcp_server.py"]
    )

    # Async client so the model request does not block the event loop that
    # also drives the MCP stdio session.
    client = anthropic.AsyncAnthropic()

    async with stdio_client(params) as (r, w):
        async with ClientSession(r, w) as session:
            await session.initialize()

            # Fetch the tool list and convert it to the Anthropic tool format.
            tools_resp = await session.list_tools()
            tools = [{
                "name": t.name,
                "description": t.description,
                "input_schema": t.inputSchema
            } for t in tools_resp.tools]

            messages = [{
                "role": "user",
                "content": f"""
                请对亚马逊 {marketplace} 站点的 {category_id} 类目做一次完整的机会分析:
                1. 扫描过去7天的BSR机会品(评论<150,月销>300)
                2. 对评分最高的机会品分析其差评和改进方向
                3. 给出3个最值得关注的机会品推荐 + 切入建议
                """
            }]

            # Model reasoning loop.
            while True:
                response = await client.messages.create(
                    model="claude-opus-4-5",
                    max_tokens=4096,
                    tools=tools,
                    messages=messages
                )

                # Any stop reason other than "tool_use" ends the conversation.
                # Handling only "end_turn" would loop forever (re-sending the
                # same messages) on e.g. "max_tokens".
                if response.stop_reason != "tool_use":
                    return next((b.text for b in response.content if hasattr(b, "text")), "")

                tool_results = []
                for block in response.content:
                    if block.type != "tool_use":
                        continue
                    outcome = await session.call_tool(block.name, block.input)
                    # A tool result may be empty or hold non-text parts, so
                    # gather the text parts rather than indexing content[0].
                    text = "\n".join(
                        part.text
                        for part in outcome.content
                        if getattr(part, "type", None) == "text"
                    )
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": text or "(no text content)",
                        # Pass the server-side failure flag on to the model.
                        "is_error": bool(getattr(outcome, "isError", False)),
                    })

                messages.extend([
                    {"role": "assistant", "content": response.content},
                    {"role": "user", "content": tool_results}
                ])


if __name__ == "__main__":
    result = asyncio.run(run_selection_analysis("2975312011"))
    print(result)

最佳实践

数据缓存策略

import redis
import hashlib

cache = redis.Redis()


def cached_api_call(endpoint: str, payload: dict, ttl: int = 300):
    """POST ``payload`` to ``endpoint``, caching the JSON response in Redis.

    Identical (endpoint, payload) pairs within ``ttl`` seconds are served
    from the cache, so the same category data is not requested twice.

    Args:
        endpoint: Full URL to POST to.
        payload: JSON-serializable request body; also part of the cache key.
        ttl: Cache lifetime in seconds (default 300, i.e. five minutes).

    Returns:
        The decoded JSON response, either cached or freshly fetched.

    Raises:
        requests.HTTPError: on a 4xx/5xx response. Such a response is not
            cached, so a transient API failure is not replayed for ``ttl``
            seconds.
    """
    # MD5 serves only as a compact key digest here, not for security.
    # sort_keys makes the key independent of dict insertion order.
    cache_key = hashlib.md5(f"{endpoint}{json.dumps(payload, sort_keys=True)}".encode()).hexdigest()

    cached = cache.get(cache_key)
    if cached:
        return json.loads(cached)

    # NOTE(review): get_headers() is expected to be provided by the
    # surrounding module; it is not defined in this snippet.
    resp = requests.post(endpoint, json=payload, headers=get_headers(), timeout=30)
    resp.raise_for_status()
    result = resp.json()

    cache.setex(cache_key, ttl, json.dumps(result))
    return result

错误处理最佳实践

from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    # RequestException is the common base of HTTPError, ConnectionError and
    # Timeout. Retrying only on HTTPError would skip connection failures and
    # timeouts, which are the typical transient errors.
    retry=retry_if_exception_type(requests.RequestException),
)
def resilient_api_call(url, payload, headers):
    """POST ``payload`` as JSON to ``url``, retrying on request failures.

    Makes up to three attempts with exponential backoff between them
    (waits bounded to 2-10 seconds).

    Args:
        url: Endpoint to POST to.
        payload: JSON-serializable request body.
        headers: HTTP headers to send.

    Returns:
        The decoded JSON response body.

    Raises:
        tenacity.RetryError: when every attempt failed with a
            ``requests.RequestException`` (``reraise`` is left at its
            default, so the last error is wrapped rather than re-raised).
    """
    resp = requests.post(url, json=payload, headers=headers, timeout=30)
    # Turn 4xx/5xx responses into HTTPError so the retry policy sees them.
    resp.raise_for_status()
    return resp.json()

总结

本文展示了基于MCP协议的亚马逊数据运营工作流的完整工程实现。核心设计要点:

  • MCP Server负责将Pangolinfo API能力封装为标准化工具,保证AI可动态发现和调用
  • 工具描述的质量直接影响AI选择工具的准确性,description字段要写得具体
  • 数据质量是整个系统的上限:Pangolinfo的分钟级更新和结构化JSON输出是让AI推理有意义的前提
  • 生产环境建议加入缓存层和重试机制,保证工作流稳定性

完整代码已整理完毕。如需AMZ Data Tracker可视化版本的集成方案,欢迎在评论区留言交流。