利用亚马逊 API 实现商品详情实时数据采集(开发接入示例)

84 阅读6分钟

在当今数字化的商业环境中,实时获取亚马逊商品数据对于市场分析、竞品监控和价格追踪至关重要。亚马逊提供了官方的 SP-API (Selling Partner API),允许开发者合法地获取商品信息。本文将介绍如何使用 Python 接入亚马逊 SP-API,实现商品详情的实时数据采集。

## 前期准备

在开始开发之前,需要完成以下准备工作:

- 注册亚马逊 API 请求账户
- 申请接口账号
- 创建 SP-API 应用并获取必要的凭证
- 安装必要的 Python 库

## 接入亚马逊 SP-API 的步骤

亚马逊 SP-API 采用 OAuth 2.0 认证机制,主要包括以下几个步骤:

1. 生成 LWA (Login with Amazon) 访问令牌
2. 生成 API 请求签名
3. 调用商品信息相关的 API 端点

下面是一个完整的 Python 实现示例:

import base64
import hashlib
import hmac
import json
import time
import xml.etree.ElementTree as ET
from datetime import datetime
from urllib.parse import quote, quote_plus

import requests

class AmazonProductDataCollector:
    """Collect product data from the Amazon Selling Partner API (SP-API).

    The collector exchanges a Login-with-Amazon (LWA) refresh token for an
    access token, assumes an IAM role through AWS STS to obtain temporary
    credentials, and signs every SP-API call with AWS Signature Version 4.

    All failures reported by the remote services are raised as plain
    ``Exception`` with the response body in the message.
    """

    # ``requests`` never times out on its own; bound every HTTP call.
    _REQUEST_TIMEOUT = 30
    # The global STS endpoint is always signed with the us-east-1 scope.
    _STS_HOST = "sts.amazonaws.com"
    _STS_REGION = "us-east-1"
    _STS_DURATION_SECONDS = 3600
    # Refresh tokens/credentials this many seconds before they expire.
    _EXPIRY_MARGIN_SECONDS = 60
    # SP-API host suffix -> AWS region used in the signing scope.
    _AWS_REGION_BY_ENDPOINT = {
        "na": "us-east-1",
        "eu": "eu-west-1",
        "fe": "us-west-2",
    }

    def __init__(self, config):
        """Initialise the collector and fetch the first set of STS credentials.

        Args:
            config: Mapping with the keys ``client_id``, ``client_secret``,
                ``refresh_token``, ``access_key``, ``secret_key``,
                ``role_arn``, ``region`` and ``marketplace_id``. ``region``
                may be an AWS region (``us-east-1``) or an SP-API endpoint
                suffix (``na``, ``eu``, ``fe``).

        Raises:
            KeyError: If a required key is missing from ``config``.
            Exception: If the STS AssumeRole call fails.
        """
        self.client_id = config['client_id']
        self.client_secret = config['client_secret']
        self.refresh_token = config['refresh_token']
        self.access_key = config['access_key']
        self.secret_key = config['secret_key']
        self.role_arn = config['role_arn']
        self.region, endpoint_suffix = self._resolve_region(config['region'])
        self.marketplace_id = config['marketplace_id']

        # API endpoints
        self.lwa_endpoint = "https://api.amazon.com/auth/o2/token"
        self.api_host = f"sellingpartnerapi-{endpoint_suffix}.amazon.com"
        self.api_endpoint = f"https://{self.api_host}"

        # Cached LWA access token
        self.access_token = None
        self.token_expiry = 0

        # Temporary STS credentials and the moment they must be renewed
        self.sts_credentials = self._get_sts_credentials()
        self._sts_expiry = (
            time.time() + self._STS_DURATION_SECONDS - self._EXPIRY_MARGIN_SECONDS
        )

    @classmethod
    def _resolve_region(cls, region):
        """Return ``(aws_region, endpoint_suffix)`` for a configured region.

        Unknown values are passed through unchanged for both elements.
        """
        if region in cls._AWS_REGION_BY_ENDPOINT:
            return cls._AWS_REGION_BY_ENDPOINT[region], region
        for suffix, aws_region in cls._AWS_REGION_BY_ENDPOINT.items():
            if aws_region == region:
                return region, suffix
        return region, region

    @staticmethod
    def _canonical_query(params):
        """Build a SigV4 canonical query string from a mapping.

        Names and values are percent-encoded with only ``-_.~`` left
        unescaped (a space becomes ``%20``, never ``+``) and the pairs are
        sorted by encoded name.
        """
        encoded = sorted(
            (quote(str(name), safe="-_.~"), quote(str(value), safe="-_.~"))
            for name, value in params.items()
        )
        return "&".join(f"{name}={value}" for name, value in encoded)

    @staticmethod
    def _signable_headers(headers):
        """Return sorted ``(lowercase_name, trimmed_value)`` pairs to sign.

        The ``Authorization`` header is excluded: it carries the signature
        itself and therefore cannot be part of what is signed.
        """
        return sorted(
            (name.lower(), " ".join(str(value).split()))
            for name, value in headers.items()
            if name.lower() != "authorization"
        )

    @staticmethod
    def _encode_payload(payload):
        """Serialise a request body to the exact bytes that are hashed and sent."""
        return json.dumps(payload).encode("utf-8") if payload else b""

    @staticmethod
    def _parse_sts_credentials(xml_text):
        """Extract the credential fields from an STS AssumeRole XML response.

        Raises:
            Exception: If the text is not XML or a credential field is missing.
        """
        try:
            root = ET.fromstring(xml_text)
        except ET.ParseError as err:
            raise Exception(f"获取STS凭证失败: {xml_text}") from err
        # Strip the XML namespace so elements can be looked up by local name.
        values = {
            element.tag.rsplit("}", 1)[-1]: (element.text or "").strip()
            for element in root.iter()
        }
        try:
            return {
                "access_key": values["AccessKeyId"],
                "secret_key": values["SecretAccessKey"],
                "session_token": values["SessionToken"],
                "expiration": values["Expiration"],
            }
        except KeyError as err:
            raise Exception(f"获取STS凭证失败: {xml_text}") from err

    def _signing_key(self, secret_key, date_stamp, region, service):
        """Derive the SigV4 signing key for one date/region/service scope."""
        key = self._sign(f"AWS4{secret_key}", date_stamp)
        for part in (region, service, "aws4_request"):
            key = self._sign(key, part)
        return key

    def _get_sts_credentials(self):
        """Assume the configured IAM role and return temporary credentials.

        The request is a SigV4 query-string-signed GET against the global STS
        endpoint.

        Returns:
            Dict with ``access_key``, ``secret_key``, ``session_token`` and
            ``expiration`` (all strings).

        Raises:
            Exception: If STS answers with a non-200 status or an
                unparsable body.
        """
        amz_date = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
        date_stamp = amz_date[:8]
        credential_scope = f"{date_stamp}/{self._STS_REGION}/sts/aws4_request"

        query_params = {
            "Action": "AssumeRole",
            "DurationSeconds": str(self._STS_DURATION_SECONDS),
            "RoleArn": self.role_arn,
            "RoleSessionName": "amazon-api-session",
            "Version": "2011-06-15",
            "X-Amz-Algorithm": "AWS4-HMAC-SHA256",
            "X-Amz-Credential": f"{self.access_key}/{credential_scope}",
            "X-Amz-Date": amz_date,
            "X-Amz-Expires": "60",
            "X-Amz-SignedHeaders": "host",
        }
        canonical_querystring = self._canonical_query(query_params)

        # A GET has no body, so the payload hash is that of the empty string.
        canonical_request = "\n".join([
            "GET",
            "/",
            canonical_querystring,
            f"host:{self._STS_HOST}",
            "",
            "host",
            hashlib.sha256(b"").hexdigest(),
        ])
        string_to_sign = "\n".join([
            "AWS4-HMAC-SHA256",
            amz_date,
            credential_scope,
            hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
        ])
        signing_key = self._signing_key(
            self.secret_key, date_stamp, self._STS_REGION, "sts"
        )
        signature = self._sign(signing_key, string_to_sign, hex=True)

        response = requests.get(
            f"https://{self._STS_HOST}/?{canonical_querystring}"
            f"&X-Amz-Signature={signature}",
            timeout=self._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise Exception(f"获取STS凭证失败: {response.text}")
        return self._parse_sts_credentials(response.text)

    def _current_sts_credentials(self):
        """Return the STS credentials, renewing them shortly before expiry."""
        if time.time() >= self._sts_expiry:
            self.sts_credentials = self._get_sts_credentials()
            self._sts_expiry = (
                time.time()
                + self._STS_DURATION_SECONDS
                - self._EXPIRY_MARGIN_SECONDS
            )
        return self.sts_credentials

    def _get_access_token(self):
        """Return a valid LWA access token, refreshing it when necessary.

        Raises:
            Exception: If the LWA token endpoint answers with a non-200 status.
        """
        # Reuse the cached token while it is still valid.
        if self.access_token and time.time() < self.token_expiry:
            return self.access_token

        payload = {
            "grant_type": "refresh_token",
            "refresh_token": self.refresh_token,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }
        response = requests.post(
            self.lwa_endpoint, data=payload, timeout=self._REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            raise Exception(f"获取访问令牌失败: {response.text}")

        data = response.json()
        self.access_token = data["access_token"]
        # Treat the token as expired slightly early to avoid racing the clock.
        self.token_expiry = (
            time.time() + data["expires_in"] - self._EXPIRY_MARGIN_SECONDS
        )
        return self.access_token

    def _sign(self, key, msg, hex=False):
        """Compute an HMAC-SHA256 of ``msg``.

        Args:
            key: Secret as ``str`` or ``bytes``. Intermediate SigV4 keys are
                raw digests, so ``bytes`` must be accepted.
            msg: Text to authenticate.
            hex: Return a hex string instead of raw bytes when true.
        """
        key_bytes = key.encode("utf-8") if isinstance(key, str) else key
        mac = hmac.new(key_bytes, msg.encode("utf-8"), hashlib.sha256)
        return mac.hexdigest() if hex else mac.digest()

    def _generate_aws_v4_signature(self, method, endpoint, path, query_params, headers, payload):
        """Compute the SigV4 signature of an SP-API (``execute-api``) request.

        Args:
            method: HTTP verb.
            endpoint: Unused; kept for interface compatibility.
            path: Request path, used as the canonical URI.
            query_params: Mapping of query parameters.
            headers: Request headers; must contain ``x-amz-date``. Every
                header except ``Authorization`` is signed.
            payload: JSON-serialisable body, or a falsy value for no body.

        Returns:
            The hex-encoded signature.
        """
        signable = self._signable_headers(headers)
        canonical_headers = "\n".join(f"{name}:{value}" for name, value in signable)
        signed_headers = ";".join(name for name, _ in signable)

        canonical_querystring = self._canonical_query(query_params)
        payload_hash = hashlib.sha256(self._encode_payload(payload)).hexdigest()

        # The blank line is the newline that terminates the last header.
        canonical_request = (
            f"{method}\n{path}\n{canonical_querystring}\n"
            f"{canonical_headers}\n\n{signed_headers}\n{payload_hash}"
        )

        timestamp = headers["x-amz-date"]
        date_stamp = timestamp[:8]
        credential_scope = f"{date_stamp}/{self.region}/execute-api/aws4_request"
        request_digest = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
        string_to_sign = (
            f"AWS4-HMAC-SHA256\n{timestamp}\n{credential_scope}\n{request_digest}"
        )

        signing_key = self._signing_key(
            self.sts_credentials["secret_key"], date_stamp, self.region, "execute-api"
        )
        return self._sign(signing_key, string_to_sign, hex=True)

    def _signed_request(self, method, path, query_params=None, payload=None):
        """Send one SigV4-signed SP-API request and return the raw response."""
        access_token = self._get_access_token()
        credentials = self._current_sts_credentials()
        query_params = query_params or {}

        headers = {
            "host": self.api_host,
            "x-amz-date": time.strftime("%Y%m%dT%H%M%SZ", time.gmtime()),
            "x-amz-access-token": access_token,
            "x-amz-security-token": credentials["session_token"],
        }
        body = self._encode_payload(payload)
        if body:
            headers["content-type"] = "application/json"

        signature = self._generate_aws_v4_signature(
            method, self.api_endpoint, path, query_params, headers, payload
        )
        # Must list exactly the headers that went into the signature.
        signed_headers = ";".join(
            name for name, _ in self._signable_headers(headers)
        )
        date_stamp = headers["x-amz-date"][:8]
        headers["Authorization"] = (
            f"AWS4-HMAC-SHA256 "
            f"Credential={credentials['access_key']}/{date_stamp}/{self.region}/execute-api/aws4_request, "
            f"SignedHeaders={signed_headers}, "
            f"Signature={signature}"
        )

        # Send the query string and body exactly as they were signed.
        url = f"{self.api_endpoint}{path}"
        querystring = self._canonical_query(query_params)
        if querystring:
            url = f"{url}?{querystring}"
        return requests.request(
            method,
            url,
            headers=headers,
            data=body or None,
            timeout=self._REQUEST_TIMEOUT,
        )

    def get_product_details(self, asin_list):
        """Fetch pricing information for the given ASINs.

        Args:
            asin_list: Non-empty list of ASIN strings.

        Returns:
            The decoded JSON response.

        Raises:
            ValueError: If ``asin_list`` is empty.
            Exception: If the API answers with a non-200 status.
        """
        if not asin_list:
            raise ValueError("asin_list must contain at least one ASIN")

        query_params = {
            "MarketplaceId": self.marketplace_id,
            "Asins": ",".join(asin_list),
            "ItemType": "Asin",
        }
        response = self._signed_request(
            "GET", "/products/pricing/v0/price", query_params=query_params
        )
        if response.status_code == 200:
            return response.json()
        raise Exception(f"获取商品详情失败: {response.text}")

    def get_product_listings(self, seller_sku_list):
        """Fetch listing information for the given seller SKUs.

        Args:
            seller_sku_list: Non-empty list of seller SKU strings.

        Returns:
            The decoded JSON response.

        Raises:
            ValueError: If ``seller_sku_list`` is empty.
            Exception: If the API answers with a non-200 status.
        """
        if not seller_sku_list:
            raise ValueError("seller_sku_list must contain at least one SKU")

        # NOTE(review): this path and body do not appear to match the
        # documented Listings Items API, which looks like
        # GET /listings/2021-08-01/items/{sellerId}/{sku} -- confirm before use.
        payload = {
            "marketplaceIds": [self.marketplace_id],
            "sku": seller_sku_list,
        }
        response = self._signed_request(
            "POST", "/listings/v2/items", payload=payload
        )
        if response.status_code == 200:
            return response.json()
        raise Exception(f"获取商品列表信息失败: {response.text}")

使用示例

if __name__ == "__main__":
    # Configuration: replace every placeholder with your own credentials.
    config = {
        "client_id": "YOUR_CLIENT_ID",
        "client_secret": "YOUR_CLIENT_SECRET",
        "refresh_token": "YOUR_REFRESH_TOKEN",
        "access_key": "YOUR_AWS_ACCESS_KEY",
        "secret_key": "YOUR_AWS_SECRET_KEY",
        "role_arn": "YOUR_IAM_ROLE_ARN",
        "region": "us-east-1",  # choose according to your region
        "marketplace_id": "ATVPDKIKX0DER",  # US marketplace
    }

    # Create the collector
    collector = AmazonProductDataCollector(config)

    # Example: fetch product details
    asin_list = ["B07W2N765K", "B07Q9KX7J1"]  # sample ASINs
    product_details = collector.get_product_details(asin_list)
    print("商品详情:")
    print(json.dumps(product_details, indent=2))

    # Example: fetch listing information
    seller_sku_list = ["SKU001", "SKU002"]  # sample SKUs
    product_listings = collector.get_product_listings(seller_sku_list)
    print("商品列表信息:")
    print(json.dumps(product_listings, indent=2))

AI生成项目

## 代码解析

上面的代码实现了一个完整的亚马逊商品数据采集器,主要功能包括:

认证与授权:

- 使用 LWA (Login with Amazon) 获取访问令牌
- 通过 AWS STS 获取临时凭证
- 实现 AWS V4 签名算法

API 调用:

- `get_product_details()` - 获取指定 ASIN 的商品价格和可用性信息
- `get_product_listings()` - 获取卖家 SKU 的商品列表信息

请求处理:

- 处理请求头、参数和签名
- 处理响应并返回结构化数据

## 使用说明

1. 首先需要在亚马逊开发者中心注册并创建应用,获取必要的凭证
2. 将凭证填入配置部分的相应字段
3. 安装必要的依赖:`pip install requests`
4. 运行代码,指定要查询的 ASIN 或卖家 SKU

## 注意事项

- 亚马逊 SP-API 有严格的使用限制和请求频率限制,需要合理规划 API 调用
- 处理错误和异常情况,特别是网络问题和认证失效
- 对于大规模数据采集,考虑使用队列和异步处理
- 保护好 API 凭证,避免泄露
- 遵守亚马逊的 API 使用条款,不要用于非法目的

## 扩展功能

你可以基于这个基础框架扩展更多功能:

- 添加数据存储功能,将采集的数据保存到数据库
- 实现定时任务,定期采集商品数据
- 添加数据可视化功能,分析价格趋势
- 实现自动邮件通知,当商品价格变动时发送通知

通过亚马逊官方 API 采集商品数据是一种合法、稳定的方式,可以为电商分析、价格监控和市场调研提供有力支持。