在当今数字化的商业环境中,实时获取亚马逊商品数据对于市场分析、竞品监控和价格追踪至关重要。亚马逊提供了官方的 SP-API (Selling Partner API),允许开发者合法地获取商品信息。本文将介绍如何使用 Python 接入亚马逊 SP-API,实现商品详情的实时数据采集。
前期准备

在开始开发之前,需要完成以下准备工作:
- 注册亚马逊 API 请求账户
- 申请接口账号
- 创建 SP-API 应用并获取必要的凭证
- 安装必要的 Python 库

接入亚马逊 SP-API 的步骤

亚马逊 SP-API 采用 OAuth 2.0 认证机制,主要包括以下几个步骤:
1. 生成 LWA (Login with Amazon) 访问令牌
2. 生成 API 请求签名
3. 调用商品信息相关的 API 端点

下面是一个完整的 Python 实现示例:
import base64
import hashlib
import hmac
import json
import time
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from urllib.parse import quote, quote_plus

import requests
class AmazonProductDataCollector:
    """Collect Amazon product data through the Selling Partner API (SP-API).

    The collector exchanges a Login with Amazon (LWA) refresh token for an
    access token, assumes an IAM role through AWS STS, and signs every SP-API
    call with AWS Signature Version 4 using the temporary role credentials.

    ``config`` must provide the keys ``client_id``, ``client_secret``,
    ``refresh_token``, ``access_key``, ``secret_key``, ``role_arn``,
    ``region`` (AWS region, e.g. ``"us-east-1"``) and ``marketplace_id``.

    Note that constructing an instance performs a network call to AWS STS.
    """

    # Seconds to wait for any outbound HTTP call before giving up.
    REQUEST_TIMEOUT = 30

    # The global STS endpoint only accepts signatures scoped to us-east-1.
    STS_HOST = "sts.amazonaws.com"
    STS_REGION = "us-east-1"
    STS_DURATION_SECONDS = 3600

    # Cached tokens/credentials are treated as expired this many seconds early.
    EXPIRY_MARGIN_SECONDS = 60

    # SP-API hosts are named after selling regions, not AWS regions.
    SP_API_HOSTS = {
        "us-east-1": "sellingpartnerapi-na.amazon.com",
        "eu-west-1": "sellingpartnerapi-eu.amazon.com",
        "us-west-2": "sellingpartnerapi-fe.amazon.com",
    }

    def __init__(self, config):
        """Store the credentials from ``config`` and fetch STS credentials.

        Raises:
            KeyError: if a required key is missing from ``config``.
            Exception: if the STS AssumeRole call fails.
        """
        self.client_id = config['client_id']
        self.client_secret = config['client_secret']
        self.refresh_token = config['refresh_token']
        self.access_key = config['access_key']
        self.secret_key = config['secret_key']
        self.role_arn = config['role_arn']
        self.region = config['region']
        self.marketplace_id = config['marketplace_id']

        # API endpoints. Unknown regions fall back to the literal region name
        # so that callers passing a custom value keep their old behaviour.
        self.lwa_endpoint = "https://api.amazon.com/auth/o2/token"
        self.api_host = self.SP_API_HOSTS.get(
            self.region, f"sellingpartnerapi-{self.region}.amazon.com"
        )
        self.api_endpoint = f"https://{self.api_host}"

        # Cached LWA access token.
        self.access_token = None
        self.token_expiry = 0

        # Temporary role credentials, refreshed automatically when they age out.
        self.sts_credentials = None
        self._sts_expiry = 0
        self._refresh_sts_credentials()

    # ------------------------------------------------------------------
    # Credentials
    # ------------------------------------------------------------------
    def _refresh_sts_credentials(self):
        """Fetch new STS credentials and record when they should be renewed."""
        requested_at = time.time()
        self.sts_credentials = self._get_sts_credentials()
        self._sts_expiry = (
            requested_at + self.STS_DURATION_SECONDS - self.EXPIRY_MARGIN_SECONDS
        )

    def _get_sts_credentials(self):
        """Assume ``role_arn`` through AWS STS and return temporary credentials.

        The request is a SigV4 query-string-signed GET against the global STS
        endpoint, signed with the long-term ``access_key`` / ``secret_key``.

        Returns:
            dict with ``access_key``, ``secret_key``, ``session_token`` and
            ``expiration`` (the expiration string as returned by STS).

        Raises:
            Exception: if STS answers with a non-200 status or the response
                cannot be parsed.
        """
        amz_date = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        date_stamp = amz_date[:8]
        scope = f"{date_stamp}/{self.STS_REGION}/sts/aws4_request"

        query_params = {
            "Action": "AssumeRole",
            "DurationSeconds": str(self.STS_DURATION_SECONDS),
            "RoleArn": self.role_arn,
            "RoleSessionName": "amazon-api-session",
            "Version": "2011-06-15",
            "X-Amz-Algorithm": "AWS4-HMAC-SHA256",
            "X-Amz-Credential": f"{self.access_key}/{scope}",
            "X-Amz-Date": amz_date,
            "X-Amz-SignedHeaders": "host",
        }
        canonical_querystring = self._canonical_query(query_params)

        # A GET has no body, so the payload hash is the hash of b"".
        canonical_request = "\n".join([
            "GET",
            "/",
            canonical_querystring,
            f"host:{self.STS_HOST}\n",
            "host",
            hashlib.sha256(b"").hexdigest(),
        ])
        string_to_sign = "\n".join([
            "AWS4-HMAC-SHA256",
            amz_date,
            scope,
            hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
        ])
        signing_key = self._derive_signing_key(
            self.secret_key, date_stamp, self.STS_REGION, "sts"
        )
        signature = self._sign(signing_key, string_to_sign, hex=True)

        url = (
            f"https://{self.STS_HOST}/?{canonical_querystring}"
            f"&X-Amz-Signature={signature}"
        )
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            raise Exception(f"获取STS凭证失败: {response.text}")

        # STS answers with namespaced XML; index elements by their local name.
        try:
            root = ET.fromstring(response.text)
        except ET.ParseError as err:
            raise Exception(f"获取STS凭证失败: {response.text}") from err
        fields = {
            node.tag.rsplit("}", 1)[-1]: (node.text or "").strip()
            for node in root.iter()
        }
        try:
            return {
                "access_key": fields["AccessKeyId"],
                "secret_key": fields["SecretAccessKey"],
                "session_token": fields["SessionToken"],
                "expiration": fields["Expiration"],
            }
        except KeyError as err:
            raise Exception(f"获取STS凭证失败: {response.text}") from err

    def _get_access_token(self):
        """Return a valid LWA access token, refreshing it when necessary.

        Raises:
            Exception: if the LWA token endpoint answers with a non-200 status.
        """
        if self.access_token and time.time() < self.token_expiry:
            return self.access_token

        payload = {
            "grant_type": "refresh_token",
            "refresh_token": self.refresh_token,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }
        response = requests.post(
            self.lwa_endpoint, data=payload, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            raise Exception(f"获取访问令牌失败: {response.text}")

        data = response.json()
        self.access_token = data["access_token"]
        # Expire the cached token slightly early to avoid using a stale one.
        self.token_expiry = (
            time.time() + data["expires_in"] - self.EXPIRY_MARGIN_SECONDS
        )
        return self.access_token

    # ------------------------------------------------------------------
    # Signature Version 4 helpers
    # ------------------------------------------------------------------
    def _sign(self, key, msg, hex=False):
        """Return HMAC-SHA256 of ``msg`` under ``key``.

        ``key`` may be ``str`` (the initial ``"AWS4" + secret``) or ``bytes``
        (the digest of a previous derivation step). Returns raw bytes, or a
        hex string when ``hex`` is true.
        """
        key_bytes = key.encode("utf-8") if isinstance(key, str) else key
        mac = hmac.new(key_bytes, msg.encode("utf-8"), hashlib.sha256)
        return mac.hexdigest() if hex else mac.digest()

    def _derive_signing_key(self, secret_key, date_stamp, region, service):
        """Derive the SigV4 signing key for one date/region/service scope."""
        k_date = self._sign(f"AWS4{secret_key}", date_stamp)
        k_region = self._sign(k_date, region)
        k_service = self._sign(k_region, service)
        return self._sign(k_service, "aws4_request")

    @staticmethod
    def _canonical_query(params):
        """Return the SigV4 canonical query string for ``params``.

        Names and values are percent-encoded with only the unreserved
        characters left as-is (so a space becomes ``%20``, never ``+``), then
        sorted by encoded name.
        """
        encoded = sorted(
            (quote(str(name), safe="-_.~"), quote(str(value), safe="-_.~"))
            for name, value in params.items()
        )
        return "&".join(f"{name}={value}" for name, value in encoded)

    @staticmethod
    def _signed_header_names(headers):
        """Return the sorted, lower-cased header names covered by the signature.

        ``Authorization`` is excluded because it carries the signature itself.
        """
        return sorted(
            name.lower() for name in headers if name.lower() != "authorization"
        )

    @staticmethod
    def _encode_body(payload):
        """Return the request body bytes for ``payload``.

        Falsy payloads yield an empty body; ``bytes``/``str`` are used as-is;
        anything else is JSON-serialised.
        """
        if not payload:
            return b""
        if isinstance(payload, bytes):
            return payload
        if isinstance(payload, str):
            return payload.encode("utf-8")
        return json.dumps(payload).encode("utf-8")

    def _generate_aws_v4_signature(self, method, endpoint, path, query_params, headers, payload):
        """Compute the SigV4 signature for an SP-API (``execute-api``) request.

        Args:
            method: HTTP method, e.g. ``"GET"``.
            endpoint: base URL of the API (unused; kept for compatibility).
            path: absolute request path.
            query_params: mapping of query parameters (may be empty or None).
            headers: request headers; must contain ``x-amz-date``. Every header
                except ``Authorization`` is signed.
            payload: request body as dict, str, bytes or None.

        Returns:
            The hex-encoded signature string.
        """
        canonical = {
            name.lower(): " ".join(str(value).split())
            for name, value in headers.items()
            if name.lower() != "authorization"
        }
        signed_names = sorted(canonical)
        # Every canonical header line is newline-terminated.
        canonical_headers = "".join(
            f"{name}:{canonical[name]}\n" for name in signed_names
        )
        payload_hash = hashlib.sha256(self._encode_body(payload)).hexdigest()
        canonical_request = "\n".join([
            method,
            path,
            self._canonical_query(query_params or {}),
            canonical_headers,
            ";".join(signed_names),
            payload_hash,
        ])

        timestamp = canonical["x-amz-date"]
        date_stamp = timestamp[:8]
        credential_scope = f"{date_stamp}/{self.region}/execute-api/aws4_request"
        string_to_sign = "\n".join([
            "AWS4-HMAC-SHA256",
            timestamp,
            credential_scope,
            hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
        ])
        signing_key = self._derive_signing_key(
            self.sts_credentials["secret_key"],
            date_stamp,
            self.region,
            "execute-api",
        )
        return self._sign(signing_key, string_to_sign, hex=True)

    def _send_signed_request(self, method, path, query_params=None, payload=None, extra_headers=None):
        """Sign and send one SP-API request; return the ``requests`` response."""
        if time.time() >= self._sts_expiry:
            self._refresh_sts_credentials()
        access_token = self._get_access_token()

        query_params = query_params or {}
        headers = {
            "host": self.api_host,
            "x-amz-date": datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ"),
            "x-amz-access-token": access_token,
            "x-amz-security-token": self.sts_credentials["session_token"],
        }
        headers.update(extra_headers or {})

        # Serialise once so the signed bytes are exactly the bytes sent.
        body = self._encode_body(payload)
        signature = self._generate_aws_v4_signature(
            method, self.api_endpoint, path, query_params, headers, body
        )
        signed_headers = ";".join(self._signed_header_names(headers))
        date_stamp = headers["x-amz-date"][:8]
        headers["Authorization"] = (
            f"AWS4-HMAC-SHA256 "
            f"Credential={self.sts_credentials['access_key']}/{date_stamp}/{self.region}/execute-api/aws4_request, "
            f"SignedHeaders={signed_headers}, "
            f"Signature={signature}"
        )

        # Build the URL from the canonical query so it matches what was signed.
        url = f"{self.api_endpoint}{path}"
        if query_params:
            url = f"{url}?{self._canonical_query(query_params)}"
        return requests.request(
            method,
            url,
            headers=headers,
            data=body or None,
            timeout=self.REQUEST_TIMEOUT,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_product_details(self, asin_list):
        """Return pricing information for the given ASINs.

        Calls the Product Pricing ``getPricing`` operation
        (``GET /products/pricing/v0/price``) for the configured marketplace.

        Args:
            asin_list: non-empty list of ASIN strings.

        Returns:
            The decoded JSON response.

        Raises:
            ValueError: if ``asin_list`` is empty.
            Exception: if the API answers with a non-200 status.
        """
        if not asin_list:
            raise ValueError("asin_list must contain at least one ASIN")

        query_params = {
            "MarketplaceId": self.marketplace_id,
            "Asins": ",".join(asin_list),
            "ItemType": "Asin",
        }
        response = self._send_signed_request(
            "GET", "/products/pricing/v0/price", query_params=query_params
        )
        if response.status_code == 200:
            return response.json()
        raise Exception(f"获取商品详情失败: {response.text}")

    def get_product_listings(self, seller_sku_list):
        """Return listing information for the given seller SKUs.

        Args:
            seller_sku_list: list of seller SKU strings.

        Returns:
            The decoded JSON response.

        Raises:
            Exception: if the API answers with a non-200 status.
        """
        # NOTE(review): this path/verb is kept from the original code, but the
        # documented Listings Items operation looks like
        # GET /listings/2021-08-01/items/{sellerId}/{sku} -- confirm before use.
        payload = {
            "marketplaceIds": [self.marketplace_id],
            "sku": seller_sku_list,
        }
        response = self._send_signed_request(
            "POST",
            "/listings/v2/items",
            payload=payload,
            extra_headers={"Content-Type": "application/json"},
        )
        if response.status_code == 200:
            return response.json()
        raise Exception(f"获取商品列表信息失败: {response.text}")
使用示例
if __name__ == "__main__":
    # Configuration: replace every placeholder with your own credentials.
    config = {
        "client_id": "YOUR_CLIENT_ID",
        "client_secret": "YOUR_CLIENT_SECRET",
        "refresh_token": "YOUR_REFRESH_TOKEN",
        "access_key": "YOUR_AWS_ACCESS_KEY",
        "secret_key": "YOUR_AWS_SECRET_KEY",
        "role_arn": "YOUR_IAM_ROLE_ARN",
        "region": "us-east-1",  # choose the AWS region matching your marketplace
        "marketplace_id": "ATVPDKIKX0DER",  # US marketplace
    }

    # Create the collector (this fetches STS credentials immediately).
    collector = AmazonProductDataCollector(config)

    # Example: fetch product details.
    asin_list = ["B07W2N765K", "B07Q9KX7J1"]  # sample ASINs
    product_details = collector.get_product_details(asin_list)
    print("商品详情:")
    print(json.dumps(product_details, indent=2))

    # Example: fetch listing information.
    seller_sku_list = ["SKU001", "SKU002"]  # sample SKUs
    product_listings = collector.get_product_listings(seller_sku_list)
    print("商品列表信息:")
    print(json.dumps(product_listings, indent=2))
AI生成项目
代码解析

上面的代码实现了一个完整的亚马逊商品数据采集器,主要功能包括:
认证与授权:
- 使用 LWA (Login with Amazon) 获取访问令牌
- 通过 AWS STS 获取临时凭证
- 实现 AWS V4 签名算法

API 调用:
- get_product_details() - 获取指定 ASIN 的商品价格和可用性信息
- get_product_listings() - 获取卖家 SKU 的商品列表信息

请求处理:
- 处理请求头、参数和签名
- 处理响应并返回结构化数据

使用说明

1. 首先需要在亚马逊开发者中心注册并创建应用,获取必要的凭证
2. 将凭证填入配置部分的相应字段
3. 安装必要的依赖:pip install requests
4. 运行代码,指定要查询的 ASIN 或卖家 SKU

注意事项

- 亚马逊 SP-API 有严格的使用限制和请求频率限制,需要合理规划 API 调用
- 处理错误和异常情况,特别是网络问题和认证失效
- 对于大规模数据采集,考虑使用队列和异步处理
- 保护好 API 凭证,避免泄露
- 遵守亚马逊的 API 使用条款,不要用于非法目的

扩展功能

你可以基于这个基础框架扩展更多功能:
- 添加数据存储功能,将采集的数据保存到数据库
- 实现定时任务,定期采集商品数据
- 添加数据可视化功能,分析价格趋势
- 实现自动邮件通知,当商品价格变动时发送通知

通过亚马逊官方 API 采集商品数据是一种合法、稳定的方式,可以为电商分析、价格监控和市场调研提供有力支持。