在当今数字化的商业环境中,实时获取亚马逊商品数据对于市场分析、竞品监控和价格追踪至关重要。亚马逊提供了官方的 SP-API (Selling Partner API),允许开发者合法地获取商品信息。本文将介绍如何使用 Python 接入亚马逊 SP-API,实现商品详情的实时数据采集。
前期准备
在开始开发之前,需要完成以下准备工作:
- 注册**亚马逊 API** 请求账户
- 申请接口账号
- 创建 SP-API 应用并获取必要的凭证
- 安装必要的 Python 库
接入亚马逊 SP-API 的步骤
亚马逊 SP-API 采用 OAuth 2.0 认证机制,主要包括以下几个步骤:
- 生成 LWA (Login with Amazon) 访问令牌
- 生成 API 请求签名
- 调用商品信息相关的 API 端点
下面是一个完整的 Python 实现示例:
import requests
import time
import hmac
import hashlib
import base64
import json
from urllib.parse import quote_plus
from datetime import datetime
class AmazonProductDataCollector:
    """Client that fetches product data from the Amazon Selling Partner API.

    The collector authenticates in two layers:

    * a Login-with-Amazon (LWA) access token, obtained from the refresh token
      and cached until shortly before it expires;
    * temporary AWS credentials obtained through STS ``AssumeRole``, used to
      compute the AWS Signature Version 4 of every SP-API request.

    ``config`` must provide the keys ``client_id``, ``client_secret``,
    ``refresh_token``, ``access_key``, ``secret_key``, ``role_arn``,
    ``region`` (an AWS region such as ``us-east-1``) and ``marketplace_id``.
    An optional ``timeout`` key (seconds, default 30) bounds every HTTP call.
    """

    # SP-API hosts are named after the selling region, not the AWS region.
    _ENDPOINT_PREFIXES = {"us-east-1": "na", "eu-west-1": "eu", "us-west-2": "fe"}
    # The global STS endpoint must be signed for us-east-1.
    _STS_HOST = "sts.amazonaws.com"
    _STS_REGION = "us-east-1"
    _STS_TTL_SECONDS = 3600
    # SigV4 timestamp layout; the first 8 characters are the date stamp.
    _AMZ_DATE_FORMAT = "%Y%m%dT%H%M%SZ"

    def __init__(self, config):
        """Store the credentials from ``config`` and fetch STS credentials.

        Raises:
            KeyError: if a required key is missing from ``config``.
            RuntimeError: if the STS ``AssumeRole`` call fails.
        """
        self.client_id = config['client_id']
        self.client_secret = config['client_secret']
        self.refresh_token = config['refresh_token']
        self.access_key = config['access_key']
        self.secret_key = config['secret_key']
        self.role_arn = config['role_arn']
        self.region = config['region']
        self.marketplace_id = config['marketplace_id']
        self.timeout = config.get('timeout', 30)

        # API endpoints. Unknown regions fall back to the raw region string.
        self.lwa_endpoint = "https://api.amazon.com/auth/o2/token"
        prefix = self._ENDPOINT_PREFIXES.get(self.region, self.region)
        self.api_host = f"sellingpartnerapi-{prefix}.amazon.com"
        self.api_endpoint = f"https://{self.api_host}"

        # Cached LWA access token and the time.time() value at which it expires.
        self.access_token = None
        self.token_expiry = 0

        # Temporary AWS credentials used for request signing.
        self.sts_credentials = None
        self._ensure_sts_credentials()

    def _ensure_sts_credentials(self):
        """Fetch STS credentials when none are cached or they are about to expire."""
        age = time.time() - getattr(self, "_sts_obtained_at", 0.0)
        if self.sts_credentials is None or age >= self._STS_TTL_SECONDS - 60:
            self.sts_credentials = self._get_sts_credentials()
            self._sts_obtained_at = time.time()

    def _get_sts_credentials(self):
        """Call STS ``AssumeRole`` with a query-signed GET request.

        Returns:
            dict with ``access_key``, ``secret_key``, ``session_token`` and
            ``expiration`` taken from the STS response.

        Raises:
            RuntimeError: if STS answers with a non-200 status.
        """
        timestamp = time.strftime(self._AMZ_DATE_FORMAT, time.gmtime())
        date_stamp = timestamp[:8]
        scope = f"{date_stamp}/{self._STS_REGION}/sts/aws4_request"

        query_params = {
            "Action": "AssumeRole",
            "DurationSeconds": str(self._STS_TTL_SECONDS),
            "RoleArn": self.role_arn,
            "RoleSessionName": "amazon-api-session",
            "Version": "2011-06-15",
            "X-Amz-Algorithm": "AWS4-HMAC-SHA256",
            "X-Amz-Credential": f"{self.access_key}/{scope}",
            "X-Amz-Date": timestamp,
            "X-Amz-SignedHeaders": "host",
        }
        canonical_querystring = self._canonical_query(query_params)

        # Query-signed requests carry no body, so the payload hash is that of
        # the empty string. The "" entry yields the newline that terminates
        # the canonical header block.
        canonical_request = "\n".join([
            "GET",
            "/",
            canonical_querystring,
            f"host:{self._STS_HOST}",
            "",
            "host",
            hashlib.sha256(b"").hexdigest(),
        ])
        string_to_sign = "\n".join([
            "AWS4-HMAC-SHA256",
            timestamp,
            scope,
            hashlib.sha256(canonical_request.encode()).hexdigest(),
        ])
        signature = self._derive_signature(
            self.secret_key, date_stamp, self._STS_REGION, "sts", string_to_sign
        )

        response = requests.get(
            f"https://{self._STS_HOST}/?{canonical_querystring}"
            f"&X-Amz-Signature={signature}",
            # Ask for JSON; the parser below still copes with an XML reply.
            headers={"Accept": "application/json"},
            timeout=self.timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"获取STS凭证失败: {response.text}")
        return self._parse_sts_credentials(response.text)

    @staticmethod
    def _parse_sts_credentials(body):
        """Extract the credential fields from a JSON or XML AssumeRole response."""
        try:
            credentials = json.loads(body)["AssumeRoleResponse"]["AssumeRoleResult"]["Credentials"]
        except ValueError:
            # Not JSON: parse the XML document instead, ignoring namespaces.
            from xml.etree import ElementTree

            wanted = ("AccessKeyId", "SecretAccessKey", "SessionToken", "Expiration")
            credentials = {}
            for element in ElementTree.fromstring(body).iter():
                local_name = element.tag.rpartition("}")[2]
                if local_name in wanted:
                    credentials[local_name] = element.text
        return {
            "access_key": credentials["AccessKeyId"],
            "secret_key": credentials["SecretAccessKey"],
            "session_token": credentials["SessionToken"],
            "expiration": credentials["Expiration"],
        }

    def _get_access_token(self):
        """Return a valid LWA access token, refreshing the cached one if needed.

        Raises:
            RuntimeError: if the LWA token endpoint answers with a non-200 status.
        """
        # Reuse the cached token while it is still valid.
        if self.access_token and time.time() < self.token_expiry:
            return self.access_token

        payload = {
            "grant_type": "refresh_token",
            "refresh_token": self.refresh_token,
            "client_id": self.client_id,
            "client_secret": self.client_secret,
        }
        response = requests.post(self.lwa_endpoint, data=payload, timeout=self.timeout)
        if response.status_code != 200:
            raise RuntimeError(f"获取访问令牌失败: {response.text}")

        data = response.json()
        self.access_token = data["access_token"]
        # Treat the token as expired 60 seconds early to avoid races.
        self.token_expiry = time.time() + data["expires_in"] - 60
        return self.access_token

    def _sign(self, key, msg, hex=False):
        """Return the HMAC-SHA256 of ``msg`` under ``key``.

        ``key`` and ``msg`` may each be ``str`` or ``bytes``; intermediate
        SigV4 keys are raw digests, so ``bytes`` keys must be accepted.
        Returns a hex string when ``hex`` is true, raw bytes otherwise.
        """
        key_bytes = key.encode() if isinstance(key, str) else key
        msg_bytes = msg.encode() if isinstance(msg, str) else msg
        mac = hmac.new(key_bytes, msg_bytes, hashlib.sha256)
        return mac.hexdigest() if hex else mac.digest()

    def _derive_signature(self, secret_key, date_stamp, region, service, string_to_sign):
        """Derive the SigV4 signing key and return the hex signature."""
        key = f"AWS4{secret_key}"
        for part in (date_stamp, region, service, "aws4_request"):
            key = self._sign(key, part)
        return self._sign(key, string_to_sign, hex=True)

    @staticmethod
    def _canonical_query(query_params):
        """Return the SigV4 canonical query string for ``query_params``."""
        def encode(value):
            # SigV4 wants RFC 3986 encoding: spaces are %20, never "+".
            return quote_plus(str(value)).replace("+", "%20")

        pairs = sorted((encode(k), encode(v)) for k, v in (query_params or {}).items())
        return "&".join(f"{k}={v}" for k, v in pairs)

    @staticmethod
    def _signable_headers(headers):
        """Return sorted ``(lowercase name, trimmed value)`` pairs to sign.

        The ``Authorization`` header carries the signature itself and is
        therefore never part of the signed set.
        """
        return sorted(
            (name.lower(), " ".join(str(value).split()))
            for name, value in headers.items()
            if name.lower() != "authorization"
        )

    def _generate_aws_v4_signature(self, method, endpoint, path, query_params, headers, payload):
        """Compute the AWS SigV4 signature of an SP-API request.

        Args:
            method: HTTP verb.
            endpoint: base URL; unused, kept for interface compatibility.
            path: request path used as the canonical URI.
            query_params: mapping of query parameters (may be empty).
            headers: request headers; must contain ``x-amz-date``.
            payload: request body as ``bytes``/``str``, a JSON-serialisable
                object, or a falsy value for an empty body.

        Returns:
            The hex-encoded signature.
        """
        signable = self._signable_headers(headers)
        canonical_headers = "".join(f"{name}:{value}\n" for name, value in signable)
        signed_headers = ";".join(name for name, _ in signable)

        if not payload:
            body = b""
        elif isinstance(payload, bytes):
            body = payload
        elif isinstance(payload, str):
            body = payload.encode()
        else:
            body = json.dumps(payload).encode()

        canonical_request = "\n".join([
            method,
            path,
            self._canonical_query(query_params),
            canonical_headers,  # already newline-terminated
            signed_headers,
            hashlib.sha256(body).hexdigest(),
        ])

        timestamp = headers["x-amz-date"]
        date_stamp = timestamp[:8]
        credential_scope = f"{date_stamp}/{self.region}/execute-api/aws4_request"
        string_to_sign = "\n".join([
            "AWS4-HMAC-SHA256",
            timestamp,
            credential_scope,
            hashlib.sha256(canonical_request.encode()).hexdigest(),
        ])
        return self._derive_signature(
            self.sts_credentials['secret_key'],
            date_stamp,
            self.region,
            "execute-api",
            string_to_sign,
        )

    def _request(self, method, path, query_params, payload, error_label):
        """Send one signed SP-API request and return the decoded JSON body.

        Raises:
            RuntimeError: ``"<error_label>: <response text>"`` on a non-200 status.
        """
        access_token = self._get_access_token()
        self._ensure_sts_credentials()

        # Serialise once so the signed bytes are exactly the bytes sent.
        body = json.dumps(payload).encode("utf-8") if payload is not None else None

        timestamp = time.strftime(self._AMZ_DATE_FORMAT, time.gmtime())
        headers = {
            "host": self.api_host,
            "x-amz-date": timestamp,
            "x-amz-access-token": access_token,
            "x-amz-security-token": self.sts_credentials["session_token"],
        }
        if body is not None:
            headers["Content-Type"] = "application/json"

        signature = self._generate_aws_v4_signature(
            method, self.api_endpoint, path, query_params, headers, body
        )
        signed_headers = ";".join(name for name, _ in self._signable_headers(headers))
        headers["Authorization"] = (
            f"AWS4-HMAC-SHA256 "
            f"Credential={self.sts_credentials['access_key']}/{timestamp[:8]}/{self.region}/execute-api/aws4_request, "
            f"SignedHeaders={signed_headers}, "
            f"Signature={signature}"
        )

        # Send the same query string that was signed.
        url = f"{self.api_endpoint}{path}"
        querystring = self._canonical_query(query_params)
        if querystring:
            url = f"{url}?{querystring}"

        response = requests.request(
            method, url, headers=headers, data=body, timeout=self.timeout
        )
        if response.status_code != 200:
            raise RuntimeError(f"{error_label}: {response.text}")
        return response.json()

    def get_product_details(self, asin_list):
        """Fetch pricing information for the given ASINs.

        Args:
            asin_list: iterable of ASIN strings (a single string is accepted).

        Returns:
            The decoded JSON response of the pricing endpoint.

        Raises:
            ValueError: if ``asin_list`` is empty.
            RuntimeError: if the API answers with a non-200 status.
        """
        asins = [asin_list] if isinstance(asin_list, str) else list(asin_list)
        if not asins:
            raise ValueError("asin_list must contain at least one ASIN")

        query_params = {
            "MarketplaceId": self.marketplace_id,
            "Asins": ",".join(asins),
            "ItemType": "Asin",
        }
        return self._request(
            "GET", "/products/pricing/v0/price", query_params, None, "获取商品详情失败"
        )

    def get_product_listings(self, seller_sku_list):
        """Fetch listing information for the given seller SKUs.

        NOTE(review): the path and body shape are kept as originally written;
        confirm them against the current Listings Items API reference.

        Args:
            seller_sku_list: list of seller SKU strings.

        Returns:
            The decoded JSON response.

        Raises:
            RuntimeError: if the API answers with a non-200 status.
        """
        payload = {
            "marketplaceIds": [self.marketplace_id],
            "sku": seller_sku_list,
        }
        return self._request(
            "POST", "/listings/v2/items", {}, payload, "获取商品列表信息失败"
        )
# Usage example
if __name__ == "__main__":
    # Credentials and target marketplace; replace the placeholders with real values.
    config = dict(
        client_id="YOUR_CLIENT_ID",
        client_secret="YOUR_CLIENT_SECRET",
        refresh_token="YOUR_REFRESH_TOKEN",
        access_key="YOUR_AWS_ACCESS_KEY",
        secret_key="YOUR_AWS_SECRET_KEY",
        role_arn="YOUR_IAM_ROLE_ARN",
        region="us-east-1",  # choose the region that matches your account
        marketplace_id="ATVPDKIKX0DER",  # US marketplace
    )

    # Build the collector from the configuration above.
    collector = AmazonProductDataCollector(config)

    # Look up details for a couple of sample ASINs and print them.
    asin_list = ["B07W2N765K", "B07Q9KX7J1"]
    product_details = collector.get_product_details(asin_list)
    print("商品详情:", json.dumps(product_details, indent=2), sep="\n")

    # Look up listing information for a couple of sample SKUs and print it.
    seller_sku_list = ["SKU001", "SKU002"]
    product_listings = collector.get_product_listings(seller_sku_list)
    print("商品列表信息:", json.dumps(product_listings, indent=2), sep="\n")
代码解析
上面的代码实现了一个完整的亚马逊商品数据采集器,主要功能包括:
- 认证与授权:
- 使用 LWA (Login with Amazon) 获取访问令牌
- 通过 AWS STS 获取临时凭证
- 实现 AWS V4 签名算法
- API 调用:
- `get_product_details()` - 获取指定 ASIN 的商品价格和可用性信息
- `get_product_listings()` - 获取卖家 SKU 的商品列表信息
- 请求处理:
- 处理请求头、参数和签名
- 处理响应并返回结构化数据
使用说明
- 首先需要在亚马逊开发者中心注册并创建应用,获取必要的凭证
- 将凭证填入配置部分的相应字段
- 安装必要的依赖:`pip install requests`
- 运行代码,指定要查询的 ASIN 或卖家 SKU
注意事项
- 亚马逊 SP-API 有严格的使用限制和请求频率限制,需要合理规划 API 调用
- 处理错误和异常情况,特别是网络问题和认证失效
- 对于大规模数据采集,考虑使用队列和异步处理
- 保护好 API 凭证,避免泄露
- 遵守亚马逊的 API 使用条款,不要用于非法目的
扩展功能
你可以基于这个基础框架扩展更多功能:
- 添加数据存储功能,将采集的数据保存到数据库
- 实现定时任务,定期采集商品数据
- 添加数据可视化功能,分析价格趋势
- 实现自动邮件通知,当商品价格变动时发送通知
通过亚马逊官方 API 采集商品数据是一种合法、稳定的方式,可以为电商分析、价格监控和市场调研提供有力支持。