以下是处理淘宝商品详情API接口返回数据的详细步骤和代码示例:
一、解析API响应数据
1. 获取API响应
import requests
import json
# 示例API请求(需替换实际参数)
# 封装好的淘宝商品详情供应商demo url=o0b.cn/ibrad,复制链接获取测试。
# Endpoint and query parameters for the taobao.item.get call. Every
# placeholder value below must be replaced with real data before running.
url = "https://api.taobao.com/router/rest"
params = {
    "method": "taobao.item.get",
    "app_key": "your_app_key",
    "num_iid": "商品ID",
    "fields": "num_iid,title,price,desc,sku,props_name,item_img",
    "sign": "生成签名",
    "timestamp": "当前时间戳",
}
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, params=params, timeout=10)
# Fail fast on 4xx/5xx instead of trying to decode an error page as JSON.
response.raise_for_status()
data = response.json()
2. 解析基础字段
def parse_basic_info(item_data):
    """Extract the basic product fields from an item payload.

    Args:
        item_data: Mapping with the item fields. The keys read are
            ``num_iid``, ``title``, ``price``, ``orig_price``, ``num``,
            ``item_imgs`` and ``desc``; all of them are optional.

    Returns:
        A dict with the keys ``item_id``, ``title``, ``price``,
        ``original_price``, ``stock``, ``main_images`` and ``detail_html``.
        Missing, null or empty prices become ``0.0``; a missing or null
        description becomes ``''``.

    Raises:
        ValueError: If a price value is present but is not numeric.
    """
    # ``or`` (rather than a .get default) also covers keys that are present
    # but explicitly null, which would otherwise crash float() / iteration.
    images = item_data.get('item_imgs') or []
    return {
        'item_id': item_data.get('num_iid'),
        'title': item_data.get('title'),
        'price': float(item_data.get('price') or 0),
        'original_price': float(item_data.get('orig_price') or 0),
        'stock': item_data.get('num'),
        # Assumes each image entry is a dict; entries without a usable
        # 'url' are skipped instead of raising KeyError.
        'main_images': [img['url'] for img in images if img.get('url')],
        'detail_html': item_data.get('desc') or '',
    }
二、处理SKU数据
1. 解析SKU属性
def parse_skus(sku_data):
    """Convert the SKU section of an item payload into flat SKU records.

    Args:
        sku_data: Mapping whose ``skus`` key holds a list of SKU entries.
            ``None`` or an empty mapping yields an empty list.

    Returns:
        A list of dicts with the keys ``sku_id``, ``price`` (float),
        ``stock`` and ``specs`` (a ``pid_name -> vid_name`` mapping).

    Raises:
        ValueError: If a SKU price is present but is not numeric.
    """
    if not sku_data:
        return []

    skus = []
    # ``or`` also covers keys that are present but explicitly null.
    for sku in sku_data.get('skus') or []:
        # Assumes each property entry is a dict carrying 'pid_name' and
        # 'vid_name' - confirm against the real API payload.
        properties = sku.get('properties') or []
        skus.append({
            'sku_id': sku.get('sku_id'),
            'price': float(sku.get('price') or 0),
            'stock': sku.get('quantity'),
            'specs': {
                prop.get('pid_name'): prop.get('vid_name')
                for prop in properties
            },
        })
    return skus
2. 示例输出
{
"sku_id": "123456",
"price": 299.00,
"stock": 100,
"specs": {
"颜色": "黑色",
"尺码": "XL"
}
}
三、数据清洗
1. 处理图片URL
def process_image_urls(images, scheme='https'):
    """Turn protocol-relative image URLs (``//host/path``) into absolute ones.

    Args:
        images: Iterable of image URLs. ``None`` is treated as empty.
        scheme: Scheme to prepend to protocol-relative URLs.

    Returns:
        A list with the same length and order as ``images``. Entries that
        start with ``//`` get ``"<scheme>:"`` prepended; every other entry,
        including non-string values, is passed through unchanged so that
        positions stay aligned with the input.
    """
    if images is None:
        return []
    return [
        f"{scheme}:{url}"
        if isinstance(url, str) and url.startswith('//')
        else url
        for url in images
    ]
2. 清洗HTML详情
from bs4 import BeautifulSoup
def clean_html(html):
    """Sanitize the item detail HTML and prepare its images for lazy loading.

    Args:
        html: Raw HTML string. ``None`` is treated as an empty document.

    Returns:
        The cleaned HTML as a string: ``<script>``, ``<iframe>`` and
        ``<style>`` elements are removed, every ``<img>`` gets
        ``loading="lazy"``, and each ``src`` is passed through
        ``process_image_urls``.
    """
    soup = BeautifulSoup(html or '', 'html.parser')
    # Remove scripts and other unwanted tags.
    for tag in soup(["script", "iframe", "style"]):
        tag.decompose()
    # Optimize image loading.
    for img in soup.find_all('img'):
        img['loading'] = 'lazy'
        # <img> tags without a src attribute would raise KeyError on
        # img['src'], so only rewrite a src that is actually there.
        src = img.get('src')
        if src:
            img['src'] = process_image_urls([src])[0]
    return str(soup)
四、数据存储
1. MySQL表结构
-- Main product table: one row per item, keyed by the item ID.
CREATE TABLE taobao_items (
    item_id BIGINT PRIMARY KEY COMMENT '商品ID',
    title VARCHAR(255) NOT NULL COMMENT '商品标题',
    price DECIMAL(10,2) NOT NULL COMMENT '现价',
    original_price DECIMAL(10,2) COMMENT '原价',
    stock INT NOT NULL COMMENT '库存',
    main_images JSON COMMENT '主图列表',
    -- MEDIUMTEXT instead of TEXT: TEXT holds at most 65,535 bytes, which an
    -- image-heavy detail page can exceed.
    detail_html MEDIUMTEXT COMMENT '详情HTML',
    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
-- utf8mb4 is declared explicitly so Chinese titles and HTML are stored
-- correctly regardless of the server's default character set.
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- SKU table: one row per SKU, each linked to its parent row in taobao_items.
CREATE TABLE item_skus (
    sku_id  BIGINT NOT NULL,
    item_id BIGINT NOT NULL,
    specs   JSON COMMENT '规格属性',
    price   DECIMAL(10,2) NOT NULL,
    stock   INT NOT NULL,
    PRIMARY KEY (sku_id),
    FOREIGN KEY (item_id) REFERENCES taobao_items(item_id)
);
2. 批量写入数据库
import pymysql
def save_to_mysql(item_data, skus):
    """Upsert one item and its SKUs into MySQL in a single transaction.

    Args:
        item_data: Dict with the keys ``item_id``, ``title``, ``price``,
            ``original_price``, ``stock``, ``main_images`` and
            ``detail_html``.
        skus: List of dicts with the keys ``sku_id``, ``specs``, ``price``
            and ``stock``. May be empty.

    Raises:
        KeyError: If a required key is missing from ``item_data`` or a SKU.
        pymysql.MySQLError: On connection or statement failure; the
            transaction is rolled back before the error propagates.
    """
    conn = pymysql.connect(
        host='localhost',
        user='user',
        password='password',
        database='taobao',
        # Explicit so Chinese text round-trips whatever the driver default is.
        charset='utf8mb4',
    )
    try:
        with conn.cursor() as cursor:
            # Upsert the main item row. Every mutable column is refreshed on
            # a duplicate key; otherwise a re-processed item would keep its
            # old original price, images and detail HTML.
            cursor.execute("""
                INSERT INTO taobao_items
                (item_id, title, price, original_price, stock, main_images, detail_html)
                VALUES (%s, %s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                title = VALUES(title),
                price = VALUES(price),
                original_price = VALUES(original_price),
                stock = VALUES(stock),
                main_images = VALUES(main_images),
                detail_html = VALUES(detail_html)
            """, (
                item_data['item_id'],
                item_data['title'],
                item_data['price'],
                item_data['original_price'],
                item_data['stock'],
                json.dumps(item_data['main_images']),
                item_data['detail_html'],
            ))
            # Batch-upsert the SKU rows (specs are refreshed as well).
            sku_values = [
                (sku['sku_id'], item_data['item_id'],
                 json.dumps(sku['specs']), sku['price'], sku['stock'])
                for sku in skus
            ]
            if sku_values:
                cursor.executemany("""
                    INSERT INTO item_skus
                    (sku_id, item_id, specs, price, stock)
                    VALUES (%s, %s, %s, %s, %s)
                    ON DUPLICATE KEY UPDATE
                    specs = VALUES(specs),
                    price = VALUES(price),
                    stock = VALUES(stock)
                """, sku_values)
        conn.commit()
    except Exception:
        # Make the all-or-nothing intent explicit before re-raising.
        conn.rollback()
        raise
    finally:
        conn.close()
五、完整处理流程
def process_taobao_item(api_response):
    """Run the full pipeline for one item: parse, validate, clean, store.

    Args:
        api_response: Mapping with the item payload; its ``sku`` key, if
            present, holds the SKU section.

    Returns:
        ``True`` when the item was stored, ``False`` when any step failed.
        Failures are printed and passed to ``log_error``; they are not
        re-raised.
    """
    try:
        # 1. Parse the basic fields.
        item_data = parse_basic_info(api_response)
        # 2. Validate required fields first, so an unusable payload is
        #    rejected before the SKU parsing and HTML cleaning work.
        if not all([item_data['item_id'], item_data['title']]):
            raise ValueError("关键字段缺失")
        # 3. Parse SKUs; ``or {}`` also covers an explicit null ``sku`` value.
        skus = parse_skus(api_response.get('sku') or {})
        # 4. Clean the data.
        item_data['main_images'] = process_image_urls(item_data['main_images'])
        item_data['detail_html'] = clean_html(item_data['detail_html'])
        # 5. Persist to the database.
        save_to_mysql(item_data, skus)
        print(f"成功处理商品:{item_data['item_id']}")
        return True
    except Exception as e:
        # Top-level boundary for a single item: report, log, carry on.
        print(f"处理失败:{str(e)}")
        log_error(api_response, e)
        return False
六、错误处理与日志
1. 错误日志记录
import logging
# Route ERROR-and-above records to taobao_errors.log (append mode). The
# handler is built explicitly so the file is always written as UTF-8: the
# logged messages contain Chinese text, and the platform default encoding
# cannot be relied on to represent it.
logging.basicConfig(
    handlers=[logging.FileHandler('taobao_errors.log', encoding='utf-8')],
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.ERROR,
)
def log_error(raw_data, exception):
    """Log a processing failure together with the payload that caused it.

    Args:
        raw_data: The payload being processed when the error occurred.
            It does not have to be JSON-serializable.
        exception: The exception that was raised.

    This function runs inside an ``except`` handler, so it must not raise
    on odd payloads; doing so would mask the original error.
    """
    try:
        # default=str is deliberate: values JSON cannot encode (bytes,
        # Decimal, datetime, ...) are stringified for the log.
        raw_text = json.dumps(raw_data, ensure_ascii=False, default=str)
    except (TypeError, ValueError):
        # Non-string dict keys or circular references: fall back to repr().
        raw_text = repr(raw_data)
    error_msg = (
        "\n"
        f"错误类型:{type(exception).__name__}\n"
        f"错误信息:{str(exception)}\n"
        f"原始数据:{raw_text}\n"
    )
    logging.error(error_msg)
2. 重试机制
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, max=10))
def safe_api_call(timeout=10):
    """Call the API with retries and return the decoded JSON body.

    The request uses the module-level ``url`` and ``params``. The decorator
    retries up to three attempts with exponential back-off.

    Args:
        timeout: Seconds to wait for the server per attempt. Without a
            timeout a hung connection blocks forever and the retry logic
            never gets a chance to run.

    Returns:
        The parsed JSON response.
    """
    response = requests.get(url, params=params, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions so they count as failed attempts.
    response.raise_for_status()
    return response.json()
七、高级处理场景
1. 价格监控
def monitor_price_change(item_id, threshold=0.1):
    """Send an alert when the latest price change exceeds ``threshold``.

    Args:
        item_id: ID of the item to check.
        threshold: Relative change (0.1 means 10%) above which
            ``send_alert`` is called.

    Returns:
        None. Returns early when fewer than two prices are available or
        when the previous price is zero.
    """
    # NOTE(review): item_id is the PRIMARY KEY of taobao_items, so this query
    # can return at most one row and the function always returns early
    # below. A real price history needs its own table (one row per
    # observation, ordered by time) - confirm the intended source.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(
                "SELECT price FROM taobao_items WHERE item_id = %s",
                (item_id,),
            )
            history_prices = [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        # Assumes get_db_connection() hands out a connection owned by the
        # caller, as save_to_mysql does with its own; release it even when
        # the query fails.
        conn.close()

    if len(history_prices) < 2:
        return
    previous, latest = history_prices[-2], history_prices[-1]
    if not previous:
        # A zero (or null) previous price would divide by zero below.
        return
    latest_change = (latest - previous) / previous
    if abs(latest_change) > threshold:
        send_alert(f"商品 {item_id} 价格波动 {latest_change*100:.2f}%")
def send_alert(message):
    """Deliver an alert message (placeholder).

    Intended hook for an email/SMS notification. It currently does nothing
    with ``message`` and returns ``None``.
    """
    return None
2. 图片本地化存储
import os
from concurrent.futures import ThreadPoolExecutor
def download_images(urls, save_dir='images', timeout=10, max_workers=5):
    """Download images concurrently into ``save_dir``.

    Args:
        urls: Iterable of image URLs. ``None`` is treated as empty.
        save_dir: Target directory; created if it does not exist.
        timeout: Seconds to wait for the server on each request.
        max_workers: Number of download threads.

    Returns:
        A list aligned with ``urls``: the local file path for each image
        that is available (downloaded now or already present), or ``None``
        where the download failed. Failures are printed, not raised.
    """
    # Function-scope imports keep this block self-contained.
    import threading
    from urllib.parse import urlsplit

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    def download(url):
        try:
            # Take the name from the URL *path* so a query string or
            # fragment does not end up in the file name.
            name = os.path.basename(urlsplit(url).path)
            if name in ('', '.', '..'):
                raise ValueError("URL has no usable file name")
            filename = os.path.join(save_dir, name)
            if not os.path.exists(filename):
                # Write to a per-thread temporary file and rename it when
                # complete; an interrupted download must not leave a partial
                # file that a later run would mistake for a finished one.
                part_path = f"{filename}.{threading.get_ident()}.part"
                try:
                    with requests.get(url, stream=True, timeout=timeout) as r:
                        r.raise_for_status()
                        with open(part_path, 'wb') as f:
                            for chunk in r.iter_content(chunk_size=8192):
                                f.write(chunk)
                    os.replace(part_path, filename)
                finally:
                    if os.path.exists(part_path):
                        os.remove(part_path)
            return filename
        except Exception as e:
            # Best effort: one failed image must not abort the whole batch.
            print(f"下载失败 {url}: {str(e)}")
            return None

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(download, urls or []))
总结
处理淘宝商品API数据的关键步骤包括:
- 数据解析:提取核心字段和嵌套结构
- 数据清洗:格式转换、HTML净化、链接补全
- 数据存储:合理设计数据库表结构
- 错误处理:日志记录、重试机制
- 增值功能:价格监控、图片本地化
通过以上流程,可以实现从API数据获取到业务应用的完整链路。建议在实际部署时增加:
- 定时任务:使用Celery定期更新数据
- 缓存机制:Redis缓存高频访问数据
- 监控系统:Prometheus监控API健康状态
- 数据校验:确保字段完整性和合法性