处理淘宝商品详情API接口返回数据的详细步骤和代码示例

72 阅读4分钟

以下是处理淘宝商品详情API接口返回数据的详细步骤和代码示例:


一、解析API响应数据

1. 获取API响应

import requests
import json

# Example API request; every placeholder value below must be replaced
# with real parameters before running.
# Vendor demo of a packaged Taobao item-detail API: o0b.cn/ibrad
# (copy the link to get test access).
# NOTE(review): confirm against the gateway documentation whether an
# explicit response-format parameter is required for `response.json()`
# to succeed -- nothing here requests JSON explicitly.
url = "https://api.taobao.com/router/rest"
params = {
    "method": "taobao.item.get",
    "app_key": "your_app_key",
    "num_iid": "商品ID",
    "fields": "num_iid,title,price,desc,sku,props_name,item_img",
    "sign": "生成签名",
    "timestamp": "当前时间戳"
}

# Without a timeout `requests` waits indefinitely on a stalled connection.
response = requests.get(url, params=params, timeout=10)
# Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
response.raise_for_status()
data = response.json()

2. 解析基础字段

def parse_basic_info(item_data):
    """Extract the core product fields from an item payload.

    Args:
        item_data: Mapping with the item fields of the API response
            (``num_iid``, ``title``, ``price``, ``orig_price``, ``num``,
            ``item_imgs``, ``desc``). Every key is optional.

    Returns:
        A dict with the keys ``item_id``, ``title``, ``price``,
        ``original_price``, ``stock``, ``main_images`` and ``detail_html``.
        Missing or null prices become ``0.0``, missing images an empty
        list, and a missing or null description an empty string.

    Raises:
        ValueError: If a price is present but is not a valid number.
    """
    def _to_float(value):
        # ``dict.get(key, 0)`` only falls back when the key is absent; an
        # explicit null or empty string would make ``float`` raise.
        return float(value) if value not in (None, '') else 0.0

    images = item_data.get('item_imgs') or []
    # Accept both a plain list of image dicts and a wrapper of the form
    # {'item_img': [...]} (the name used in the request's ``fields``).
    # NOTE(review): confirm which shape the live API actually returns.
    if isinstance(images, dict):
        images = images.get('item_img') or []

    return {
        'item_id': item_data.get('num_iid'),
        'title': item_data.get('title'),
        'price': _to_float(item_data.get('price')),
        'original_price': _to_float(item_data.get('orig_price')),
        'stock': item_data.get('num'),
        # Skip entries without a usable URL instead of raising KeyError.
        'main_images': [
            img['url'] for img in images
            if isinstance(img, dict) and img.get('url')
        ],
        'detail_html': item_data.get('desc') or ''
    }

二、处理SKU数据

1. 解析SKU属性

def parse_skus(sku_data):
    """Convert the SKU section of an item payload into flat SKU records.

    Args:
        sku_data: Mapping whose ``skus`` entry is a list of SKU dicts, each
            with optional ``sku_id``, ``price``, ``quantity`` and
            ``properties`` (a list of dicts carrying ``pid_name`` and
            ``vid_name``). ``None`` is treated as "no SKUs".

    Returns:
        A list of dicts with the keys ``sku_id``, ``price`` (float),
        ``stock`` and ``specs`` (property name -> value name).

    Raises:
        ValueError: If a SKU price is present but is not a valid number.
    """
    parsed = []
    # ``or`` guards cover keys that are present but null, which the
    # ``.get(key, default)`` form does not.
    for sku in (sku_data or {}).get('skus') or []:
        raw_price = sku.get('price')
        parsed.append({
            'sku_id': sku.get('sku_id'),
            'price': float(raw_price) if raw_price not in (None, '') else 0.0,
            'stock': sku.get('quantity'),
            'specs': {
                prop.get('pid_name'): prop.get('vid_name')
                for prop in sku.get('properties') or []
            }
        })
    return parsed

2. 示例输出

{
  "sku_id": "123456",
  "price": 299.00,
  "stock": 100,
  "specs": {
    "颜色": "黑色",
    "尺码": "XL"
  }
}

三、数据清洗

1. 处理图片URL

def process_image_urls(images):
    """Return the image URLs with protocol-relative entries made absolute.

    A URL that begins with ``//`` gets ``https:`` prepended; every other
    URL is passed through unchanged. Order and length are preserved.
    """
    normalized = []
    for link in images:
        if link.startswith('//'):
            link = "https:" + link
        normalized.append(link)
    return normalized

2. 清洗HTML详情

from bs4 import BeautifulSoup

def clean_html(html):
    """Sanitise the product-detail HTML and prepare its images.

    Removes every ``<script>``, ``<iframe>`` and ``<style>`` element, marks
    each ``<img>`` as lazily loaded, and rewrites protocol-relative image
    sources through ``process_image_urls``.

    Args:
        html: Raw detail HTML; ``None`` is treated as an empty document.

    Returns:
        The cleaned markup serialised back to a string.
    """
    soup = BeautifulSoup(html or '', 'html.parser')
    # Remove scripts and other risky/irrelevant tags.
    for tag in soup(["script", "iframe", "style"]):
        tag.decompose()
    # Optimise image loading.
    for img in soup.find_all('img'):
        img['loading'] = 'lazy'
        # Not every <img> carries a ``src`` attribute; subscripting
        # would raise KeyError on the first one that lacks it.
        src = img.get('src')
        if src:
            img['src'] = process_image_urls([src])[0]
    return str(soup)

四、数据存储

1. MySQL表结构

-- Product master table: one row per item, keyed by the item ID.
CREATE TABLE taobao_items (
    item_id BIGINT PRIMARY KEY COMMENT '商品ID',
    title VARCHAR(255) NOT NULL COMMENT '商品标题',
    price DECIMAL(10,2) NOT NULL COMMENT '现价',
    original_price DECIMAL(10,2) COMMENT '原价',
    stock INT NOT NULL COMMENT '库存',
    -- List of main image URLs, stored as a JSON document.
    main_images JSON COMMENT '主图列表',
    detail_html TEXT COMMENT '详情HTML',
    -- Set on insert and refreshed automatically on every row update.
    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);

-- SKU table: one row per SKU, each tied to its parent row in taobao_items.
CREATE TABLE item_skus (
    sku_id BIGINT PRIMARY KEY,
    item_id BIGINT NOT NULL,
    -- Specification attributes of the SKU, stored as a JSON document.
    specs JSON COMMENT '规格属性',
    price DECIMAL(10,2) NOT NULL,
    stock INT NOT NULL,
    -- The parent item must exist before any of its SKUs can be inserted.
    FOREIGN KEY (item_id) REFERENCES taobao_items(item_id)
);

2. 批量写入数据库

import pymysql

def save_to_mysql(item_data, skus):
    """Upsert one item and its SKUs into MySQL in a single transaction.

    Args:
        item_data: Dict with the keys ``item_id``, ``title``, ``price``,
            ``original_price``, ``stock``, ``main_images`` (list) and
            ``detail_html``.
        skus: Iterable of dicts with the keys ``sku_id``, ``specs`` (dict),
            ``price`` and ``stock``.

    Raises:
        Any exception from the database driver is re-raised after the
        transaction has been rolled back.
    """
    # NOTE: connection settings are hard-coded placeholders.
    conn = pymysql.connect(
        host='localhost',
        user='user',
        password='password',
        database='taobao'
    )

    try:
        with conn.cursor() as cursor:
            # Write the item master row.
            # Vendor demo of a packaged Taobao item-detail API:
            # o0b.cn/ibrad (copy the link to get test access).
            # Every non-key column is refreshed on conflict; otherwise a
            # re-import would leave the original price, images and detail
            # HTML stale.
            cursor.execute("""
                INSERT INTO taobao_items
                (item_id, title, price, original_price, stock, main_images, detail_html)
                VALUES (%s, %s, %s, %s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE
                    title = VALUES(title),
                    price = VALUES(price),
                    original_price = VALUES(original_price),
                    stock = VALUES(stock),
                    main_images = VALUES(main_images),
                    detail_html = VALUES(detail_html)
            """, (
                item_data['item_id'],
                item_data['title'],
                item_data['price'],
                item_data['original_price'],
                item_data['stock'],
                json.dumps(item_data['main_images']),
                item_data['detail_html']
            ))

            # Batch-write the SKU rows.
            sku_values = [
                (sku['sku_id'], item_data['item_id'],
                 json.dumps(sku['specs']), sku['price'], sku['stock'])
                for sku in skus
            ]
            # Nothing to send when the item has no SKUs.
            if sku_values:
                cursor.executemany("""
                    INSERT INTO item_skus
                    (sku_id, item_id, specs, price, stock)
                    VALUES (%s, %s, %s, %s, %s)
                    ON DUPLICATE KEY UPDATE
                        specs = VALUES(specs),
                        price = VALUES(price),
                        stock = VALUES(stock)
                """, sku_values)

        conn.commit()
    except Exception:
        # Make the all-or-nothing intent explicit before propagating.
        conn.rollback()
        raise
    finally:
        conn.close()

五、完整处理流程

def process_taobao_item(api_response):
    """Run the full pipeline for one API response.

    Parses the basic fields and SKUs, cleans image URLs and detail HTML,
    checks that the item ID and title are present, and stores the result.
    Any failure is printed and passed to ``log_error``; nothing is
    re-raised to the caller.
    """
    try:
        # Parse.
        item = parse_basic_info(api_response)
        sku_list = parse_skus(api_response.get('sku', {}))

        # Clean.
        item['main_images'] = process_image_urls(item['main_images'])
        item['detail_html'] = clean_html(item['detail_html'])

        # Validate: both key fields must be truthy.
        if not item['item_id'] or not item['title']:
            raise ValueError("关键字段缺失")

        # Persist and report.
        save_to_mysql(item, sku_list)
        print(f"成功处理商品:{item['item_id']}")
    except Exception as exc:
        print(f"处理失败:{exc!s}")
        log_error(api_response, exc)

六、错误处理与日志

1. 错误日志记录

import logging

# Module-level side effect: configure the root logger to write records of
# level ERROR and above to taobao_errors.log, each line prefixed with the
# timestamp and the level name.
logging.basicConfig(
    filename='taobao_errors.log',
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.ERROR
)

def log_error(raw_data, exception):
    """Log a processing failure together with the payload that caused it.

    Args:
        raw_data: The payload being processed when the error occurred.
            It is JSON-encoded when possible, otherwise ``repr`` is used.
        exception: The exception instance that was caught.
    """
    # This runs inside the caller's ``except`` handler, so it must not
    # raise itself: a payload that cannot be JSON-encoded would otherwise
    # replace the original error with a TypeError.
    try:
        raw_text = json.dumps(raw_data, ensure_ascii=False)
    except (TypeError, ValueError):
        raw_text = repr(raw_data)

    error_msg = f"""
    错误类型:{type(exception).__name__}
    错误信息:{str(exception)}
    原始数据:{raw_text}
    """
    logging.error(error_msg)

2. 重试机制

from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, max=10))
def safe_api_call(timeout=10):
    """Call the API, retrying failed attempts with exponential back-off.

    Uses the module-level ``url`` and ``params``. Up to three attempts are
    made, as configured in the decorator.

    Args:
        timeout: Seconds to wait for the server before the attempt is
            treated as failed. Without it a stalled connection would block
            forever and the retry logic would never get a chance to run.

    Returns:
        The decoded JSON body of the response.
    """
    response = requests.get(url, params=params, timeout=timeout)
    # Turn HTTP 4xx/5xx into an exception so the attempt is retried.
    response.raise_for_status()
    return response.json()

七、高级处理场景

1. 价格监控

def monitor_price_change(item_id, threshold=0.1):
    """Send an alert when an item's latest price change exceeds a threshold.

    Args:
        item_id: ID of the item to check.
        threshold: Relative change (0.1 == 10%) above which an alert is
            sent.

    Returns:
        None. Nothing happens when fewer than two prices are available or
        when the previous price is zero.
    """
    # Fetch the historical prices from the database.
    # Vendor demo of a packaged Taobao item-detail API: o0b.cn/ibrad
    # (copy the link to get test access).
    # NOTE(review): ``item_id`` is the primary key of ``taobao_items``, so
    # this query can return at most one row and the comparison below never
    # runs. A price-history table with an explicit ORDER BY on time is
    # needed for this monitor to do anything -- confirm the intended schema.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT price FROM taobao_items WHERE item_id = %s", (item_id,))
            history_prices = [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        # Release the connection even when the query fails.
        # NOTE(review): assumes get_db_connection() hands out a connection
        # this function owns -- confirm before relying on it.
        conn.close()

    if len(history_prices) < 2:
        return

    previous, latest = history_prices[-2], history_prices[-1]
    # A zero previous price makes the relative change undefined.
    if not previous:
        return

    latest_change = (latest - previous) / previous
    if abs(latest_change) > threshold:
        send_alert(f"商品 {item_id} 价格波动 {latest_change*100:.2f}%")

def send_alert(message):
    """Placeholder for an email/SMS notification.

    Currently does nothing with ``message`` and returns ``None``.
    """
    return None

2. 图片本地化存储

import os
from concurrent.futures import ThreadPoolExecutor

def download_images(urls, save_dir='images', timeout=10):
    """Download images concurrently into a local directory.

    Args:
        urls: Iterable of image URLs.
        save_dir: Target directory; created if it does not exist.
        timeout: Seconds to wait on the server for each request.

    Returns:
        A list aligned with ``urls``: the local file path for each image
        that is available locally, or ``None`` where the download failed.
    """
    import threading
    from urllib.parse import urlsplit

    # ``exist_ok`` avoids the check-then-create race.
    os.makedirs(save_dir, exist_ok=True)

    def download(url):
        try:
            # Take the name from the URL *path* so a query string such as
            # ``?v=1`` does not end up in the file name.
            name = os.path.basename(urlsplit(url).path)
            if not name:
                raise ValueError("无法从URL解析文件名")
            filename = os.path.join(save_dir, name)
            if os.path.exists(filename):
                return filename

            # Stream into a temporary file and rename it only on success.
            # An interrupted download must not leave a truncated file
            # behind, because the next run would treat it as complete.
            tmp_path = f"{filename}.{os.getpid()}.{threading.get_ident()}.part"
            try:
                with requests.get(url, stream=True, timeout=timeout) as r:
                    r.raise_for_status()
                    with open(tmp_path, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(tmp_path, filename)
            finally:
                # After a successful replace the temp file is gone.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            return filename
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"下载失败 {url}: {str(e)}")
            return None

    with ThreadPoolExecutor(max_workers=5) as executor:
        return list(executor.map(download, urls))

总结

处理淘宝商品API数据的关键步骤包括:

  1. 数据解析:提取核心字段和嵌套结构
  2. 数据清洗:格式转换、HTML净化、链接补全
  3. 数据存储:合理设计数据库表结构
  4. 错误处理:日志记录、重试机制
  5. 增值功能:价格监控、图片本地化

通过以上流程,可以实现从API数据获取到业务应用的完整链路。建议在实际部署时增加:

  • 定时任务:使用Celery定期更新数据
  • 缓存机制:Redis缓存高频访问数据
  • 监控系统:Prometheus监控API健康状态
  • 数据校验:确保字段完整性和合法性