Taobao dead-store scraping tool: collect buyer contact details and WangWang IDs (Taobao merchant/seller scraping software)


Download link: www.pan38.com/share.php?c… Extraction code: 7789
This tool can scrape Taobao data, but it is for learning purposes only. If you want to study how it works, take a look at the code below, which is provided purely for reference. The tool is implemented mainly in Python, so you will need some Python basics to follow the code; most people are better off just using the finished product.

import requests
from bs4 import BeautifulSoup
import re

def get_taobao_product(keyword):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    url = f'https://s.taobao.com/search?q={keyword}'

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        products = []
        for item in soup.select('.item.J_MouserOnverReq'):
            # '(\d+)人付款' captures the sales count; the pattern stays in
            # Chinese because it matches text on the Taobao page itself
            sales_match = re.search(r'(\d+)人付款', item.text)
            product = {
                'title': item.select_one('.title').get_text().strip(),
                'price': item.select_one('.price strong').get_text(),
                'sales': sales_match.group(1) if sales_match else '0',
                'shop': item.select_one('.shopname').get_text().strip() if item.select_one('.shopname') else 'unknown shop'
            }
            products.append(product)
        return products
    except Exception as e:
        print(f"Scraping error: {e}")
        return []

if __name__ == '__main__':
    keyword = input("Enter a product keyword to search: ")
    results = get_taobao_product(keyword)
    for i, product in enumerate(results, 1):
        print(f"{i}. {product['title']} - price: {product['price']} - sales: {product['sales']} - shop: {product['shop']}")

import requests
from bs4 import BeautifulSoup
import re
import time
import random
import csv
from datetime import datetime
import logging

class TaobaoSpider:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Referer': 'https://www.taobao.com/'
        }
        self.session = requests.Session()
        self.logger = self.setup_logger()

    def setup_logger(self):
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            filename='taobao_spider.log'
        )
        return logging.getLogger(__name__)

    def get_page(self, keyword, page=1):
        # Taobao paginates by item offset: 44 items per page, so page n starts at s=(n-1)*44
        url = f'https://s.taobao.com/search?q={keyword}&s={(page-1)*44}'
        try:
            time.sleep(random.uniform(1, 3))  # random delay to reduce the chance of throttling
            response = self.session.get(url, headers=self.headers)
            response.raise_for_status()
            return response.text
        except Exception as e:
            self.logger.error(f"Failed to fetch page: {e}")
            return None

    def parse_products(self, html):
        soup = BeautifulSoup(html, 'html.parser')
        products = []

        for item in soup.select('.item.J_MouserOnverReq'):
            try:
                # '(\d+)人付款' captures the sales count; the pattern stays in
                # Chinese because it matches text on the Taobao page itself
                sales_match = re.search(r'(\d+)人付款', item.text)
                product = {
                    'title': item.select_one('.title').get_text().strip(),
                    'price': item.select_one('.price strong').get_text(),
                    'sales': sales_match.group(1) if sales_match else '0',
                    'shop': item.select_one('.shopname').get_text().strip() if item.select_one('.shopname') else 'unknown shop',
                    'location': item.select_one('.location').get_text() if item.select_one('.location') else 'unknown region',
                    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                }
                products.append(product)
            except Exception as e:
                self.logger.warning(f"Failed to parse item: {e}")
                continue

        return products

    def save_to_csv(self, products, filename):
        try:
            # utf-8-sig adds a BOM so Excel opens the Chinese text correctly
            with open(filename, 'a', newline='', encoding='utf-8-sig') as f:
                writer = csv.DictWriter(f, fieldnames=products[0].keys())
                if f.tell() == 0:  # write the header row only for a new/empty file
                    writer.writeheader()
                writer.writerows(products)
            self.logger.info(f"Saved {len(products)} records to {filename}")
        except Exception as e:
            self.logger.error(f"Failed to save data: {e}")

    def run(self, keyword, pages=1, output_file='taobao_products.csv'):
        all_products = []
        for page in range(1, pages + 1):
            self.logger.info(f"Scraping page {page}...")
            html = self.get_page(keyword, page)
            if html:
                products = self.parse_products(html)
                all_products.extend(products)
                time.sleep(random.uniform(2, 5))  # extra pause between pages

        if all_products:
            self.save_to_csv(all_products, output_file)
            return True
        return False

if __name__ == '__main__':
    spider = TaobaoSpider()
    keyword = input("Enter a product keyword to search: ")
    pages = int(input("Enter the number of pages to scrape (44 items per page): "))
    if spider.run(keyword, pages):
        print("Scraping finished!")
    else:
        print("Scraping failed; check the log file.")