文章附件下载:www.pan38.com/dow/share.p… 提取密码:7858
- 采用Selenium实现浏览器自动化操作,模拟真实用户行为降低检测风险
- 完整的商品采集功能,支持多关键词分页爬取并导出CSV
- 批量发布流程包含图片上传、商品信息填写和自动提交
- 内置随机操作间隔和异常处理机制,提高稳定性
- 支持Cookie登录保持会话状态,避免频繁验证
- 详细日志记录便于问题排查
源码部分:
import ast
import csv
import json
import logging
import os
import random
import time
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Logging configuration
# Send INFO-and-above records to a log file in the working directory,
# each line stamped with the time and the level name.
logging.basicConfig(
    filename='xianyu_automation.log',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
class XianYuAutomation:
    """Selenium-based automation for Xianyu (2.taobao.com).

    Provides cookie-based login, scraping of search results into a CSV file
    and batch publishing of products read from a CSV file.
    ``login_with_cookie``, ``collect_products`` and ``batch_publish`` catch
    their own exceptions, log them and report the outcome as a bool.
    """

    # CSS selector of a single product card on the search result page.
    _CARD_SELECTOR = ".Card--doubleCardWrapper--L2XFE73"
    # Column order of the CSV file written by collect_products().
    _CSV_FIELDS = ['keyword', 'title', 'price', 'location']

    def __init__(self, headless=False):
        """Create the automation object and start the browser.

        Args:
            headless: When true, Chrome is started with ``--headless``.
        """
        self.driver = None
        self.wait_timeout = 30  # seconds, used for every explicit wait
        self.retry_max = 3      # kept for callers; not read inside this class
        self.init_driver(headless)

    def init_driver(self, headless):
        """Start a Chrome WebDriver and store it in ``self.driver``.

        Args:
            headless: When true, add the ``--headless`` switch.
        """
        options = webdriver.ChromeOptions()
        if headless:
            options.add_argument('--headless')
        # NOTE(review): the remaining switches are applied unconditionally;
        # confirm whether any of them was meant to be headless-only.
        options.add_argument("--disable-gpu")
        options.add_argument("--window-size=1920,1080")
        options.add_argument("--log-level=3")
        self.driver = webdriver.Chrome(options=options)
        # NOTE: Selenium's documentation advises against combining implicit
        # and explicit waits.  The implicit wait is kept because the plain
        # find_element calls in _publish_single_product depend on it.
        self.driver.implicitly_wait(10)

    @staticmethod
    def _load_cookies(cookie_file):
        """Read a list of cookie dicts from *cookie_file*.

        The file may hold JSON or a Python literal (for example the ``repr``
        of what ``driver.get_cookies()`` returns).

        SECURITY: this used to call ``eval()`` on the file contents, which
        executes arbitrary code placed in the cookie file.  Only data
        literals are accepted now; anything else raises.

        Raises:
            OSError: the file cannot be read.
            ValueError, SyntaxError: the contents are neither JSON nor a
                Python literal, or do not describe a list of dicts.
        """
        with open(cookie_file, 'r', encoding='utf-8') as f:
            raw = f.read()
        try:
            cookies = json.loads(raw)
        except ValueError:
            # Not JSON (e.g. single-quoted keys, True/False/None).
            cookies = ast.literal_eval(raw)
        if not isinstance(cookies, list) or not all(isinstance(c, dict) for c in cookies):
            raise ValueError("cookie file must contain a list of cookie dicts")
        return cookies

    def login_with_cookie(self, cookie_file):
        """Log in by injecting cookies saved in *cookie_file*.

        Opens the Taobao login page, adds every cookie (with its ``expiry``
        key removed), then loads 2.taobao.com and waits for an element with
        class ``nickname`` to confirm the session.

        Returns:
            True when the nickname element appears, False on any error.
        """
        try:
            self.driver.get("https://login.taobao.com")
            for cookie in self._load_cookies(cookie_file):
                cookie.pop('expiry', None)
                self.driver.add_cookie(cookie)
            # Verify the login state.
            self.driver.get("https://2.taobao.com")
            WebDriverWait(self.driver, self.wait_timeout).until(
                EC.presence_of_element_located((By.CLASS_NAME, "nickname"))
            )
            return True
        except Exception as e:
            logging.error(f"登录失败: {str(e)}")
            return False

    @staticmethod
    def _build_search_url(keyword, page):
        """Return the search URL for *keyword* and *page*.

        The keyword is percent-encoded so that spaces, ``&``, ``#`` and
        non-ASCII text cannot corrupt the query string.
        """
        return f"https://2.taobao.com/search?q={quote(str(keyword), safe='')}&page={page}"

    def _parse_current_page(self, keyword):
        """Return one dict per product card on the currently loaded page.

        Cards missing the title, price or location element are skipped.
        """
        found = []
        for item in self.driver.find_elements(By.CSS_SELECTOR, self._CARD_SELECTOR):
            try:
                title = item.find_element(By.CSS_SELECTOR, ".Title--title--jCOPvpf").text
                price = item.find_element(By.CSS_SELECTOR, ".Price--price--18vaUfP").text
                location = item.find_element(By.CSS_SELECTOR, ".Price--procity--3k7IbnN").text
            except NoSuchElementException:
                continue
            found.append({
                'keyword': keyword,
                'title': title,
                'price': price,
                'location': location,
            })
        return found

    @classmethod
    def _write_products_csv(cls, products, output_csv):
        """Write *products* to *output_csv* (UTF-8) with a header row."""
        with open(output_csv, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=cls._CSV_FIELDS)
            writer.writeheader()
            writer.writerows(products)

    def collect_products(self, keywords, max_pages=3, output_csv='products.csv'):
        """Scrape search results for each keyword and save them as CSV.

        Args:
            keywords: Iterable of search terms.
            max_pages: Number of result pages to visit per keyword.
            output_csv: Path of the CSV file to (over)write.

        Returns:
            True when the CSV file was written, False on an unexpected error.
        """
        products = []
        try:
            for keyword in keywords:
                for page in range(1, max_pages + 1):
                    self.driver.get(self._build_search_url(keyword, page))
                    try:
                        # Wait for the product cards to load.
                        WebDriverWait(self.driver, self.wait_timeout).until(
                            EC.presence_of_element_located(
                                (By.CSS_SELECTOR, self._CARD_SELECTOR)
                            )
                        )
                    except TimeoutException:
                        # A page without cards (for instance past the last
                        # result page) must not discard what was already
                        # collected; stop paging this keyword instead.
                        logging.warning(f"页面加载超时,停止该关键词翻页: {keyword} 第{page}页")
                        break
                    products.extend(self._parse_current_page(keyword))
            self._write_products_csv(products, output_csv)
            return True
        except Exception as e:
            logging.error(f"采集失败: {str(e)}")
            return False

    @staticmethod
    def _parse_product_row(row, image_dir):
        """Convert one CSV row into the dict used for publishing.

        ``title``, ``price`` and ``images`` are required columns;
        ``description``, ``category`` and ``location`` default to ``''``.
        ``images`` is a ``|``-separated list of file names relative to
        *image_dir*; empty entries are dropped so that a stray separator
        does not turn into a directory path.

        Raises:
            KeyError: a required column is missing.
            ValueError: ``price`` is not a number.
        """
        image_names = [name.strip() for name in row['images'].split('|')]
        return {
            'title': row['title'],
            'desc': row.get('description', ''),
            'price': float(row['price']),
            'images': [os.path.join(image_dir, name) for name in image_names if name],
            'category': row.get('category', ''),
            'location': row.get('location', ''),
        }

    def batch_publish(self, product_csv, image_dir='images'):
        """Publish every product listed in *product_csv*.

        All rows are parsed before anything is published, so a malformed row
        aborts the batch without side effects.  A random 5-10 second pause is
        inserted between consecutive products.

        Args:
            product_csv: Path of the input CSV file (UTF-8, BOM tolerated).
            image_dir: Directory the image file names are relative to.

        Returns:
            True when every product was published, False when the file could
            not be read or parsed or at least one product failed.
        """
        try:
            # utf-8-sig also accepts files saved with a byte-order mark, which
            # would otherwise rename the first column to '\ufefftitle'.
            with open(product_csv, 'r', encoding='utf-8-sig') as f:
                products = [
                    self._parse_product_row(row, image_dir)
                    for row in csv.DictReader(f)
                ]
            failed = 0
            for index, product in enumerate(products):
                if index:
                    time.sleep(random.uniform(5, 10))  # anti-detection pause
                if not self._publish_single_product(product):
                    failed += 1
            if failed:
                logging.warning(f"批量发布完成,{failed} 个商品发布失败")
            return failed == 0
        except Exception as e:
            logging.error(f"批量发布失败: {str(e)}")
            return False

    def _publish_single_product(self, product):
        """Fill in and submit the publish form for one product.

        Args:
            product: Dict with ``title``, ``desc``, ``price`` and ``images``
                (list of image paths).

        Returns:
            True when the browser reaches a URL containing ``item.htm``
            after submitting, False on any error.
        """
        try:
            self.driver.get("https://2.taobao.com/publish")
            # Upload the images one by one through the file input.
            upload_btn = WebDriverWait(self.driver, self.wait_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ".uploader-input input"))
            )
            for img in product['images']:
                upload_btn.send_keys(os.path.abspath(img))
                time.sleep(random.uniform(0.5, 1.5))
            # Fill in the product details.
            title_input = self.driver.find_element(By.NAME, "title")
            title_input.clear()
            title_input.send_keys(product['title'])
            desc_textarea = self.driver.find_element(By.CSS_SELECTOR, "textarea.lego-textarea")
            desc_textarea.clear()
            desc_textarea.send_keys(product['desc'])
            price_input = self.driver.find_element(By.NAME, "price")
            price_input.clear()
            price_input.send_keys(str(product['price']))
            # Submit the form.
            submit_btn = self.driver.find_element(By.CSS_SELECTOR, ".submit-btn")
            submit_btn.click()
            # A redirect to the item page signals success.
            WebDriverWait(self.driver, self.wait_timeout).until(
                EC.url_contains("item.htm")
            )
            logging.info(f"发布成功: {product['title']}")
            return True
        except Exception as e:
            logging.error(f"发布失败: {product['title']} - {str(e)}")
            return False

    def close(self):
        """Quit the browser; calling it again afterwards is a no-op."""
        if self.driver:
            try:
                self.driver.quit()
            finally:
                self.driver = None
if __name__ == "__main__":
    # Example usage: log in with saved cookies, scrape two keywords, then
    # publish the products listed in a CSV file.  The browser is always
    # closed, even when one of the steps raises.
    tool = XianYuAutomation(headless=False)
    try:
        if tool.login_with_cookie("taobao.cookie"):
            # Collect products.
            tool.collect_products(["手机", "笔记本电脑"], max_pages=2)
            # Batch publish.
            tool.batch_publish("products_to_publish.csv")
    finally:
        tool.close()