文章附件下载:www.pan38.com/dow/share.p… 提取密码:6658
完整的闲鱼自动化工具,包含登录、商品爬取、数据处理和自动发布功能。使用 Selenium 实现浏览器自动化操作,支持模拟真实用户行为。内置反爬机制,包括随机等待时间、UserAgent 轮换等。商品数据处理模块,支持价格调整、标题优化和分类识别。完善的日志系统,记录程序运行状态和错误信息。配置文件管理,支持自定义爬取数量、价格策略等参数。图片下载和处理功能,确保商品图片正常上传。
import io
import json
import logging
import os
import random
import re
import time
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class XianYuAutoTool:
    """Browser-automation helper that logs in, crawls listings and re-posts them.

    The instance owns a Selenium Chrome driver (page interaction), a
    ``requests`` session (image downloads), a ``fake_useragent`` pool, a
    logger, and a config dict loaded from ``config.json``.
    """

    # CSS selector matching one listing card on the search-result page.
    _ITEM_SELECTOR = ".item-lists .item"

    def __init__(self):
        """Create the logger, config, user-agent pool, HTTP session and driver."""
        self.logger = self._setup_logger()
        self.config = self._load_config()
        # The user-agent pool must exist before the driver is built, because
        # _init_webdriver() reads self.ua.random.
        self.ua = UserAgent()
        self.session = requests.Session()
        self.driver = self._init_webdriver()

    def _setup_logger(self):
        """Return the 'xianyu_auto' logger, writing to a file and the console."""
        logger = logging.getLogger('xianyu_auto')
        logger.setLevel(logging.INFO)
        # logging.getLogger returns the same object for the same name, so only
        # attach handlers once; otherwise every new instance duplicates output.
        if not logger.handlers:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            # File log (UTF-8 because the messages contain Chinese text).
            file_handler = logging.FileHandler('xianyu_auto.log', encoding='utf-8')
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
            # Console log.
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)
        return logger

    def _load_config(self):
        """Load ``config.json``; create it with defaults when it does not exist.

        Keys missing from an existing file are filled in from the defaults so
        later ``self.config[...]`` lookups do not fail on a partial file.

        Returns:
            dict: the effective configuration.
        """
        default_config = {
            "login_phone": "",
            "login_password": "",
            "max_items": 50,
            "price_markup": 1.2,
            "min_price": 20,
            "max_price": 1000,
            "keywords": ["数码", "家居", "服饰"],
            "proxy": "",
            "headless": False,
            "wait_time": [3, 8],
        }
        try:
            with open('config.json', 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            with open('config.json', 'w', encoding='utf-8') as f:
                json.dump(default_config, f, ensure_ascii=False, indent=4)
            return default_config
        return {**default_config, **loaded}

    def _init_webdriver(self):
        """Build a Chrome driver from the config (headless, proxy, random UA)."""
        options = Options()
        if self.config.get('headless', False):
            options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument(f'user-agent={self.ua.random}')
        if self.config.get('proxy'):
            options.add_argument(f'--proxy-server={self.config["proxy"]}')
        driver = webdriver.Chrome(options=options)
        driver.set_window_size(1200, 800)
        return driver

    def login(self):
        """Fill in the login form and wait for the browser to leave it.

        Returns:
            bool: True when the browser navigated away from the login host
            within 30 seconds, False on any error or timeout.
        """
        self.logger.info("开始登录闲鱼账号")
        try:
            self.driver.get("https://login.taobao.com/")
            WebDriverWait(self.driver, 20).until(
                EC.presence_of_element_located((By.ID, "fm-login-id"))
            )
            # Enter user name and password.
            username = self.driver.find_element(By.ID, "fm-login-id")
            username.clear()
            username.send_keys(self.config['login_phone'])
            password = self.driver.find_element(By.ID, "fm-login-password")
            password.clear()
            password.send_keys(self.config['login_password'])
            # Click the login button.
            login_btn = self.driver.find_element(By.CSS_SELECTOR, ".password-login")
            login_btn.click()
            # Waiting for the URL to contain "taobao.com" is satisfied by the
            # login page itself (login.taobao.com), so instead wait until the
            # browser has left the login host.
            WebDriverWait(self.driver, 30).until(
                lambda drv: "login.taobao.com" not in drv.current_url
            )
            self.logger.info("登录成功")
            return True
        except Exception as e:
            self.logger.error(f"登录失败: {str(e)}")
            return False

    @staticmethod
    def _normalize_url(url):
        """Give a protocol-relative URL (``//host/path``) an ``https:`` scheme.

        Empty values become ``''``; any other URL is returned unchanged.
        """
        if not url:
            return ''
        if url.startswith('//'):
            return 'https:' + url
        return url

    @staticmethod
    def _text_of(node, selector):
        """Return the stripped text of the first match of *selector*, or ''."""
        found = node.select_one(selector)
        return found.get_text(strip=True) if found else ''

    def crawl_items(self, keyword, max_items=10):
        """Search for *keyword* and scrape up to *max_items* listing cards.

        Args:
            keyword: search term; it is URL-encoded before being placed in
                the query string.
            max_items: maximum number of items to return.

        Returns:
            list: dicts with keys ``title``, ``price``, ``location``,
            ``image``, ``link`` and ``crawl_time``. On failure, whatever was
            collected so far (possibly empty) is returned.
        """
        self.logger.info(f"开始爬取关键词: {keyword} 的商品")
        url = f"https://s.2.taobao.com/list/list.htm?q={quote(keyword)}&search_type=item"
        items = []
        try:
            self.driver.get(url)
            WebDriverWait(self.driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, self._ITEM_SELECTOR))
            )
            # Scroll to load more cards. The loop is driven by the number of
            # cards currently in the DOM, so it stops once enough are loaded
            # (or once the page height stops growing).
            last_height = self.driver.execute_script("return document.body.scrollHeight")
            while len(self.driver.find_elements(By.CSS_SELECTOR, self._ITEM_SELECTOR)) < max_items:
                self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                time.sleep(random.uniform(2, 4))
                new_height = self.driver.execute_script("return document.body.scrollHeight")
                if new_height == last_height:
                    break
                last_height = new_height
            # Parse the listing cards from the rendered page.
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            for card in soup.select(self._ITEM_SELECTOR):
                if len(items) >= max_items:
                    break
                try:
                    image_tag = card.select_one('.item-pic img')
                    link_tag = card.select_one('a')
                    items.append({
                        'title': self._text_of(card, '.item-title'),
                        'price': self._text_of(card, '.item-price'),
                        'location': self._text_of(card, '.item-location'),
                        'image': self._normalize_url(image_tag.get('src', '')) if image_tag else '',
                        'link': self._normalize_url(link_tag.get('href', '')) if link_tag else '',
                        'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    })
                except Exception as e:
                    # Best effort: one malformed card must not abort the crawl.
                    self.logger.warning(f"解析商品失败: {str(e)}")
                    continue
            self.logger.info(f"成功爬取 {len(items)} 个商品")
            return items
        except Exception as e:
            self.logger.error(f"爬取商品失败: {str(e)}")
            return items

    def process_items(self, items):
        """Turn crawled items into publishable records.

        The price is parsed from the text, multiplied by ``price_markup`` and
        clamped to ``[min_price, max_price]``. Titles shorter than 10
        characters get a fixed prefix. Items that cannot be processed (for
        example an unparsable price) are skipped with a warning.

        Args:
            items: iterable of dicts as produced by :meth:`crawl_items`.

        Returns:
            list: processed item dicts.
        """
        self.logger.info("开始处理爬取的商品数据")
        processed_items = []
        for item in items:
            try:
                # Price: keep digits and dots only, then mark up and clamp.
                price = float(re.sub(r'[^\d.]', '', item['price']))
                new_price = price * self.config['price_markup']
                new_price = max(min(new_price, self.config['max_price']), self.config['min_price'])
                # Title: pad very short titles with a prefix.
                title = item['title']
                if len(title) < 10:
                    title = f"优质好货 {title}"
                processed_items.append({
                    'original_title': item['title'],
                    'new_title': title,
                    'original_price': price,
                    'new_price': round(new_price, 2),
                    'location': item['location'],
                    'image_url': item['image'],
                    'original_link': item['link'],
                    'description': f"全新闲置,{title},品质保证,欢迎咨询",
                    'category': self._determine_category(title),
                })
            except Exception as e:
                self.logger.warning(f"处理商品失败: {str(e)}")
                continue
        self.logger.info(f"成功处理 {len(processed_items)} 个商品")
        return processed_items

    def _determine_category(self, title):
        """Return the first configured keyword contained in *title*, else "其他"."""
        for keyword in self.config['keywords']:
            if keyword in title:
                return keyword
        return "其他"

    def download_image(self, url):
        """Download *url* and return it as a PIL image, or None on failure."""
        try:
            headers = {'User-Agent': self.ua.random}
            response = self.session.get(url, headers=headers, timeout=10)
            if response.status_code == 200:
                return Image.open(io.BytesIO(response.content))
            return None
        except Exception as e:
            self.logger.warning(f"下载图片失败: {str(e)}")
            return None

    def _upload_image(self, image_url):
        """Download *image_url* and feed it to the page's file input.

        Does nothing when the download fails. The temporary JPEG is removed
        even if the upload step raises.
        """
        image = self.download_image(image_url)
        if image is None:
            return
        # JPEG cannot store alpha or palette modes; convert so save() succeeds.
        if image.mode != 'RGB':
            image = image.convert('RGB')
        temp_path = f"temp_{int(time.time())}.jpg"
        try:
            image.save(temp_path)
            upload_input = self.driver.find_element(By.CSS_SELECTOR, "input[type=file]")
            upload_input.send_keys(os.path.abspath(temp_path))
            time.sleep(5)  # Give the page time to finish the upload.
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    def post_item(self, item):
        """Fill in and submit the publish form for one processed item.

        Args:
            item: dict as produced by :meth:`process_items`.

        Returns:
            bool: True when the post-submit URL check passed, False on error.
        """
        self.logger.info(f"开始发布商品: {item['new_title']}")
        try:
            self.driver.get("https://2.taobao.com/publish/publish.htm")
            WebDriverWait(self.driver, 20).until(
                EC.presence_of_element_located((By.ID, "J_Title"))
            )
            # Fill in the item fields.
            for element_id, value in (
                ("J_Title", item['new_title']),
                ("J_Price", str(item['new_price'])),
                ("J_Desc", item['description']),
            ):
                field = self.driver.find_element(By.ID, element_id)
                field.clear()
                field.send_keys(value)
            # Upload the picture.
            self._upload_image(item['image_url'])
            # Open the category selector.
            category_btn = self.driver.find_element(By.CSS_SELECTOR, ".category-selector")
            category_btn.click()
            time.sleep(1)
            # NOTE(review): no category is actually chosen here; the selection
            # logic has to be written against the real page structure.
            # Submit.
            submit_btn = self.driver.find_element(By.ID, "J_PublishBtn")
            submit_btn.click()
            # Wait for the redirect that indicates success.
            WebDriverWait(self.driver, 30).until(
                EC.url_contains("item.htm")
            )
            self.logger.info(f"成功发布商品: {item['new_title']}")
            # Random pause between posts, bounds taken from the config.
            time.sleep(random.uniform(*self.config['wait_time']))
            return True
        except Exception as e:
            self.logger.error(f"发布商品失败: {str(e)}")
            return False

    def batch_publish(self):
        """Crawl every configured keyword, export the data, then publish.

        ``max_items`` is split evenly across the keywords (at least one item
        per keyword when ``max_items`` is positive) and also caps the number
        of successful posts.
        """
        keywords = self.config['keywords']
        max_items = self.config['max_items']
        # Integer division alone yields 0 when max_items < len(keywords),
        # which would silently crawl nothing.
        if keywords and max_items > 0:
            per_keyword = max(1, max_items // len(keywords))
        else:
            per_keyword = 0

        all_items = []
        for keyword in keywords:
            crawled = self.crawl_items(keyword, per_keyword)
            all_items.extend(self.process_items(crawled))

        # Save the crawled data; an export problem should not block publishing.
        try:
            pd.DataFrame(all_items).to_excel('xianyu_items.xlsx', index=False)
        except Exception as e:
            self.logger.warning(f"保存商品数据失败: {str(e)}")

        # Publish until the cap is reached.
        success_count = 0
        for item in all_items:
            if success_count >= max_items:
                break
            if self.post_item(item):
                success_count += 1
        self.logger.info(f"批量发布完成,成功发布 {success_count} 个商品")

    def run(self):
        """Log in and run the batch; always quit the browser afterwards."""
        try:
            if not self.login():
                return
            self.batch_publish()
        except Exception as e:
            self.logger.error(f"程序运行出错: {str(e)}")
        finally:
            self.driver.quit()
            self.logger.info("程序结束")
# Script entry point: build the tool and run the full login → crawl → publish flow.
if __name__ == "__main__":
    tool = XianYuAutoTool()
    tool.run()