Download: www.pan38.com/dow/share.p… Access code: 1133
Key features of the tool:
- Dual Selenium + BeautifulSoup parsing to handle dynamically rendered pages
- Multithreaded collection for higher throughput
- Automatic UserAgent and proxy IP rotation to get past anti-scraping checks
- Export to Excel for easy downstream processing
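Everything the script imports is on PyPI; assuming a standard pip setup, one install line covers it (lxml because BeautifulSoup is created with the lxml parser, openpyxl because pandas needs it to write .xlsx):

pip install requests beautifulsoup4 selenium webdriver-manager pandas fake-useragent lxml openpyxl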
import requests
import threading
import time
import random

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from fake_useragent import UserAgent
class MeituanScraper:
    def __init__(self, keywords=None, cities=None):
        self.keywords = keywords or ["餐饮", "外卖"]
        self.cities = cities or ["北京", "上海"]
        self.proxies = self._get_proxies()
        self.ua = UserAgent()
        self.lock = threading.Lock()
        self.data = []
def _get_proxies(self):
        # Proxy IP pool (stub: supply real proxies yourself; see ProxyManager below)
return ["http://proxy1.example.com:8080",
"http://proxy2.example.com:8080"]
def _get_driver(self):
options = webdriver.ChromeOptions()
options.add_argument(f'user-agent={self.ua.random}')
options.add_argument('--headless')
options.add_argument('--disable-gpu')
service = Service(ChromeDriverManager().install())
return webdriver.Chrome(service=service, options=options)
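One gap worth noting: _get_driver never reads self.proxies, so the proxy rotation promised in the feature list only applies if you wire it in yourself. Below is a minimal sketch of a drop-in replacement that routes Chrome through a random pool entry and strips two common automation fingerprints (--proxy-server and the flags shown are standard Chromium/Selenium switches, nothing Meituan-specific):

    def _get_driver(self):
        options = webdriver.ChromeOptions()
        options.add_argument(f'user-agent={self.ua.random}')
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Route this browser instance through a random proxy from the pool
        if self.proxies:
            options.add_argument(f'--proxy-server={random.choice(self.proxies)}')
        # Hide the most obvious "controlled by automation" signals
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=options)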
def _parse_page(self, html):
soup = BeautifulSoup(html, 'lxml')
shops = soup.find_all('div', class_='shop-list-item')
for shop in shops:
            try:
                name = shop.find('h4').text.strip()
                phone = shop.find('div', class_='phone-number').text.strip()
                address = shop.find('span', class_='address').text.strip()
            except Exception as e:
                # A malformed card skips only this shop
                print(f"Parse failed: {e}")
                continue
            # Hold the lock only while mutating the shared list, so a
            # parse error can never release a lock that was never acquired
            with self.lock:
                self.data.append({
                    "店名": name,
                    "电话": phone,
                    "地址": address
                })
def _scrape_city(self, city, keyword):
driver = self._get_driver()
try:
url = f"https://www.meituan.com/meishi/{city}/"
driver.get(url)
time.sleep(random.uniform(2,5))
            # Simulate typing the keyword into the search box
            search_box = driver.find_element(By.NAME, 'keyword')
search_box.send_keys(keyword)
search_box.submit()
time.sleep(random.uniform(3,6))
            # Scroll a few times to trigger lazy loading of more results
for _ in range(3):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(random.uniform(1,3))
self._parse_page(driver.page_source)
finally:
driver.quit()
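The fixed time.sleep calls both slow every run down and can still lose the race on a slow page. Here is a sketch of the explicit-wait alternative, keyed to the same shop-list-item class the parser queries (the 15-second timeout is an arbitrary choice):

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    def _wait_for_results(self, driver, timeout=15):
        # Blocks until at least one shop card is in the DOM,
        # or raises TimeoutException after `timeout` seconds
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'shop-list-item'))
        )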
def run(self):
threads = []
for city in self.cities:
for keyword in self.keywords:
t = threading.Thread(
target=self._scrape_city,
args=(city, keyword)
)
threads.append(t)
t.start()
for t in threads:
t.join()
df = pd.DataFrame(self.data)
df.to_excel('美团商家数据.xlsx', index=False)
print(f"采集完成,共获取{len(self.data)}条数据")
if name == "main": scraper = MeituanScraper( keywords=["火锅", "烧烤"], cities=["广州", "深圳"] ) scraper.run() import requests from datetime import datetime, timedelta
class ProxyManager: def init(self): self.proxies = [] self.last_update = None
def update_proxies(self):
if self.last_update and (datetime.now() - self.last_update) < timedelta(hours=1):
return
        try:
            resp = requests.get("https://api.proxy-service.com/v1/proxies",
                                timeout=10)
            resp.raise_for_status()
            self.proxies = [f"http://{p['ip']}:{p['port']}" for p in resp.json()]
            self.last_update = datetime.now()
        except Exception as e:
            print(f"Proxy refresh failed: {e}")
def get_random_proxy(self):
self.update_proxies()
return random.choice(self.proxies) if self.proxies else None
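A usage sketch tying the manager into a plain requests call (the proxy API above is a placeholder endpoint, so nothing comes back until a real provider is substituted; UserAgent is the fake_useragent class imported in the scraper section):

from fake_useragent import UserAgent

manager = ProxyManager()
proxy = manager.get_random_proxy()
resp = requests.get(
    "https://www.meituan.com/meishi/",
    headers={"User-Agent": UserAgent().random},
    # requests accepts proxies=None, so a missing pool degrades gracefully
    proxies={"http": proxy, "https": proxy} if proxy else None,
    timeout=10,
)
print(resp.status_code, len(resp.text))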