Download link: www.pan38.com/dow/share.p… Extraction code: 1133
This code implements a complete Meituan merchant data collection tool. Its main features include:
- Automatically fetching the city list
- Searching merchants by keyword
- Fetching merchant details (including phone number, rating, etc.)
- Saving the data to a CSV file
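The script relies on the third-party packages requests, beautifulsoup4, and fake-useragent (names inferred from its imports below); install them with pip before running. A short usage sketch follows the full listing.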
```python
import requests
import re
import json
import time
import random
import csv
import os
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from fake_useragent import UserAgent
class MeituanSpider:
    def __init__(self):
        self.base_url = "https://www.meituan.com"
        self.search_url = "https://www.meituan.com/meishi/"
        self.headers = {
            'User-Agent': UserAgent().random,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'max-age=0'
        }
        self.session = requests.Session()
        self.data_list = []
        self.proxies = None
        self.timeout = 10
        self.max_retry = 3
        self.delay = random.uniform(1, 3)
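    # Register an HTTP/HTTPS proxy that will be applied to every request made through this session.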
    def set_proxy(self, proxy):
        self.proxies = {
            'http': proxy,
            'https': proxy
        }
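    # Fetch a URL and return its HTML text, retrying up to max_retry times with a randomized delay; returns None when all attempts fail.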
    def get_html(self, url):
        retry = 0
        while retry < self.max_retry:
            try:
                response = self.session.get(
                    url,
                    headers=self.headers,
                    proxies=self.proxies,
                    timeout=self.timeout
                )
                if response.status_code == 200:
                    return response.text
                else:
                    print(f"请求失败,状态码: {response.status_code}")
                    retry += 1
                    time.sleep(self.delay)
            except Exception as e:
                print(f"请求异常: {str(e)}")
                retry += 1
                time.sleep(self.delay)
        return None
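    # Scrape the city-selection page and return a list of {'name': ..., 'url': ...} dicts, one per city link.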
    def parse_city_list(self):
        url = f"{self.base_url}/changecity/"
        html = self.get_html(url)
        if not html:
            return []
        soup = BeautifulSoup(html, 'html.parser')
        city_links = soup.select('div.city-area a')
        cities = []
        for link in city_links:
            city_name = link.get_text().strip()
            city_url = link['href']
            cities.append({
                'name': city_name,
                'url': city_url
            })
        return cities
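    # Request one page of search results for the given keyword on a city page and parse each shop's summary fields.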
    def search_shops(self, city_url, keyword="", page=1):
        params = {
            'keyword': keyword,
            'page': page
        }
        url = f"{city_url}?{urlencode(params)}"
        html = self.get_html(url)
        if not html:
            return []
        soup = BeautifulSoup(html, 'html.parser')
        shop_list = soup.select('div[data-poiid]')
        shops = []
        for shop in shop_list:
            try:
                shop_id = shop['data-poiid']
                name = shop.select_one('div.title').get_text().strip()
                address = shop.select_one('div.address').get_text().strip()
                avg_price = shop.select_one('div.avg-price').get_text().strip() if shop.select_one('div.avg-price') else '未知'
                score = shop.select_one('div.star > span').get_text().strip() if shop.select_one('div.star > span') else '0'
                review_count = shop.select_one('div.comment').get_text().strip() if shop.select_one('div.comment') else '0'
                shop_url = shop.select_one('a[href^="/meishi/"]')['href']
                shops.append({
                    'id': shop_id,
                    'name': name,
                    'address': address,
                    'avg_price': avg_price,
                    'score': score,
                    'review_count': review_count,
                    'url': f"{self.base_url}{shop_url}"
                })
            except Exception as e:
                print(f"解析店铺信息出错: {str(e)}")
                continue
        return shops
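    # Load an individual shop page and extract detail fields: name, address, score, average price, phone, business hours, and any extra labelled info items.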
    def get_shop_detail(self, shop_url):
        html = self.get_html(shop_url)
        if not html:
            return None
        soup = BeautifulSoup(html, 'html.parser')
        detail = {}
        try:
            # Basic information
            detail['name'] = soup.select_one('h1.shop-name').get_text().strip()
            detail['address'] = soup.select_one('div.address').get_text().strip()
            detail['score'] = soup.select_one('span.star-score').get_text().strip()
            detail['avg_price'] = soup.select_one('span.avg-price').get_text().strip()
            # Contact number (embedded in an inline script block)
            phone_script = soup.find('script', text=re.compile('phone'))
            if phone_script:
                phone_match = re.search(r'"phone":"([^"]+)"', phone_script.text)
                if phone_match:
                    detail['phone'] = phone_match.group(1)
            # Business hours
            hours = soup.select_one('div.business-hours')
            if hours:
                detail['business_hours'] = hours.get_text().strip()
            # Other labelled info items
            info_items = soup.select('div.shop-info-item')
            for item in info_items:
                key = item.select_one('span.label').get_text().strip()
                value = item.select_one('span.value').get_text().strip()
                detail[key] = value
            return detail
        except Exception as e:
            print(f"解析店铺详情出错: {str(e)}")
            return None
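    # Append the collected records to a CSV file, writing the header row only when the file does not exist yet.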
    def save_to_csv(self, filename, data):
        if not data:
            return
        file_exists = os.path.isfile(filename)
        # Use the union of all keys so rows with extra detail fields do not raise ValueError in DictWriter.
        fieldnames = list(dict.fromkeys(k for row in data for k in row.keys()))
        with open(filename, 'a', newline='', encoding='utf-8-sig') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
            if not file_exists:
                writer.writeheader()
            writer.writerows(data)
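    # Entry point: resolve the target city, page through search results, enrich each shop with detail data, and save everything to a per-city CSV file.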
    def run(self, city_name=None, keyword="", max_pages=5):
        cities = self.parse_city_list()
        if not cities:
            print("无法获取城市列表")
            return
        target_cities = []
        if city_name:
            target_cities = [city for city in cities if city_name in city['name']]
        else:
            target_cities = cities[:1]  # By default, only crawl the first city
        for city in target_cities:
            print(f"开始爬取城市: {city['name']}")
            for page in range(1, max_pages + 1):
                print(f"正在爬取第 {page} 页...")
                shops = self.search_shops(city['url'], keyword, page)
                if not shops:
                    break
                for shop in shops:
                    print(f"获取店铺详情: {shop['name']}")
                    detail = self.get_shop_detail(shop['url'])
                    if detail:
                        shop.update(detail)
                    self.data_list.append(shop)
                    time.sleep(random.uniform(0.5, 2))
                time.sleep(random.uniform(1, 3))
            # Save the data collected for this city
            if self.data_list:
                filename = f"meituan_{city['name']}_{keyword}.csv"
                self.save_to_csv(filename, self.data_list)
                print(f"数据已保存到 {filename}")
                self.data_list = []
if __name__ == "__main__":
    spider = MeituanSpider()
    # Optionally set a proxy
    # spider.set_proxy("http://127.0.0.1:8888")
    # Run the spider
    spider.run(city_name="北京", keyword="火锅", max_pages=3)
```
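For reference, here is a minimal sketch of driving the class through a proxy; the proxy address, city, and keyword below are illustrative placeholders rather than values from the original listing:

```python
# Illustrative usage only: proxy address, city, and keyword are placeholder assumptions.
spider = MeituanSpider()
spider.set_proxy("http://127.0.0.1:7890")  # optional: route all requests through a local proxy
spider.run(city_name="上海", keyword="烧烤", max_pages=2)
```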