下载地址:www.pan38.com/dow/share.p… 提取密码:1133
- 多平台API集成:支持百度/高德地图POI数据采集
- 数据清洗模块:自动提取有效手机号并去重
- 地理围栏功能:通过经纬度计算距离筛选
- 多格式导出:支持Excel等标准格式

注意事项:
- 需自行申请各平台开发者KEY并替换代码中的占位符
- 商业使用需遵守各平台API调用限制
- 周边人员数据采集需根据实际业务需求对接合法数据源
import json
import re
import time

import pandas as pd
import requests
from geopy.distance import geodesic
class MapDataCollector:
    """Collect POI records from the Baidu and Gaode (Amap) place-search APIs.

    Each collected record is a dict with the keys ``name``, ``address``,
    ``phone``, ``lng``, ``lat`` and ``source``.  The API keys stored in
    ``self.api_keys`` are placeholders and must be replaced with real
    developer keys before any request can succeed.
    """

    # Records requested per page; also used to recognise the last page.
    PAGE_SIZE = 20
    # Pause between consecutive page requests, in seconds.
    PAGE_DELAY = 0.5
    # Per-request timeout in seconds so a stalled connection cannot hang forever.
    REQUEST_TIMEOUT = 10

    BAIDU_SEARCH_URL = "https://api.map.baidu.com/place/v2/search"
    GAODE_SEARCH_URL = "https://restapi.amap.com/v3/place/text"

    def __init__(self):
        """Set up the default request headers and the placeholder API keys."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
        }
        self.api_keys = {
            'baidu': 'YOUR_BAIDU_KEY',
            'gaode': 'YOUR_GAODE_KEY',
            'tencent': 'YOUR_TENCENT_KEY'
        }

    def clean_phone(self, phone_str):
        """Extract the distinct mobile numbers found in ``phone_str``.

        Args:
            phone_str: Any value; it is converted with ``str()`` before
                matching, so ``None`` or a list is accepted.

        Returns:
            The distinct matches joined with ``,`` in first-seen order, or
            ``None`` when nothing matches.
        """
        phones = re.findall(r'1[3-9]\d{9}', str(phone_str))
        if not phones:
            return None
        # dict.fromkeys de-duplicates while keeping a deterministic order
        # (a plain set would make the output order vary between runs).
        return ','.join(dict.fromkeys(phones))

    def _fetch_json(self, url, params):
        """GET ``url`` with ``params`` and return the decoded JSON body."""
        resp = requests.get(
            url,
            params=params,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        return resp.json()

    @staticmethod
    def _parse_gaode_location(location):
        """Split a ``"lng,lat"`` string into a ``(lng, lat)`` pair of floats.

        Returns ``(None, None)`` when ``location`` is not a string, has no
        comma, or does not contain two parseable numbers.
        """
        if not isinstance(location, str):
            return None, None
        lng_text, sep, lat_text = location.partition(',')
        if not sep:
            return None, None
        try:
            return float(lng_text), float(lat_text)
        except ValueError:
            return None, None

    def get_baidu_poi(self, keyword, city, radius=5000):
        """Collect POI records from the Baidu place-search API.

        Args:
            keyword: Search term sent as ``query``.
            city: Region name sent as ``region``.
            radius: Accepted for interface compatibility; it is not sent to
                the API and does not filter the results.

        Returns:
            A list of record dicts tagged with ``'source': 'baidu'``.
        """
        params = {
            'query': keyword,
            'region': city,
            'output': 'json',
            'ak': self.api_keys['baidu'],
            'scope': '2',
            'page_size': self.PAGE_SIZE,
            'page_num': 0
        }
        results = []
        while True:
            data = self._fetch_json(self.BAIDU_SEARCH_URL, params)
            items = data.get('results') if isinstance(data, dict) else None
            if not items:
                break
            for item in items:
                # The key may be present with a null value, so fall back
                # to an empty dict before reading the coordinates.
                location = item.get('location') or {}
                results.append({
                    'name': item.get('name'),
                    'address': item.get('address'),
                    'phone': self.clean_phone(item.get('telephone')),
                    'lng': location.get('lng'),
                    'lat': location.get('lat'),
                    'source': 'baidu'
                })
            # A short page means there is nothing further to fetch.
            if len(items) < self.PAGE_SIZE:
                break
            params['page_num'] += 1
            time.sleep(self.PAGE_DELAY)
        return results

    def get_gaode_poi(self, keyword, city, radius=5000):
        """Collect POI records from the Gaode (Amap) keyword-search API.

        Args:
            keyword: Search term sent as ``keywords``.
            city: City name sent as ``city``.
            radius: Accepted for interface compatibility; it is not sent to
                the API and does not filter the results.

        Returns:
            A list of record dicts tagged with ``'source': 'gaode'``.
            ``lng`` and ``lat`` are floats parsed from the ``location``
            string, or ``None`` when it cannot be parsed.
        """
        params = {
            'keywords': keyword,
            'city': city,
            'output': 'json',
            'key': self.api_keys['gaode'],
            'offset': self.PAGE_SIZE,
            'page': 1
        }
        results = []
        while True:
            data = self._fetch_json(self.GAODE_SEARCH_URL, params)
            pois = data.get('pois') if isinstance(data, dict) else None
            if not pois:
                break
            for poi in pois:
                lng, lat = self._parse_gaode_location(poi.get('location'))
                results.append({
                    'name': poi.get('name'),
                    # Defensive: treat an empty value (e.g. an empty list)
                    # as missing so it cannot break the spreadsheet export.
                    'address': poi.get('address') or None,
                    'phone': self.clean_phone(poi.get('tel')),
                    'lng': lng,
                    'lat': lat,
                    'source': 'gaode'
                })
            # A short page means there is nothing further to fetch.
            if len(pois) < self.PAGE_SIZE:
                break
            params['page'] += 1
            time.sleep(self.PAGE_DELAY)
        return results

    def get_nearby_people(self, center_point, radius=1000):
        """Return mock nearby records within ``radius`` (placeholder).

        This method performs no network access: it filters a hard-coded
        sample list by its ``distance`` field.  ``center_point`` is accepted
        but not used.  Replace the sample data with a real, lawful data
        source before relying on the output.
        """
        mock_data = [
            {'name': '用户A', 'distance': 500, 'phone': '13800138001'},
            {'name': '用户B', 'distance': 800, 'phone': '13800138002'}
        ]
        return [
            {**item, 'phone': self.clean_phone(item['phone'])}
            for item in mock_data
            if item['distance'] <= radius
        ]

    def export_to_excel(self, data, filename):
        """Write ``data`` to ``filename`` as an Excel sheet without the index.

        Args:
            data: Records accepted by ``pandas.DataFrame`` (here, a list of
                dicts).
            filename: Destination path of the spreadsheet.
        """
        df = pd.DataFrame(data)
        df.to_excel(filename, index=False)
        print(f"数据已导出到 {filename}")


if __name__ == '__main__':
    collector = MapDataCollector()

    # Example: collect restaurant businesses from both providers.
    baidu_data = collector.get_baidu_poi('餐饮', '北京')
    gaode_data = collector.get_gaode_poi('餐饮', '北京')

    # Merge the two result sets and export them.
    all_data = baidu_data + gaode_data
    collector.export_to_excel(all_data, 'map_poi_data.xlsx')

    # Simulated nearby-data collection (mock data only).
    nearby_data = collector.get_nearby_people((39.9042, 116.4074))
    print("附近人员数据:", nearby_data)