Download: www.pan38.com/dow/share.p… Extraction code: 2881
This code implements a complete Weibo data-collection tool. Main features:

- Collect a blogger's follower data (user ID, nickname, gender, location, bio, etc.)
- Collect data from a post's comment section (comment text, time posted, etc.)
- Extract user IDs from the comment section
- Save the collected data to CSV files

Usage:

- Replace COOKIE in the code with your own Weibo cookie (a quick cookie check is sketched right after these notes)
- Set the target blogger's UID and the target post's Weibo ID
- Adjust the max_pages parameter to control how many pages are collected
- A random delay is built in to reduce the risk of being blocked

Notes:

- Follow Weibo's terms of service and do not over-collect
- Keep the collection frequency low and set a reasonable delay parameter (an optional retry-with-backoff sketch follows the code listing)
- Use the collected data for lawful purposes only
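Before launching a full crawl, it can be worth confirming that the cookie actually authenticates. The sketch below issues a single request against the same follower endpoint the tool uses; cookie_looks_valid and the placeholder test_uid are illustrative names, not part of the tool.

import requests

def cookie_looks_valid(cookie, test_uid='123456789'):
    """Rough check: return True if the cookie yields a JSON response from Weibo's AJAX API."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
        'Cookie': cookie,
    }
    resp = requests.get(
        'https://weibo.com/ajax/friendships/friends',
        params={'uid': test_uid, 'page': 1, 'count': 1},
        headers=headers,
        timeout=10,
    )
    try:
        resp.json()  # an expired or missing cookie usually yields an HTML login page, not JSON
        return resp.status_code == 200
    except ValueError:
        return False

The full tool is listed below.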
import requests
import time
import random
import csv
from urllib.parse import urlencode


class WeiboCrawler:
    def __init__(self, cookie):
        # The Weibo cookie from a logged-in browser session is sent with every request
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Cookie': cookie
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)

    def get_fans_list(self, uid, page=1, count=20):
        """Fetch one page of the blogger's follower list."""
        params = {
            'uid': uid,
            'page': page,
            'count': count
        }
        url = 'https://weibo.com/ajax/friendships/friends?' + urlencode(params)
        try:
            response = self.session.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"Failed to fetch follower list, status code: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error fetching follower list: {e}")
            return None

    def get_all_fans(self, uid, max_pages=50, delay=2):
        """Fetch all followers of the blogger, page by page."""
        all_fans = []
        for page in range(1, max_pages + 1):
            print(f"Fetching page {page} of followers...")
            data = self.get_fans_list(uid, page=page)
            if data and 'users' in data:
                all_fans.extend(data['users'])
                # A short page means we have reached the last page
                if len(data['users']) < 20:
                    break
            else:
                # Stop on an empty or malformed response instead of hammering the API
                break
            # Random jitter on top of the base delay avoids a fixed request rhythm
            time.sleep(delay + random.uniform(0, 1))
        return all_fans

    def get_comments(self, weibo_id, page=1, count=20):
        """Fetch one page of comments on a Weibo post."""
        params = {
            'id': weibo_id,
            'page': page,
            'count': count
        }
        url = 'https://weibo.com/ajax/statuses/comments?' + urlencode(params)
        try:
            response = self.session.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"Failed to fetch comments, status code: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error fetching comments: {e}")
            return None

    def get_all_comments(self, weibo_id, max_pages=50, delay=2):
        """Fetch all comments on a post, page by page."""
        all_comments = []
        for page in range(1, max_pages + 1):
            print(f"Fetching page {page} of comments...")
            data = self.get_comments(weibo_id, page=page)
            if data and 'data' in data:
                all_comments.extend(data['data'])
                if len(data['data']) < 20:
                    break
            else:
                break
            time.sleep(delay + random.uniform(0, 1))
        return all_comments

    def save_to_csv(self, data, filename, fields=None):
        """Save a list of dicts to a CSV file."""
        if not data:
            print("No data to save")
            return
        if not fields:
            fields = list(data[0].keys())
        try:
            with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
                # extrasaction='ignore' drops keys outside the selected fields,
                # since Weibo API objects carry more fields than we export
                writer = csv.DictWriter(f, fieldnames=fields, extrasaction='ignore')
                writer.writeheader()
                writer.writerows(data)
            print(f"Data saved to {filename}")
        except Exception as e:
            print(f"Error saving file: {e}")

    def extract_user_ids(self, comments):
        """Extract user IDs from a list of comments."""
        user_ids = []
        for comment in comments:
            if 'user' in comment:
                user_ids.append({
                    'id': comment['user']['id'],
                    'screen_name': comment['user']['screen_name'],
                    'profile_url': f"https://weibo.com/u/{comment['user']['id']}"
                })
        return user_ids

    def run(self, uid, weibo_id):
        """Run the full collection task."""
        print("Collecting follower data...")
        fans = self.get_all_fans(uid)
        if fans:
            self.save_to_csv(fans, 'fans_data.csv', ['id', 'screen_name', 'gender', 'location', 'description'])
        print("\nCollecting comment data...")
        comments = self.get_all_comments(weibo_id)
        if comments:
            self.save_to_csv(comments, 'comments_data.csv', ['id', 'text', 'created_at', 'user'])
            print("\nExtracting user IDs from comments...")
            user_ids = self.extract_user_ids(comments)
            self.save_to_csv(user_ids, 'comment_users.csv', ['id', 'screen_name', 'profile_url'])
        print("\nCollection task finished!")


if __name__ == '__main__':
    # Replace with your own Weibo cookie
    COOKIE = 'your weibo cookie'

    # Target blogger UID and Weibo post ID
    TARGET_UID = '123456789'            # replace with the target blogger's UID
    TARGET_WEIBO_ID = 'ABCDEFGHIJKLMN'  # replace with the target post's Weibo ID

    crawler = WeiboCrawler(COOKIE)
    crawler.run(TARGET_UID, TARGET_WEIBO_ID)
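As noted above, the collection rate should stay low. If requests begin to fail (for example with HTTP 429 or temporary blocks), one optional hardening step is to wrap session.get in a small retry-with-backoff helper. This is a sketch, not part of the original tool, and get_with_backoff is an illustrative name.

import random
import time
import requests

def get_with_backoff(session, url, max_retries=3, base_delay=5):
    """Retry a GET with exponentially growing, jittered delays; returns None if all attempts fail."""
    for attempt in range(max_retries):
        try:
            resp = session.get(url, timeout=10)
            if resp.status_code == 200:
                return resp
            print(f"Got status {resp.status_code}, backing off...")
        except requests.RequestException as e:
            print(f"Request error: {e}, backing off...")
        # 5s, 10s, 20s base delays plus up to 3s of random jitter
        time.sleep(base_delay * (2 ** attempt) + random.uniform(0, 3))
    return None

The self.session.get(url) calls inside get_fans_list and get_comments could then be replaced with get_with_backoff(self.session, url).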