Download link: www.pan38.com/share.php?c… Extraction password: 7789
This Weibo data-collection tool consists of three main files: the main program, a configuration file, and a set of utility functions. The main program implements comment collection and follower-ID collection, using the requests library to send requests and parse the returned JSON. To use it, replace the placeholders with a valid Weibo cookie and the target post/user IDs. The code includes error handling and random delays to reduce the chance of triggering Weibo's anti-crawling detection.
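Of the three files, only the main program appears below. As a rough sketch, the configuration file might do nothing more than centralize the cookie and pacing settings; the file name config.py and every field in it are assumptions for illustration, not part of the original tool (a similar sketch of the utility functions follows the main program):

# config.py -- hypothetical settings module (not included in this post)
COOKIE = 'YOUR_WEIBO_COOKIE'    # paste a valid, logged-in Weibo cookie here
COMMENT_DELAY = (1, 3)          # random delay range (seconds) between comment pages
FOLLOWER_DELAY = (2, 5)         # random delay range (seconds) between follower pages
MAX_COMMENT_PAGES = 5           # default page limits matching the examples below
MAX_FOLLOWER_PAGES = 10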
import requests
import time
import random
import csv
class WeiboCrawler:
    def __init__(self, cookie):
        # A browser-like User-Agent plus a logged-in cookie are required
        # for Weibo's ajax endpoints to return data.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Cookie': cookie
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)
    def get_weibo_comments(self, weibo_id, max_pages=10):
        """Collect comments on a post via Weibo's buildComments ajax endpoint."""
        comments = []
        base_url = 'https://weibo.com/ajax/statuses/buildComments'
        for page in range(1, max_pages + 1):
            params = {
                'flow': 0,
                'is_reload': 1,
                'id': weibo_id,
                'is_show_bulletin': 2,
                'is_mix': 0,
                'count': 20,
                'uid': '',
                'fetch_level': 0,
                'page': page
            }
            try:
                response = self.session.get(base_url, params=params)
                if response.status_code == 200:
                    data = response.json()
                    for comment in data.get('data', []):
                        comments.append({
                            'id': comment['id'],
                            'text': comment['text'],
                            'user_id': comment['user']['id'],
                            'user_name': comment['user']['screen_name'],
                            'created_at': comment['created_at'],
                            'like_count': comment['like_counts']
                        })
                    print(f'Fetched comment page {page}, {len(comments)} comments so far')
                else:
                    print(f'Failed to fetch comments, status code: {response.status_code}')
                    break
                # Random delay between pages to avoid anti-crawling detection
                time.sleep(random.uniform(1, 3))
            except Exception as e:
                print(f'Error while fetching comments: {e}')
                break
        return comments
    def get_user_followers(self, user_id, max_pages=20):
        """Collect a user's followers via Weibo's friendships ajax endpoint."""
        followers = []
        base_url = 'https://weibo.com/ajax/friendships/friends'
        for page in range(1, max_pages + 1):
            params = {
                'page': page,
                'uid': user_id,
                'relate': 'fans',
                'count': 20,
                'type': 'fans',
                'fansSortType': 'time'
            }
            try:
                response = self.session.get(base_url, params=params)
                if response.status_code == 200:
                    data = response.json()
                    for user in data.get('users', []):
                        followers.append({
                            'id': user['id'],
                            'name': user['screen_name'],
                            'gender': user['gender'],
                            'location': user['location'],
                            'description': user['description'],
                            'followers_count': user['followers_count'],
                            'friends_count': user['friends_count'],
                            'statuses_count': user['statuses_count'],
                            'verified': user['verified'],
                            'verified_reason': user.get('verified_reason', '')
                        })
                    print(f'Fetched follower page {page}, {len(followers)} followers so far')
                else:
                    print(f'Failed to fetch followers, status code: {response.status_code}')
                    break
                # Follower pages get a longer random delay
                time.sleep(random.uniform(2, 5))
            except Exception as e:
                print(f'Error while fetching followers: {e}')
                break
        return followers
    def save_to_csv(self, data, filename):
        """Write a list of dicts to CSV; utf-8-sig keeps Excel happy with Chinese text."""
        if not data:
            print('No data to save')
            return
        keys = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)
        print(f'Data saved to {filename}')
if __name__ == '__main__':
    # Replace with your own Weibo cookie
    cookie = 'YOUR_WEIBO_COOKIE'
    crawler = WeiboCrawler(cookie)

    # Example: collect comments on a post
    weibo_id = '1234567890'  # replace with the target post ID
    comments = crawler.get_weibo_comments(weibo_id, max_pages=5)
    crawler.save_to_csv(comments, f'weibo_{weibo_id}_comments.csv')

    # Example: collect a user's followers
    user_id = '9876543210'  # replace with the target user ID
    followers = crawler.get_user_followers(user_id, max_pages=10)
    crawler.save_to_csv(followers, f'user_{user_id}_followers.csv')
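The utility-function file mentioned above is also not shown. A minimal sketch of what it might provide, assuming it wraps the random delay and a simple retry loop around requests; the name utils.py and both helpers are assumptions for illustration, not part of the original tool:

# utils.py -- hypothetical helpers (not included in this post)
import random
import time
import requests

def random_sleep(low, high):
    """Sleep a random number of seconds in [low, high] to avoid fixed request intervals."""
    time.sleep(random.uniform(low, high))

def get_with_retry(session, url, params=None, retries=3, backoff=2.0):
    """GET with simple retries; returns the response, or None if all attempts fail."""
    for attempt in range(1, retries + 1):
        try:
            response = session.get(url, params=params, timeout=10)
            if response.status_code == 200:
                return response
        except requests.RequestException as e:
            print(f'Request attempt {attempt} failed: {e}')
        time.sleep(backoff * attempt)  # linear backoff between attempts
    return None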