Weibo Follower Collection Tool: Scraping a Blogger's Follower Data and Extracting User IDs from Comment Sections [Python]


Download: www.pan38.com/dow/share.p… Extraction code: 2881

This code implements a complete Weibo data collection tool. Its main features:

- Collect a blogger's follower data (user ID, nickname, gender, location, bio, etc.)
- Collect comment-section data from a Weibo post (comment text, posting time, etc.)
- Extract user ID information from the comment section
- Save the collected data to CSV files

Usage:

- Replace COOKIE in the code with your own Weibo cookie
- Set the target blogger's UID and the target Weibo post ID
- Adjust the max_pages parameter to control how many pages are collected (see the sketch after the cautions below)
- A random delay is built in to reduce the risk of getting banned

Cautions:

- Follow Weibo's terms of use and do not over-collect
- Keep the collection frequency low; set a reasonable delay parameter
- Use the collected data only for lawful purposes
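
As a quick illustration of those parameters, here is a minimal usage sketch. It assumes the WeiboCrawler class from the full listing below; the cookie string, UID, and output file name are placeholder values, not real ones.

# Minimal usage sketch -- WeiboCrawler comes from the full listing below.
# The cookie string and UID here are placeholders; substitute your own.
crawler = WeiboCrawler('your_weibo_cookie_here')

# Collect fewer pages with a longer base delay to keep the request rate low.
fans = crawler.get_all_fans('123456789', max_pages=10, delay=3)
crawler.save_to_csv(fans, 'fans_sample.csv')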

import csv
import random
import time
from urllib.parse import urlencode

import requests

class WeiboCrawler:
    def __init__(self, cookie):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Cookie': cookie
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)

    def get_fans_list(self, uid, page=1, count=20):
        """Fetch one page of a blogger's follower list."""
        params = {
            'uid': uid,
            'page': page,
            'count': count
        }
        url = 'https://weibo.com/ajax/friendships/friends?' + urlencode(params)

        try:
            response = self.session.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"Failed to fetch follower list, status code: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error fetching follower list: {e}")
            return None
        
    def get_all_fans(self, uid, max_pages=50, delay=2):
        """Fetch all of a blogger's followers, page by page."""
        all_fans = []
        for page in range(1, max_pages + 1):
            print(f"Fetching followers page {page}...")
            data = self.get_fans_list(uid, page=page)
            if data and 'users' in data:
                all_fans.extend(data['users'])
                # A short page means we have reached the last one.
                if len(data['users']) < 20:
                    break
            # Random jitter on top of the base delay to avoid being blocked.
            time.sleep(delay + random.uniform(0, 1))
        return all_fans
    
    def get_comments(self, weibo_id, page=1, count=20):
        """Fetch one page of comments on a Weibo post."""
        params = {
            'id': weibo_id,
            'page': page,
            'count': count
        }
        url = 'https://weibo.com/ajax/statuses/comments?' + urlencode(params)

        try:
            response = self.session.get(url)
            if response.status_code == 200:
                return response.json()
            else:
                print(f"Failed to fetch comments, status code: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error fetching comments: {e}")
            return None
        
    def get_all_comments(self, weibo_id, max_pages=50, delay=2):
        """Fetch all comments on a Weibo post, page by page."""
        all_comments = []
        for page in range(1, max_pages + 1):
            print(f"Fetching comments page {page}...")
            data = self.get_comments(weibo_id, page=page)
            if data and 'data' in data:
                all_comments.extend(data['data'])
                # A short page means we have reached the last one.
                if len(data['data']) < 20:
                    break
            time.sleep(delay + random.uniform(0, 1))
        return all_comments
    
    def save_to_csv(self, data, filename, fields=None):
        """Save a list of dicts to a CSV file."""
        if not data:
            print("No data to save")
            return

        if not fields:
            fields = list(data[0].keys())

        try:
            with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
                # extrasaction='ignore' drops dict keys not listed in fields;
                # without it DictWriter raises when rows carry extra keys.
                writer = csv.DictWriter(f, fieldnames=fields, extrasaction='ignore')
                writer.writeheader()
                writer.writerows(data)
            print(f"Data saved to {filename}")
        except Exception as e:
            print(f"Error saving file: {e}")
        
    def extract_user_ids(self, comments):
        """Extract user ID info from comments."""
        user_ids = []
        for comment in comments:
            if 'user' in comment:
                user_ids.append({
                    'id': comment['user']['id'],
                    'screen_name': comment['user']['screen_name'],
                    'profile_url': f"https://weibo.com/u/{comment['user']['id']}"
                })
        return user_ids
    
    def run(self, uid, weibo_id):
        """Run the full collection job."""
        print("Collecting follower data...")
        fans = self.get_all_fans(uid)
        if fans:
            self.save_to_csv(fans, 'fans_data.csv', ['id', 'screen_name', 'gender', 'location', 'description'])

        print("\nCollecting comment data...")
        comments = self.get_all_comments(weibo_id)
        if comments:
            self.save_to_csv(comments, 'comments_data.csv', ['id', 'text', 'created_at', 'user'])

            print("\nExtracting comment user IDs...")
            user_ids = self.extract_user_ids(comments)
            self.save_to_csv(user_ids, 'comment_users.csv', ['id', 'screen_name', 'profile_url'])

        print("\nCollection finished!")

if __name__ == '__main__':
    # Replace with your own Weibo cookie
    COOKIE = 'your_weibo_cookie_here'

    # Target blogger UID and Weibo post ID
    TARGET_UID = '123456789'  # replace with the target blogger's UID
    TARGET_WEIBO_ID = 'ABCDEFGHIJKLMN'  # replace with the target post's Weibo ID

    crawler = WeiboCrawler(COOKIE)
    crawler.run(TARGET_UID, TARGET_WEIBO_ID)
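
One optional cleanup step that is not part of the original tool: the same account often comments more than once, so extract_user_ids can return duplicate rows. A minimal dedup sketch, assuming user_ids is the list returned by extract_user_ids and crawler is an existing WeiboCrawler instance:

# Hypothetical post-processing: drop repeat commenters before saving.
# Building a dict keyed on 'id' keeps one entry per user.
unique_users = list({u['id']: u for u in user_ids}.values())
crawler.save_to_csv(unique_users, 'comment_users_unique.csv')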