抖音评论采集工具,提取抖音评论UID,直播间采集评论弹幕信息【python】

360 阅读1分钟

下载地址:www.pan38.com/dow/share.p… 提取密码:1238

这个工具包含两个主要功能:1)通过抖音API获取视频评论并提取用户UID;2)通过WebSocket连接直播间获取实时弹幕。使用前需要将代码中的Cookie以及视频链接/直播间ID替换为有效值。

import json
import re
import time
from threading import Thread
from urllib.parse import urlencode

import requests
import websocket

class DouyinSpider:
    """Fetch Douyin video comments over HTTP and live-room chat over WebSocket.

    The request headers carry a placeholder cookie; replace
    ``headers['Cookie']`` with a valid value before making requests.
    """

    # Seconds to wait on a single HTTP request before giving up, so a stalled
    # connection cannot hang the caller indefinitely.
    REQUEST_TIMEOUT = 10
    # Seconds to pause between WebSocket reconnect attempts.
    RECONNECT_DELAY = 5

    def __init__(self):
        """Set up the default request headers and the two API endpoints."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Cookie': 'YOUR_COOKIE_HERE'
        }
        # NOTE(review): the original literal was truncated to
        # 'www.douyin.com/aweme/v1/we…' and had no scheme, which `requests`
        # rejects. The path below is the presumed full endpoint -- confirm.
        self.comment_api = 'https://www.douyin.com/aweme/v1/web/comment/list/'
        self.live_api = 'wss://webcast3-ws-web-hl.douyin.com/webcast/im/push/v2/'

    @staticmethod
    def _extract_video_id(final_url):
        """Return the digits following ``video/`` in *final_url*, or None."""
        match = re.search(r'video/(\d+)', final_url)
        return match.group(1) if match else None

    @staticmethod
    def _parse_comment(comment):
        """Reduce one raw comment dict to the four fields that get stored.

        Raises:
            KeyError: if *comment* lacks ``user``, ``text`` or ``create_time``,
                or its ``user`` lacks ``uid`` or ``nickname``.
        """
        user = comment['user']
        return {
            'uid': user['uid'],
            'nickname': user['nickname'],
            'comment': comment['text'],
            'timestamp': comment['create_time'],
        }

    def get_video_id(self, url):
        """Extract the video ID from a share link.

        The link is requested first, and the ID is read from the URL of the
        final response (``res.url``), not from *url* itself.

        Args:
            url: Share link of the video.

        Returns:
            The video ID as a string of digits, or None when the request fails
            or the final URL contains no ``video/<digits>`` segment.
        """
        try:
            res = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"获取视频ID失败: {e}")
            return None

        video_id = self._extract_video_id(res.url)
        if video_id is None:
            print(f"获取视频ID失败: 无法从 {res.url} 解析视频ID")
        return video_id

    def get_comments(self, video_url, max_count=1000):
        """Fetch a video's comments together with each commenter's UID.

        Pages of 20 are requested until *max_count* is reached, the response
        reports no more data, or an error occurs. On error the comments
        gathered so far are returned.

        Args:
            video_url: Share link of the video.
            max_count: Upper bound on the number of comments returned.

        Returns:
            A list of at most *max_count* dicts with the keys ``uid``,
            ``nickname``, ``comment`` and ``timestamp``; empty when the video
            ID cannot be resolved.
        """
        video_id = self.get_video_id(video_url)
        if not video_id:
            return []

        params = {
            'aweme_id': video_id,
            'count': 20,
            'cursor': 0
        }

        comments = []
        while len(comments) < max_count:
            try:
                url = f"{self.comment_api}?{urlencode(params)}"
                res = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
                res.raise_for_status()
                data = res.json()

                # `or []` also covers a response where 'comments' is null.
                for comment in data.get('comments') or []:
                    comments.append(self._parse_comment(comment))

                if not data.get('has_more', False):
                    break

                params['cursor'] = data['cursor']
            except (requests.RequestException, ValueError, KeyError,
                    TypeError, AttributeError) as e:
                print(f"获取评论出错: {e}")
                break

            # Pause between pages to avoid hammering the endpoint.
            time.sleep(1)

        # The last page may overshoot the limit, so trim to what was asked for.
        return comments[:max_count]

    def on_live_message(self, ws, message):
        """Handle one WebSocket message from the live room.

        Prints a line for every ``WebcastChatMessage``; other methods are
        ignored. Malformed messages are reported and dropped so the receive
        loop keeps running.

        NOTE(review): this assumes the server sends JSON text frames --
        confirm it does not send binary/protobuf frames instead.

        Args:
            ws: The WebSocket app that delivered the message (unused).
            message: Raw message payload, expected to be a JSON object.
        """
        try:
            data = json.loads(message)
            if data.get('method') == 'WebcastChatMessage':
                user = data['payload']['user']
                content = data['payload']['content']
                print(f"[弹幕] {user['nickname']}({user['uid']}): {content}")
        except (ValueError, KeyError, TypeError, AttributeError) as e:
            print(f"解析消息出错: {e}")

    def connect_live(self, room_id):
        """Connect to a live room's WebSocket on a background thread.

        The thread reconnects forever. It is not a daemon thread, so it keeps
        the process alive after the main thread finishes.

        Args:
            room_id: ID of the live room to join.

        Returns:
            The started ``Thread`` running the connection loop.
        """
        ws_url = f"{self.live_api}?{urlencode({'room_id': room_id})}"
        ws = websocket.WebSocketApp(
            ws_url,
            on_message=self.on_live_message,
            header=self.headers
        )

        def run():
            while True:
                try:
                    ws.run_forever()
                except Exception as e:
                    print(f"连接断开,尝试重连: {e}")
                # Wait before reconnecting even when run_forever() returns
                # normally; otherwise a closed connection causes a tight loop.
                time.sleep(self.RECONNECT_DELAY)

        worker = Thread(target=run)
        worker.start()
        return worker

if __name__ == '__main__':
    # Demo run: the share link and room ID below are placeholders and must be
    # replaced with real values.
    spider = DouyinSpider()

    # Example: collect a video's comments and save them as UTF-8 JSON.
    video_url = "https://v.douyin.com/xxxxxx/"
    comments = spider.get_comments(video_url)
    with open('comments.json', 'w', encoding='utf-8') as f:
        json.dump(comments, f, ensure_ascii=False, indent=2)

    # Example: connect to a live room and print its chat messages.
    room_id = "123456789"
    spider.connect_live(room_id)