下载地址:www.pan38.com/dow/share.p… 提取密码:1238
这个工具包含两个主要功能:1)通过抖音API获取视频评论并提取用户UID;2)通过WebSocket连接直播间获取实时弹幕。使用时需要替换为有效的Cookie,以及目标视频链接或直播间ID。
import json
import re
import time
from threading import Thread
from urllib.parse import urlencode

import requests
import websocket
class DouyinSpider:
    """Collect Douyin video comments over HTTP and live-room chat over WebSocket.

    The placeholder ``Cookie`` header must be replaced with a valid one
    before any request is made.
    """

    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the caller forever.
    REQUEST_TIMEOUT = 10
    # Seconds to wait between live-room reconnect attempts.
    RECONNECT_DELAY = 5
    # The capture group is mandatory so a bare "video/" never yields ''.
    _VIDEO_ID_RE = re.compile(r'video/(\d+)')

    def __init__(self):
        """Set the default request headers and the two API endpoints."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Cookie': 'YOUR_COOKIE_HERE',
        }
        # NOTE(review): the original literal was truncated to
        # 'www.douyin.com/aweme/v1/we…' and had no scheme, which requests
        # rejects. Restored to the web comment-list endpoint -- confirm.
        self.comment_api = 'https://www.douyin.com/aweme/v1/web/comment/list/'
        self.live_api = 'wss://webcast3-ws-web-hl.douyin.com/webcast/im/push/v2/'

    @classmethod
    def _extract_video_id(cls, url):
        """Return the digits following ``video/`` in *url*, or None if absent."""
        match = cls._VIDEO_ID_RE.search(url)
        return match.group(1) if match else None

    @staticmethod
    def _parse_comment(comment):
        """Flatten one raw comment dict into uid/nickname/comment/timestamp."""
        user = comment['user']
        return {
            'uid': user['uid'],
            'nickname': user['nickname'],
            'comment': comment['text'],
            'timestamp': comment['create_time'],
        }

    def get_video_id(self, url):
        """Extract the video ID from a share link.

        The link is fetched so that redirects are followed; the ID is then
        read from the final URL.

        Args:
            url: Share link of the video.

        Returns:
            The video ID as a string, or None if the request fails or the
            final URL contains no ``video/<digits>`` segment.
        """
        try:
            res = requests.get(url, headers=self.headers,
                               timeout=self.REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"获取视频ID失败: {e}")
            return None

        video_id = self._extract_video_id(res.url)
        if video_id is None:
            print(f"获取视频ID失败: no video id in {res.url}")
        return video_id

    def get_comments(self, video_url, max_count=1000):
        """Fetch comments of a video together with each commenter's UID.

        Pages of 20 comments are requested one second apart until
        ``max_count`` comments are collected, the API reports no more data,
        or an error occurs. Comments gathered before an error are kept.

        Args:
            video_url: Share link of the video.
            max_count: Maximum number of comments to return.

        Returns:
            A list of at most ``max_count`` dicts with the keys ``uid``,
            ``nickname``, ``comment`` and ``timestamp``; empty when the
            video ID cannot be resolved.
        """
        video_id = self.get_video_id(video_url)
        if not video_id:
            return []

        params = {
            'aweme_id': video_id,
            'count': 20,
            'cursor': 0,
        }
        comments = []
        while len(comments) < max_count:
            try:
                url = f"{self.comment_api}?{urlencode(params)}"
                res = requests.get(url, headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT)
                data = res.json()
                # The key may be missing or null; treat both as an empty page.
                page = data.get('comments') or []
                comments.extend(self._parse_comment(item) for item in page)
                # An empty page with has_more set would otherwise loop forever.
                if not page or not data.get('has_more', False):
                    break
                params['cursor'] = data['cursor']
            except (requests.RequestException, ValueError, KeyError,
                    TypeError, AttributeError) as e:
                print(f"获取评论出错: {e}")
                break
            time.sleep(1)  # pause between pages

        # The last page may overshoot the requested limit.
        return comments[:max_count]

    def on_live_message(self, ws, message):
        """Handle one WebSocket message from a live room.

        Messages whose ``method`` is ``WebcastChatMessage`` are printed as
        chat lines; all other messages are ignored.

        Args:
            ws: The WebSocket app that delivered the message (unused).
            message: Raw message text, parsed here as JSON.
        """
        try:
            data = json.loads(message)
            if data.get('method') == 'WebcastChatMessage':
                payload = data['payload']
                user = payload['user']
                content = payload['content']
                print(f"[弹幕] {user['nickname']}({user['uid']}): {content}")
        except Exception as e:
            # Callback boundary: report the failure and keep the socket alive.
            print(f"解析消息出错: {e}")

    def connect_live(self, room_id):
        """Connect to a live room's WebSocket on a background thread.

        The connection is re-established in an endless loop, waiting
        ``RECONNECT_DELAY`` seconds between attempts.

        Args:
            room_id: Identifier of the live room.

        Returns:
            The started (non-daemon) thread running the connection loop.
        """
        ws_url = f"{self.live_api}?{urlencode({'room_id': room_id})}"
        ws = websocket.WebSocketApp(
            ws_url,
            on_message=self.on_live_message,
            header=self.headers,
        )

        def run():
            while True:
                try:
                    ws.run_forever()
                except Exception as e:
                    print(f"连接断开,尝试重连: {e}")
                # run_forever() usually returns rather than raises when the
                # connection drops, so always pause here to avoid a tight
                # reconnect loop.
                time.sleep(self.RECONNECT_DELAY)

        worker = Thread(target=run)
        worker.start()
        return worker
def main():
    """Run the two usage examples: dump video comments, then join a live room."""
    spider = DouyinSpider()

    # Example: collect the comments of a video and save them as JSON.
    video_url = "https://v.douyin.com/xxxxxx/"
    comments = spider.get_comments(video_url)
    with open('comments.json', 'w', encoding='utf-8') as f:
        json.dump(comments, f, ensure_ascii=False, indent=2)

    # Example: connect to a live room and print its chat messages.
    room_id = "123456789"
    spider.connect_live(room_id)


# The dunder names lost their underscores in the original paste
# ("if name == 'main'"), which raised NameError at import time.
if __name__ == '__main__':
    main()