python爬虫爬取B站弹幕(一步到位,开袋即食,非常方便)

327 阅读1分钟

# -*- coding: utf-8 -*-
# @Time : 2021/1/23 18:40
# @Author : 老七疯狂吸氧
# @File : bilibili.py
# @Software: PyCharm

import re

import requests


def main():
    """Download the danmaku (bullet comments) of one Bilibili video.

    Steps:
      1. Fetch the video page HTML.
      2. Extract the video's ``cid`` from that HTML.
      3. Fetch the danmaku XML for that ``cid`` from comment.bilibili.com.
      4. Hand the decoded XML to ``savelist``, which writes the comment
         texts to a file.
    """
    # Replace with the video you want to scrape; only bilibili.com video
    # pages are supported.
    url = "https://www.bilibili.com/video/BV1kh411y78a"

    page_html = get_html(url)
    cid = saveurl(page_html)
    dmurl = f"https://comment.bilibili.com/{cid}.xml"
    response = get_Html(dmurl)
    # Decode the raw bytes explicitly as UTF-8 rather than relying on the
    # encoding guessed by requests.
    danmaku_xml = response.content.decode("utf-8")
    savelist(danmaku_xml)

def get_html(url):
    """First request: fetch the video page and return its HTML as text.

    Args:
        url: Address of the Bilibili video page.

    Returns:
        The response body decoded to ``str`` by requests.
    """
    headers = {
        # Substitute your own browser's User-Agent if requests get blocked.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    return response.text


def get_Html(url):
    """Second request: fetch the danmaku XML and return the raw response.

    Args:
        url: Address of the danmaku XML document.

    Returns:
        The ``requests.Response`` object, so the caller can decode
        ``response.content`` itself.
    """
    headers = {
        # Substitute your own browser's User-Agent if requests get blocked.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=10)
    return response


def saveurl(baseurl):
    """Extract the video's cid from the video page HTML.

    Args:
        baseurl: HTML text of the video page (despite the name, this is
            page content, not a URL).

    Returns:
        The cid as a string of digits. The second ``"cid":N,"bvid":``
        occurrence is preferred, as in the original script; if the page
        contains only one occurrence, that one is returned.

    Raises:
        IndexError: If no ``"cid":N,"bvid":`` pair is present in the text.
    """
    findlink = re.compile(r'"cid":(\d+),"bvid":')
    matches = findlink.findall(baseurl)
    if not matches:
        raise IndexError('no "cid" found in the page HTML')
    return matches[1] if len(matches) > 1 else matches[0]


def savelist(list):
    """Write every danmaku text found in the XML to ``B站弹幕.txt``.

    Args:
        list: The danmaku XML document as a string. (The parameter name
            shadows the builtin; it is kept for caller compatibility.)

    The output file is created in the current working directory, one
    comment per line, overwriting any previous file.
    """
    # Each comment is expected as <d p="...attributes...">text</d>.
    danmu = re.compile(r'<d p=".*?">(.*?)</d>')
    # ``with`` guarantees the file is closed even if writing fails.
    with open("B站弹幕.txt", "w", encoding="utf-8") as out_file:
        out_file.writelines(f"{text}\n" for text in danmu.findall(list))


if __name__ == "__main__":
    main()
    print("爬取完毕")