爬取B站“睡前200”(BV1qf4y1v7nX)视频弹幕并生成词云后,发现了好玩的东西

188 阅读1分钟

源码

import requests
import re

# Download the danmaku (bullet-comment) XML for one video, extract the text
# of every <d> element, and save the comments one per line to bzhan.txt.

# Send a desktop-browser User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
}
# NOTE(review): `oid` presumably identifies the video's comment pool -- confirm.
url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=261084177'

# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as danmaku.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
doc = response.content.decode('utf-8')

# Each comment is the text content of a <d ...>...</d> element.
# (Named `danmu_pattern` so the builtin `format` is no longer shadowed.)
danmu_pattern = re.compile(r'<d.*?>(.*?)</d>')
Danmu = danmu_pattern.findall(doc)

with open("bzhan.txt", 'w', encoding='utf-8') as f:
    for danmu_text in Danmu:
        # Write '\n' only: text mode already translates it to the platform
        # line ending, so writing '\r\n' produced '\r\r\n' on Windows.
        f.write(danmu_text + '\n')
        print(danmu_text)

from wordcloud import WordCloud
import matplotlib.pyplot as plt

def creat_word_cloud(filename, *, output_path='HELLO.png',
                     font_path=r'C:\Windows\Fonts\msyh.ttc'):
    """Render a word cloud from ``<filename>.txt`` and save it as an image.

    The text file is read as UTF-8, turned into a word cloud, drawn on the
    current matplotlib axes (with the axes hidden) and written to
    ``output_path``.

    Args:
        filename: Path of the input text file *without* the ``.txt`` suffix.
        output_path: Where the rendered image is saved. Defaults to
            ``'HELLO.png'``.
        font_path: Font file passed to ``WordCloud``. The default points at
            a font under ``C:\\Windows\\Fonts``; pass another font file on
            systems where that path does not exist.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open("{}.txt".format(filename), encoding='utf-8') as text_file:
        text = text_file.read()

    wc = WordCloud(
        background_color="white",
        max_words=2000,
        font_path=font_path,
        height=600,
        width=1000,
        max_font_size=100,
        random_state=30,  # fixed seed passed through to WordCloud
    )
    myword = wc.generate(text)
    plt.imshow(myword)
    plt.axis("off")
    wc.to_file(output_path)

结果