Python练习5—bilibili排行榜数据爬取分析

18 阅读2分钟
import requests
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams

# -----------------------
# Font setup so Chinese text renders in figures
# -----------------------
rcParams.update({
    'font.sans-serif': ['SimHei'],  # SimHei is the sans-serif font used for the Chinese labels
    'axes.unicode_minus': False,    # draw minus signs as ASCII hyphens instead of the Unicode minus
})

# -----------------------
# 1) Fetch the Bilibili ranking
# -----------------------
url = "https://api.bilibili.com/x/web-interface/ranking?rid=0&day=3"
headers = {"User-Agent": "Mozilla/5.0"}

# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before the body is parsed.
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
data = res.json()

# Fail with a readable message when the payload has no data.list entry,
# instead of an opaque KeyError/TypeError further down.
ranking = (data.get('data') or {}).get('list')
if ranking is None:
    raise ValueError(f"Unexpected ranking response: {str(data)[:200]}")

# Keep the first 10 entries; missing fields fall back to placeholder values.
videos = [
    {
        "title": video.get('title', '无标题'),
        "author": video.get('author', '无UP主'),
        "views": video.get('play', 0),
    }
    for video in ranking[:10]
]

# -----------------------
# 2) Save to Excel
# -----------------------
# Build the table from the collected records and write it, without the
# index column, to a fixed file name in the current working directory.
excel_file = "bilibili_ranking.xlsx"
df = pd.DataFrame(data=videos)
df.to_excel(excel_writer=excel_file, index=False)
print(f"✅ 数据已保存到 {excel_file}")

# -----------------------
# 3) Bar chart on a black background
# -----------------------
plt.style.use('dark_background')  # black figure background
plt.figure(figsize=(14, 7))

# One gray bar per video, positioned by row index.
x = range(len(df['title']))
bars = plt.bar(x, df['views'], color='gray')

# X tick labels: break each title into lines of at most 10 characters.
titles = []
for t in df['title']:
    pieces = [t[start:start + 10] for start in range(0, len(t), 10)]
    titles.append('\n'.join(pieces))
plt.xticks(x, titles, rotation=45, ha='right', color='white')

# Chart title and y-axis label.
plt.title("B站排行榜前10视频观看量", fontsize=16, color='white')
plt.ylabel("观看量", fontsize=14, color='white')

# Print each bar's value just above it, centred horizontally on the bar.
for bar in bars:
    height = bar.get_height()
    centre = bar.get_x() + bar.get_width() / 2
    plt.text(centre, height, str(height),
             ha='center', va='bottom', fontsize=10, color='white')

plt.tight_layout()
plt.show()
import requests
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import time  # 用于定时
from datetime import datetime

# -----------------------
# Chinese font support and dark theme
# -----------------------
rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used for the Chinese labels
    'axes.unicode_minus': False,    # draw minus signs as ASCII hyphens
})
plt.style.use('dark_background')
plt.rcParams['figure.autolayout'] = True

# -----------------------
# Configuration
# -----------------------
TOP_N = 10       # number of top-ranked videos to keep
SAVE_EXCEL = True  # whether each fetched ranking is written to an Excel file
REFRESH_INTERVAL = 0  # seconds between automatic refreshes; 0 (or less) runs once without looping

# -----------------------
# 1️⃣ 获取 B 站排行榜数据
# -----------------------
def fetch_bilibili_ranking(top_n=None, timeout=10):
    """Download the Bilibili ranking and return its leading entries as a table.

    Args:
        top_n: Number of leading entries to keep. ``None`` (the default)
            uses the module-level ``TOP_N`` setting.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        pandas.DataFrame with the columns ``title``, ``author``, ``views``
        and ``danmu``, one row per video in the order the API lists them.
        The columns are present even when the API returns no entries.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if the JSON payload has no ``data.list`` entry.
    """
    limit = TOP_N if top_n is None else top_n

    url = "https://api.bilibili.com/x/web-interface/ranking?rid=0&day=3"
    headers = {"User-Agent": "Mozilla/5.0"}

    # Without a timeout a stalled connection would block the refresh loop
    # forever; raise_for_status() reports HTTP errors before JSON parsing.
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    data = res.json()

    # Report a malformed/err payload clearly instead of failing with an
    # opaque KeyError/TypeError while indexing into it.
    entries = (data.get('data') or {}).get('list')
    if entries is None:
        raise ValueError(f"Unexpected ranking response: {str(data)[:200]}")

    videos = [
        {
            "title": video.get('title', '无标题'),
            "author": video.get('author', '无UP主'),
            "views": video.get('play', 0),
            # NOTE(review): assumes the danmaku count is nested under 'stat'
            # in this endpoint's payload - confirm against a live response.
            # A missing or null 'stat' falls back to 0.
            "danmu": (video.get('stat') or {}).get('danmaku', 0),
        }
        for video in entries[:limit]
    ]

    # Explicit columns keep the frame's shape stable for an empty ranking.
    return pd.DataFrame(videos, columns=["title", "author", "views", "danmu"])

# -----------------------
# 2️⃣ 保存 Excel
# -----------------------
def save_to_excel(df):
    """Write ``df`` to a timestamped Excel workbook and report the file name.

    The file is named ``bilibili_ranking_<YYYYMMDD_HHMMSS>.xlsx`` from the
    time of the call, is written relative to the current working directory,
    and omits the DataFrame index.

    Args:
        df: The DataFrame to export.
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"bilibili_ranking_{stamp}.xlsx"
    df.to_excel(filename, index=False)
    print(f"✅ 数据已保存到 {filename}")

# -----------------------
# 3️⃣ 绘制柱状图(观看量+弹幕量可扩展)
# -----------------------
def plot_ranking(df):
    """Show a bar chart of view counts for the videos in ``df``.

    Args:
        df: DataFrame with a ``title`` column (strings) and a ``views``
            column (numbers), one row per video.

    Draws one bar per row, labels each bar with its value, and displays the
    figure with ``plt.show()``. An empty frame is reported on stdout and
    skipped, because there is nothing to draw and no maximum to scale the
    labels by.
    """
    if df.empty:
        print("⚠️ 排行榜数据为空,跳过绘图。")
        return

    views = df['views']
    positions = range(len(df))

    plt.figure(figsize=(14, 7))

    # Gray bars with a thin white outline, one per video.
    bars = plt.bar(positions, views, color='#A0A0A0', edgecolor='white', linewidth=0.8)

    # Break each title into lines of at most 10 characters for the tick labels.
    titles = ['\n'.join(t[i:i + 10] for i in range(0, len(t), 10)) for t in df['title']]
    plt.xticks(positions, titles, rotation=45, ha='right', fontsize=12, color='white')

    # The count in the title follows the data rather than a hard-coded 10,
    # so it stays correct when a different number of videos is plotted.
    plt.title(f"B站排行榜前{len(df)}视频观看量", fontsize=18, color='white', pad=20)
    plt.ylabel("观看量", fontsize=14, color='white')
    plt.yticks(color='white', fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.4)

    # Lift the value labels by 1% of the tallest bar; computed once here
    # instead of once per bar.
    label_offset = views.max() * 0.01
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height + label_offset,
                 f"{height:,}", ha='center', va='bottom', fontsize=10, color='white')

    plt.tight_layout()
    plt.show()

# -----------------------
# 4️⃣ 数据分析和拓展功能占位
# -----------------------
def data_analysis(df):
    """Placeholder hook for analysing the ranking table.

    ``df`` is accepted but not used yet; the function only prints a notice.
    Ideas noted for future extension:
      1. Rank-change trends (requires saved historical data)
      2. Danmaku-count analysis
      3. Keyword statistics (tokenising the titles)
      4. Uploader analysis (which uploader is most popular)
      5. Richer visualisation (line charts, pie charts, animated charts)
    """
    notice = "💡 数据分析功能占位,可拓展实现各种分析。"
    print(notice)

# -----------------------
# 5️⃣ 主流程
# -----------------------
def main():
    """Run the fetch -> save -> plot -> analyse cycle, optionally on a timer.

    Each cycle downloads the ranking, writes it to Excel when ``SAVE_EXCEL``
    is set, shows the chart and calls the analysis hook. The cycle runs once
    when ``REFRESH_INTERVAL`` is zero or negative; otherwise it repeats
    indefinitely, sleeping ``REFRESH_INTERVAL`` seconds between cycles.
    """
    while True:
        ranking = fetch_bilibili_ranking()
        if SAVE_EXCEL:
            save_to_excel(ranking)
        plot_ranking(ranking)
        data_analysis(ranking)

        # A non-positive interval means "run once".
        if REFRESH_INTERVAL <= 0:
            return

        print(f"⏰ 等待 {REFRESH_INTERVAL} 秒后刷新数据...")
        time.sleep(REFRESH_INTERVAL)

# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

ef012d26094f4349a14412c78e6918dd.png

24dbaa16cd4c4a359ccf25fce02d60db.png