💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐
基于大数据的哔哩哔哩热门视频数据可视化分析系统 介绍
基于大数据的哔哩哔哩热门视频数据可视化分析系统是一套集数据采集、存储、分析、可视化于一体的综合性大数据处理平台。系统采用Hadoop分布式文件系统作为数据存储底层,利用Spark强大的内存计算能力对海量视频数据进行快速处理分析,后端基于Django框架构建稳定的Web服务接口,前端通过Vue+ElementUI+Echarts技术栈实现友好的用户交互界面和丰富的图表展示效果。系统核心功能包括系统首页数据概览、个人信息管理、系统参数配置、平台功能介绍以及深度数据分析模块,能够对哔哩哔哩平台的热门视频进行多维度统计分析,包括播放量趋势、用户互动数据、内容分类分布等关键指标的挖掘与呈现。通过Spark SQL和Pandas、NumPy等数据处理库的协同作用,系统可以高效处理大规模视频数据集,为用户提供直观清晰的数据可视化结果,帮助深入了解视频内容的传播规律和用户行为特征。
基于大数据的哔哩哔哩热门视频数据可视化分析系统 演示视频
基于大数据的哔哩哔哩热门视频数据可视化分析系统 演示图片
基于大数据的哔哩哔哩热门视频数据可视化分析系统 代码展示
from pyspark.sql import SparkSession
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Module-level SparkSession shared by all view functions below.
# Adaptive query execution and partition coalescing are enabled so Spark
# can tune shuffle partitions at runtime for the ad-hoc analysis queries.
spark = SparkSession.builder.appName("BilibiliVideoAnalysis").config("spark.sql.adaptive.enabled", "true").config("spark.sql.adaptive.coalescePartitions.enabled", "true").getOrCreate()
@csrf_exempt
def analyze_video_data(request):
    """Analyze hot-video data within a caller-supplied date range.

    Expects a POST body with JSON keys ``start_date`` and ``end_date``
    (``YYYY-MM-DD``). Returns a JSON payload with per-category aggregates,
    a daily play-count trend, the top-20 videos in the range, and the
    total number of matching videos.
    """
    if request.method != 'POST':
        # The original silently returned None here, which Django turns into a 500.
        return JsonResponse({'status': 'error', 'message': 'POST required'}, status=405)
    try:
        payload = json.loads(request.body)
    except (json.JSONDecodeError, ValueError):
        return JsonResponse({'status': 'error', 'message': 'invalid JSON body'}, status=400)
    start_date = payload.get('start_date')
    end_date = payload.get('end_date')
    # Validate before interpolating into SQL: the raw f-string query was
    # injectable through the date parameters.
    try:
        datetime.strptime(start_date, '%Y-%m-%d')
        datetime.strptime(end_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        return JsonResponse(
            {'status': 'error', 'message': 'start_date/end_date must be YYYY-MM-DD'},
            status=400,
        )
    # "jdbc" is the documented short alias for the JDBC relation provider.
    # NOTE(review): credentials belong in settings/env, not in source.
    df = (spark.read.format("jdbc")
          .option("url", "jdbc:mysql://localhost:3306/bilibili_db")
          .option("dbtable", "video_info")
          .option("user", "root")
          .option("password", "123456")
          .load())
    df.createOrReplaceTempView("videos")
    filtered_df = spark.sql(
        f"SELECT * FROM videos WHERE upload_date BETWEEN '{start_date}' AND '{end_date}'"
    )
    # Register the filtered rows so every sub-query below respects the range;
    # the original hot-video query ignored the date filter entirely.
    filtered_df.createOrReplaceTempView("videos_in_range")
    agg_rows = filtered_df.groupBy("category").agg(
        {"play_count": "sum", "like_count": "avg", "coin_count": "avg"}
    ).collect()
    trend_rows = filtered_df.groupBy("upload_date").agg(
        {"play_count": "sum"}
    ).orderBy("upload_date").collect()
    # dict-style agg() names result columns "sum(col)" / "avg(col)".
    category_stats = [
        {
            'category': row['category'],
            'total_plays': int(row['sum(play_count)']),
            'avg_likes': round(float(row['avg(like_count)']), 2),
            'avg_coins': round(float(row['avg(coin_count)']), 2),
        }
        for row in agg_rows
    ]
    daily_trend = [
        {'date': str(row['upload_date']), 'plays': int(row['sum(play_count)'])}
        for row in trend_rows
    ]
    hot_rows = spark.sql(
        "SELECT title, author, play_count, like_count "
        "FROM videos_in_range ORDER BY play_count DESC LIMIT 20"
    ).collect()
    top_videos = [
        {
            'title': row['title'],
            'author': row['author'],
            'plays': int(row['play_count']),
            'likes': int(row['like_count']),
        }
        for row in hot_rows
    ]
    result_data = {
        'category_analysis': category_stats,
        'trend_analysis': daily_trend,
        'hot_videos': top_videos,
        'total_videos': filtered_df.count(),
    }
    return JsonResponse({'status': 'success', 'data': result_data})
@csrf_exempt
def generate_visualization_data(request):
    """Build chart-ready data for the front-end Echarts components.

    GET parameter ``type`` selects the chart: ``pie`` (video count per
    category), ``line`` (average daily plays over time), or anything else
    (default ``bar``: top-15 authors by total plays). Returns JSON shaped
    for the corresponding chart type.
    """
    if request.method != 'GET':
        # The original fell through and returned None (Django 500) on non-GET.
        return JsonResponse({'status': 'error', 'message': 'GET required'}, status=405)
    chart_type = request.GET.get('type', 'bar')
    # "jdbc" is the documented short alias for the JDBC relation provider.
    # NOTE(review): credentials belong in settings/env, not in source.
    df = (spark.read.format("jdbc")
          .option("url", "jdbc:mysql://localhost:3306/bilibili_db")
          .option("dbtable", "video_info")
          .option("user", "root")
          .option("password", "123456")
          .load())
    df.createOrReplaceTempView("video_data")
    if chart_type == 'pie':
        rows = spark.sql(
            "SELECT category, COUNT(*) as count FROM video_data GROUP BY category"
        ).collect()
        pie_data = [{'name': row['category'], 'value': int(row['count'])} for row in rows]
        return JsonResponse({'chart_type': 'pie', 'data': pie_data})
    if chart_type == 'line':
        rows = spark.sql(
            "SELECT DATE(upload_date) as date, AVG(play_count) as avg_plays "
            "FROM video_data GROUP BY DATE(upload_date) ORDER BY date"
        ).collect()
        line_data = {
            'dates': [str(row['date']) for row in rows],
            'values': [round(float(row['avg_plays']), 2) for row in rows],
        }
        return JsonResponse({'chart_type': 'line', 'data': line_data})
    # Default: bar chart of the 15 most-played authors.
    rows = spark.sql(
        "SELECT author, SUM(play_count) as total_plays, COUNT(*) as video_count "
        "FROM video_data GROUP BY author ORDER BY total_plays DESC LIMIT 15"
    ).collect()
    bar_data = {
        'authors': [row['author'] for row in rows],
        'plays': [int(row['total_plays']) for row in rows],
        'counts': [int(row['video_count']) for row in rows],
    }
    return JsonResponse({'chart_type': 'bar', 'data': bar_data})
@csrf_exempt
def system_data_management(request):
    """Maintenance endpoint for the video_info table.

    POST body JSON key ``operation`` selects the action: ``clean`` (split
    out rows with negative counts or NULL upload dates and write the valid
    rows to ``cleaned_video_info``), ``backup`` (copy the table to a
    timestamped backup table), or anything else (return summary statistics).
    """
    if request.method != 'POST':
        # The original returned None (Django 500) for non-POST requests.
        return JsonResponse({'status': 'error', 'message': 'POST required'}, status=405)
    try:
        operation = json.loads(request.body).get('operation')
    except (json.JSONDecodeError, ValueError):
        return JsonResponse({'status': 'error', 'message': 'invalid JSON body'}, status=400)
    jdbc_url = "jdbc:mysql://localhost:3306/bilibili_db"
    # "jdbc" is the documented short alias for the JDBC relation provider.
    # NOTE(review): credentials belong in settings/env, not in source.
    df = (spark.read.format("jdbc")
          .option("url", jdbc_url)
          .option("dbtable", "video_info")
          .option("user", "root")
          .option("password", "123456")
          .load())
    df.createOrReplaceTempView("system_data")
    if operation == 'clean':
        invalid_df = spark.sql(
            "SELECT * FROM system_data "
            "WHERE play_count < 0 OR like_count < 0 OR upload_date IS NULL"
        )
        cleaned_df = spark.sql(
            "SELECT * FROM system_data "
            "WHERE play_count >= 0 AND like_count >= 0 AND upload_date IS NOT NULL"
        )
        invalid_count = invalid_df.count()
        valid_count = cleaned_df.count()
        (cleaned_df.write.mode("overwrite").format("jdbc")
         .option("url", jdbc_url)
         .option("dbtable", "cleaned_video_info")
         .option("user", "root")
         .option("password", "123456")
         .save())
        return JsonResponse({
            'status': 'success',
            'invalid_records': invalid_count,
            'valid_records': valid_count,
        })
    if operation == 'backup':
        backup_table_name = f"backup_video_info_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        (df.write.mode("overwrite").format("jdbc")
         .option("url", jdbc_url)
         .option("dbtable", backup_table_name)
         .option("user", "root")
         .option("password", "123456")
         .save())
        return JsonResponse({
            'status': 'success',
            'backup_table': backup_table_name,
            'total_records': df.count(),
        })
    # Default: overall statistics.
    stats = spark.sql(
        "SELECT COUNT(*) as total, MAX(play_count) as max_plays, "
        "MIN(play_count) as min_plays, AVG(play_count) as avg_plays FROM system_data"
    ).collect()[0]
    category_count = spark.sql(
        "SELECT COUNT(DISTINCT category) as unique_categories FROM system_data"
    ).collect()[0]['unique_categories']
    author_count = spark.sql(
        "SELECT COUNT(DISTINCT author) as unique_authors FROM system_data"
    ).collect()[0]['unique_authors']
    total = int(stats['total'])
    # MAX/MIN/AVG return NULL on an empty table; the original int(None)/float(None)
    # calls raised TypeError. Report zeros instead.
    system_stats = {
        'total_videos': total,
        'max_plays': int(stats['max_plays']) if total else 0,
        'min_plays': int(stats['min_plays']) if total else 0,
        'avg_plays': round(float(stats['avg_plays']), 2) if total else 0.0,
        'unique_categories': int(category_count),
        'unique_authors': int(author_count),
    }
    return JsonResponse({'status': 'success', 'system_stats': system_stats})
基于大数据的哔哩哔哩热门视频数据可视化分析系统 文档展示
💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐