Preface
💖💖 Author: 计算机程序员小杨 💙💙 About me: I work in the computer field and am proficient in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT directions. I take on customized project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for lowering plagiarism-check similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code. Feel free to bring me any technical or code questions! 💛💛 A quick note: thank you all for your attention and support! 💕💕 Contact 计算机程序员小杨 at the end of this post to get the source code 💜💜 Real-world website projects | Android/Mini Program projects | Big data projects | Deep learning projects | CS graduation project topic selection 💜💜
I. Development Tools
Development language: Python
Backend framework: Django
Frontend: Vue
Database: MySQL
System architecture: B/S (browser/server)
IDE: PyCharm
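To make the stack concrete, the Django side would point at the MySQL database through settings.py along these lines. This is a minimal sketch; the database name, user, and password are placeholders inferred from the code in Section V rather than the project's actual configuration:

# settings.py (sketch): values are assumptions, adjust to your environment
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',  # requires the mysqlclient driver
        'NAME': 'novel_system',                # database name as used by the views in Section V
        'USER': 'root',
        'PASSWORD': 'password',
        'HOST': 'localhost',
        'PORT': '3306',
    }
}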
II. System Overview
The web novel analysis and visualization system is a B/S-architecture analytics platform built on the Python Django framework. It stores and manages novel data in a MySQL database and uses a Vue frontend to deliver an interactive user interface. The system integrates core modules for user and permission management, novel data collection and processing, reading-trend prediction, and multi-dimensional visualization. It performs in-depth mining and statistical analysis of key indicators such as page views, ratings, and reader preferences, and applies machine-learning algorithms to forecast a novel's future reading trend, giving authors, publishers, and platform operators data-driven decision support. The platform follows a modular design and supports a range of chart types, including trend lines, heatmaps, and distribution charts, helping users grasp market dynamics and reader behavior at a glance and improving both content-operations efficiency and user experience.
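To illustrate the B/S request flow described above, the frontend (or a plain test script) calls an analysis endpoint over HTTP and renders the returned JSON. A minimal sketch using the requests library follows; the URL path and novel_id value are assumptions for illustration, since the actual routes are not shown in this post:

import requests

# Hypothetical route; the real path depends on the project's urls.py
resp = requests.post(
    "http://localhost:8000/api/novel/analysis/",
    json={"novel_id": 1001, "analysis_type": "comprehensive"},
)
if resp.ok:
    metrics = resp.json()["performance_metrics"]
    print(metrics["reading_completion_rate"], metrics["retention_rate"])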
III. System Feature Demo
IV. System Interface Showcase
V. System Source Code
from pyspark.sql import SparkSession
from pyspark.sql.functions import row_number
from pyspark.sql.window import Window
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.regression import LinearRegression
from pyspark.ml.evaluation import RegressionEvaluator
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import mysql.connector
from datetime import datetime, timedelta

# Shared SparkSession with adaptive query execution enabled
spark = SparkSession.builder \
    .appName("NovelAnalysisSystem") \
    .config("spark.sql.adaptive.enabled", "true") \
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true") \
    .getOrCreate()
@csrf_exempt
def novel_data_analysis(request):
    """Return basic info and engagement metrics for a single novel."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    novel_id = data.get('novel_id')
    analysis_type = data.get('analysis_type', 'comprehensive')  # reserved for finer-grained modes
    # Demo-style hardcoded credentials; move these to settings in production
    connection = mysql.connector.connect(host='localhost', user='root', password='password', database='novel_system')
    cursor = connection.cursor()
    query = ("SELECT novel_id, title, author, genre, word_count, chapter_count, view_count, "
             "favorite_count, comment_count, rating, publish_date, update_date "
             "FROM novels WHERE novel_id = %s")
    cursor.execute(query, (novel_id,))
    novel_data = cursor.fetchone()
    if not novel_data:
        cursor.close()
        connection.close()
        return JsonResponse({'error': 'Novel not found'}, status=404)
    query_readers = ("SELECT user_id, reading_progress, reading_time, rating, favorite_status, last_read_date "
                     "FROM user_reading_records WHERE novel_id = %s")
    cursor.execute(query_readers, (novel_id,))
    reader_data = cursor.fetchall()
    df_novel = spark.createDataFrame([novel_data], ["novel_id", "title", "author", "genre", "word_count", "chapter_count", "view_count", "favorite_count", "comment_count", "rating", "publish_date", "update_date"])
    # Engagement metrics; guard against novels with no reading records yet
    reading_completion_rate, avg_reading_time, retention_rate = 0, 0, 0
    if reader_data:
        df_readers = spark.createDataFrame(reader_data, ["user_id", "reading_progress", "reading_time", "rating", "favorite_status", "last_read_date"])
        reader_count = df_readers.count()
        # Share of readers who got through at least 80% of the novel
        reading_completion_rate = df_readers.filter(df_readers.reading_progress >= 0.8).count() / reader_count
        avg_reading_time = df_readers.agg({"reading_time": "avg"}).collect()[0][0] or 0
        # Share of readers active within the last 7 days
        week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
        retention_rate = df_readers.filter(df_readers.last_read_date >= week_ago).count() / reader_count
    # Standardize the numeric features and take the mean of the scaled vector
    # as a simple composite quality heuristic
    feature_cols = ["word_count", "chapter_count", "view_count", "favorite_count", "comment_count"]
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    df_features = assembler.transform(df_novel)
    scaler = StandardScaler(inputCol="features", outputCol="scaled_features")
    scaler_model = scaler.fit(df_features)
    df_scaled = scaler_model.transform(df_features)
    quality_score = float(df_scaled.select("scaled_features").collect()[0][0].toArray().mean())
    analysis_result = {
        'novel_id': novel_id,
        'basic_info': {
            'title': novel_data[1],
            'author': novel_data[2],
            'genre': novel_data[3],
            'word_count': novel_data[4],
            'chapter_count': novel_data[5]
        },
        'performance_metrics': {
            'view_count': novel_data[6],
            'favorite_count': novel_data[7],
            'comment_count': novel_data[8],
            'rating': float(novel_data[9]),
            'reading_completion_rate': reading_completion_rate,
            'avg_reading_time': avg_reading_time,
            'retention_rate': retention_rate,
            'quality_score': quality_score
        },
        'analysis_timestamp': datetime.now().isoformat()
    }
    cursor.close()
    connection.close()
    return JsonResponse(analysis_result)
@csrf_exempt
def reading_trend_prediction(request):
    """Fit a linear regression on up to 90 days of history and project daily views forward."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    novel_id = data.get('novel_id')
    prediction_days = data.get('prediction_days', 30)
    connection = mysql.connector.connect(host='localhost', user='root', password='password', database='novel_system')
    cursor = connection.cursor()
    query = ("SELECT DATE(created_date) as date, SUM(view_count) as daily_views, "
             "SUM(favorite_count) as daily_favorites, AVG(rating) as daily_rating "
             "FROM daily_novel_stats WHERE novel_id = %s "
             "AND created_date >= DATE_SUB(NOW(), INTERVAL 90 DAY) "
             "GROUP BY DATE(created_date) ORDER BY date")
    cursor.execute(query, (novel_id,))
    historical_data = cursor.fetchall()
    if len(historical_data) < 7:
        cursor.close()
        connection.close()
        return JsonResponse({'error': 'Insufficient historical data for prediction'}, status=400)
    df_history = spark.createDataFrame(historical_data, ["date", "daily_views", "daily_favorites", "daily_rating"])
    # Turn the date sequence into a numeric index so time can be used as a feature
    df_history = df_history.withColumn("day_index", row_number().over(Window.orderBy("date")))
    feature_cols = ["day_index", "daily_favorites", "daily_rating"]
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    df_features = assembler.transform(df_history)
    lr = LinearRegression(featuresCol="features", labelCol="daily_views", maxIter=100, regParam=0.01)
    lr_model = lr.fit(df_features)
    # Simulate future favorites/ratings by jittering the latest observed values
    last_day_index = df_history.agg({"day_index": "max"}).collect()[0][0]
    last_row = df_history.orderBy(df_history.day_index.desc()).first()
    last_favorites = float(last_row["daily_favorites"])  # cast Decimal aggregates to float
    last_rating = float(last_row["daily_rating"])
    future_data = []
    for i in range(1, prediction_days + 1):
        future_day_index = last_day_index + i
        predicted_favorites = last_favorites * (1 + np.random.normal(0, 0.1))
        predicted_rating = max(1.0, min(5.0, last_rating + np.random.normal(0, 0.2)))
        # Cast numpy scalars to plain floats so Spark can infer the schema
        future_data.append((future_day_index, float(predicted_favorites), float(predicted_rating)))
    df_future = spark.createDataFrame(future_data, ["day_index", "daily_favorites", "daily_rating"])
    df_future_features = assembler.transform(df_future)
    predictions = lr_model.transform(df_future_features)
    prediction_results = []
    for row in predictions.collect():
        prediction_date = (datetime.now() + timedelta(days=int(row.day_index - last_day_index))).strftime('%Y-%m-%d')
        predicted_views = max(0, int(row.prediction))
        # Crude confidence heuristic: penalize predictions that drift far from the simulated favorites
        confidence_score = 1.0 / (1.0 + abs(row.prediction - row.daily_favorites) / row.daily_favorites) if row.daily_favorites > 0 else 0.5
        prediction_results.append({
            'date': prediction_date,
            'predicted_views': predicted_views,
            'predicted_favorites': int(row.daily_favorites),
            'predicted_rating': round(row.daily_rating, 2),
            'confidence_score': round(confidence_score, 3)
        })
    # In-sample RMSE as a rough indicator of model fit
    evaluator = RegressionEvaluator(labelCol="daily_views", predictionCol="prediction", metricName="rmse")
    rmse = evaluator.evaluate(lr_model.transform(df_features))
    cursor.close()
    connection.close()
    return JsonResponse({
        'novel_id': novel_id,
        'prediction_period': f'{prediction_days} days',
        'predictions': prediction_results,
        'model_accuracy': {'rmse': round(rmse, 2)},
        'generated_at': datetime.now().isoformat()
    })
@csrf_exempt
def visualization_data_generator(request):
    """Produce chart-ready aggregates for the Vue frontend (distribution, heatmap, trend)."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    chart_type = data.get('chart_type', 'trend')
    time_range = data.get('time_range', 30)
    genre_filter = data.get('genre_filter', None)
    connection = mysql.connector.connect(host='localhost', user='root', password='password', database='novel_system')
    cursor = connection.cursor()
    if chart_type == 'genre_distribution':
        # Novel count, average rating, and total views per genre for a pie/bar chart
        query = ("SELECT genre, COUNT(*) as novel_count, AVG(rating) as avg_rating, SUM(view_count) as total_views "
                 "FROM novels GROUP BY genre ORDER BY novel_count DESC")
        cursor.execute(query)
        genre_data = cursor.fetchall()
        cursor.close()
        connection.close()
        df_genre = spark.createDataFrame(genre_data, ["genre", "novel_count", "avg_rating", "total_views"])
        total_novels = df_genre.agg({"novel_count": "sum"}).collect()[0][0]
        genre_stats = []
        for row in df_genre.collect():
            percentage = (row.novel_count / total_novels) * 100
            genre_stats.append({
                'genre': row.genre,
                'count': row.novel_count,
                'percentage': round(percentage, 2),
                'avg_rating': round(row.avg_rating, 2),
                'total_views': row.total_views
            })
        return JsonResponse({'chart_type': 'genre_distribution', 'data': genre_stats})
    elif chart_type == 'reading_heatmap':
        # Reading sessions bucketed by weekday and hour for a 7x24 heatmap
        query = ("SELECT HOUR(last_read_time) as hour, DAYOFWEEK(last_read_time) as weekday, COUNT(*) as reading_sessions "
                 "FROM user_reading_records WHERE last_read_time >= DATE_SUB(NOW(), INTERVAL %s DAY) "
                 "GROUP BY HOUR(last_read_time), DAYOFWEEK(last_read_time)")
        cursor.execute(query, (time_range,))
        heatmap_data = cursor.fetchall()
        cursor.close()
        connection.close()
        df_heatmap = spark.createDataFrame(heatmap_data, ["hour", "weekday", "reading_sessions"])
        # Collect once into a lookup table instead of filtering the DataFrame per cell
        session_lookup = {(row.weekday, row.hour): row.reading_sessions for row in df_heatmap.collect()}
        heatmap_matrix = [[session_lookup.get((weekday, hour), 0) for hour in range(24)] for weekday in range(1, 8)]
        max_sessions = max(max(day) for day in heatmap_matrix) or 1
        normalized_matrix = [[round(cell / max_sessions, 3) for cell in day] for day in heatmap_matrix]
        return JsonResponse({
            'chart_type': 'reading_heatmap',
            'data': {
                'matrix': normalized_matrix,
                'max_value': max_sessions,
                'time_range': time_range
            }
        })
    elif chart_type == 'popularity_trend':
        # Daily totals over the selected window, optionally restricted to one genre
        base_query = ("SELECT DATE(created_date) as date, SUM(view_count) as total_views, "
                      "SUM(favorite_count) as total_favorites, COUNT(DISTINCT novel_id) as active_novels "
                      "FROM daily_novel_stats WHERE created_date >= DATE_SUB(NOW(), INTERVAL %s DAY)")
        params = [time_range]
        if genre_filter:
            base_query += " AND novel_id IN (SELECT novel_id FROM novels WHERE genre = %s)"
            params.append(genre_filter)
        base_query += " GROUP BY DATE(created_date) ORDER BY date"
        cursor.execute(base_query, params)
        trend_data = cursor.fetchall()
        cursor.close()
        connection.close()
        df_trend = spark.createDataFrame(trend_data, ["date", "total_views", "total_favorites", "active_novels"])
        trend_points = []
        for row in df_trend.collect():
            engagement_rate = (row.total_favorites / row.total_views) * 100 if row.total_views > 0 else 0
            avg_views_per_novel = row.total_views / row.active_novels if row.active_novels > 0 else 0
            trend_points.append({
                'date': row.date.strftime('%Y-%m-%d'),
                'total_views': row.total_views,
                'total_favorites': row.total_favorites,
                'active_novels': row.active_novels,
                'engagement_rate': round(engagement_rate, 2),
                'avg_views_per_novel': round(avg_views_per_novel, 2)
            })
        return JsonResponse({
            'chart_type': 'popularity_trend',
            'data': trend_points,
            'time_range': time_range,
            'genre_filter': genre_filter
        })
    cursor.close()
    connection.close()
    return JsonResponse({'error': f'Unknown chart_type: {chart_type}'}, status=400)
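For completeness, the three views above still need to be registered as URL routes. Below is a minimal sketch of a urls.py, assuming the views live in an app named analysis; the app name and URL paths are not given in the original and are chosen here for illustration:

# urls.py (sketch): app name and URL paths are assumptions
from django.urls import path
from analysis import views

urlpatterns = [
    path('api/novel/analysis/', views.novel_data_analysis),
    path('api/novel/trend/', views.reading_trend_prediction),
    path('api/visualization/', views.visualization_data_generator),
]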
VI. System Documentation
Closing
💕💕 Contact 计算机程序员小杨 at the end of this post to get the source code