💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐
影评情感分析可视化及推荐系统介绍
《影评情感分析可视化及推荐系统》是一个基于Hadoop+Spark大数据框架构建的智能化影评分析平台,采用Python作为开发语言,Django作为后端框架,Vue+ElementUI+Echarts构建前端界面。该系统充分利用Spark强大的分布式计算能力,对海量影评数据进行深度情感分析,通过HDFS分布式文件系统存储和管理影评数据,运用Spark SQL进行高效的数据查询和处理。系统核心功能涵盖用户管理、电影信息展示、影评数据处理、智能情感分析、可视化数据展示、个性化推荐算法、论坛互动交流等模块。通过Pandas和NumPy进行数据预处理,结合机器学习算法实现精准的情感识别,将分析结果通过Echarts图表进行直观展示,为用户提供基于情感倾向的个性化电影推荐服务。系统采用MySQL数据库存储结构化数据,保证数据的一致性和可靠性,同时支持大规模用户并发访问和实时数据分析处理。
影评情感分析可视化及推荐系统演示视频
影评情感分析可视化及推荐系统演示图片
影评情感分析可视化及推荐系统代码展示
from pyspark.sql import SparkSession
from pyspark.ml.feature import Tokenizer, StopWordsRemover, HashingTF, IDF
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import pandas as pd
import numpy as np
from textblob import TextBlob
import jieba
import re
# Shared SparkSession for every view in this module; adaptive query execution
# is enabled so Spark coalesces shuffle partitions automatically.
spark = (
    SparkSession.builder
    .appName("MovieReviewSentimentAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
@csrf_exempt
def sentiment_analysis_process(request):
    """Analyze the sentiment of one movie review.

    Expects a POST body of JSON: {"review_text": str, "movie_id": str}.
    Returns a JsonResponse with a sentiment score in [-1, 1], a confidence
    value, and an emotion distribution suitable for Echarts rendering.
    """
    if request.method != 'POST':
        return JsonResponse({'status': 'error', 'message': 'Invalid request method'})
    # Guard against malformed request bodies instead of raising a 500.
    try:
        data = json.loads(request.body)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return JsonResponse({'status': 'error', 'message': 'Invalid JSON body'})
    review_text = data.get('review_text', '')
    # Keep only Chinese characters, latin letters, digits and whitespace.
    cleaned_text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9\s]', '', review_text)
    words = list(jieba.cut(cleaned_text))
    # Drop single characters and generic movie vocabulary that carries no sentiment.
    filtered_words = [word for word in words if len(word) > 1 and word not in ['电影', '影片', '故事', '演员']]
    processed_text = ' '.join(filtered_words)
    # NOTE(review): the original built an unused Spark DataFrame and an
    # ML Pipeline (Tokenizer/StopWordsRemover/HashingTF/IDF/LogisticRegression)
    # that was never fit or applied — both were dead code and are removed.
    blob = TextBlob(processed_text)
    polarity_score = blob.sentiment.polarity
    # Three-way label: > 0.1 positive, <= -0.1 negative, otherwise neutral.
    sentiment_label = 1 if polarity_score > 0.1 else (0 if polarity_score > -0.1 else -1)
    confidence_score = abs(polarity_score)
    # Lexicon-based adjustment with common Chinese sentiment keywords, since
    # TextBlob's polarity is unreliable on Chinese text.
    emotion_keywords = {'positive': ['好看', '精彩', '棒', '喜欢', '推荐'], 'negative': ['难看', '无聊', '差', '失望', '烂片']}
    emotion_score = (sum(processed_text.count(word) for word in emotion_keywords['positive'])
                     - sum(processed_text.count(word) for word in emotion_keywords['negative']))
    # Blend label and lexicon signal, clamp to [-1, 1]; cast to a plain float
    # because numpy scalars (from np.sign) are not JSON-serializable by JsonResponse.
    final_sentiment = float(max(-1, min(1, (sentiment_label + np.sign(emotion_score)) / 2)))
    result_data = {
        'sentiment_score': final_sentiment,
        'confidence': confidence_score,
        'emotion_distribution': {
            'positive': max(0, final_sentiment),
            'neutral': 1 - abs(final_sentiment),
            'negative': max(0, -final_sentiment),
        },
    }
    return JsonResponse({'status': 'success', 'data': result_data})
@csrf_exempt
def movie_recommendation_engine(request):
    """Recommend up to 10 movies for a user (POST JSON body).

    Expects {"user_id": int-like, "preferences": [genre, ...]}. Scores every
    active movie by weighted genre match, rating similarity and review
    sentiment similarity, boosted slightly by review-count popularity.
    """
    if request.method != 'POST':
        return JsonResponse({'status': 'error', 'message': 'Invalid request method'})
    data = json.loads(request.body)
    user_id = data.get('user_id', '')
    user_preferences = data.get('preferences', [])
    # Coerce user_id to an integer BEFORE interpolating it into Spark SQL —
    # the original formatted the raw value straight into the query string,
    # which is a SQL injection vector.
    try:
        safe_user_id = int(user_id)
    except (TypeError, ValueError):
        return JsonResponse({'status': 'error', 'message': 'Invalid user_id'})
    movie_data_df = spark.sql(
        "SELECT movie_id, genre, avg_rating, review_count FROM movie_info WHERE status = 'active'")
    sentiment_data_df = spark.sql(
        "SELECT movie_id, AVG(sentiment_score) as avg_sentiment, COUNT(*) as sentiment_count "
        "FROM review_sentiment WHERE user_id != {} GROUP BY movie_id".format(safe_user_id))
    # Left join so movies with no sentiment rows are still candidates.
    combined_df = movie_data_df.join(sentiment_data_df, "movie_id", "left")
    user_history_df = spark.sql(
        "SELECT movie_id, rating, sentiment_score FROM user_ratings WHERE user_id = {}".format(safe_user_id))
    # `or 0` covers the NULL aggregate returned for users with no history.
    user_avg_rating = user_history_df.agg({"rating": "avg"}).collect()[0][0] or 0
    user_avg_sentiment = user_history_df.agg({"sentiment_score": "avg"}).collect()[0][0] or 0
    preference_weights = {'genre': 0.3, 'rating': 0.4, 'sentiment': 0.3}
    recommendation_scores = []
    # NOTE(review): collect() pulls all candidates to the driver; acceptable
    # for a catalog-sized table, revisit if movie_info grows large.
    for row in combined_df.collect():
        genre_match = 1 if row['genre'] in user_preferences else 0.5
        # Guard every joined/nullable column — avg_rating, avg_sentiment and
        # review_count can all be NULL, and abs(None - x) raises TypeError.
        rating_similarity = 1 - abs((row['avg_rating'] or 0) - user_avg_rating) / 5
        sentiment_similarity = 1 - abs((row['avg_sentiment'] or 0) - user_avg_sentiment) / 2
        popularity_boost = min(1, (row['review_count'] or 0) / 100)
        final_score = (genre_match * preference_weights['genre']
                       + rating_similarity * preference_weights['rating']
                       + sentiment_similarity * preference_weights['sentiment']) * (1 + popularity_boost * 0.1)
        recommendation_scores.append((row['movie_id'], final_score))
    top_recommendations = sorted(recommendation_scores, key=lambda x: x[1], reverse=True)[:10]
    recommendation_list = [{'movie_id': movie_id, 'score': round(score, 3)}
                           for movie_id, score in top_recommendations]
    return JsonResponse({'status': 'success',
                         'recommendations': recommendation_list,
                         'total_analyzed': len(recommendation_scores)})
@csrf_exempt
def review_data_visualization(request):
    """Build the Echarts payload for one movie's review analytics (GET).

    Query params: movie_id (str) and time_range (days, default 30).
    Returns sentiment percentages, a daily sentiment time series, the top-20
    word frequencies, and the rating/sentiment correlation coefficient.
    """
    if request.method != 'GET':
        return JsonResponse({'status': 'error', 'message': 'Invalid request method'})
    # BUG FIX: the original called ``spark.sql.functions`` — ``spark.sql`` is
    # a method on SparkSession, so that attribute does not exist and the view
    # raised AttributeError. Use the real pyspark.sql.functions module.
    from pyspark.sql import functions as F
    # Sanitize both parameters before string-formatting them into Spark SQL
    # (no bind parameters available here) to prevent SQL injection.
    movie_id = request.GET.get('movie_id', '').replace("'", "").replace('\\', '')
    try:
        time_range = int(request.GET.get('time_range', '30'))
    except ValueError:
        time_range = 30
    reviews_df = spark.sql(
        "SELECT review_id, review_text, sentiment_score, created_time, user_rating "
        "FROM movie_reviews WHERE movie_id = '{}' "
        "AND created_time >= DATE_SUB(NOW(), INTERVAL {} DAY)".format(movie_id, time_range))
    # NOTE(review): the original also computed an overall average sentiment
    # into a variable that was never used in the response — removed.
    positive_count = reviews_df.filter(reviews_df.sentiment_score > 0.1).count()
    neutral_count = reviews_df.filter(
        (reviews_df.sentiment_score >= -0.1) & (reviews_df.sentiment_score <= 0.1)).count()
    negative_count = reviews_df.filter(reviews_df.sentiment_score < -0.1).count()
    total_reviews = reviews_df.count()
    # max(1, ...) avoids division by zero when no reviews fall in the window.
    sentiment_stats = {
        'positive': round(positive_count / max(1, total_reviews) * 100, 2),
        'neutral': round(neutral_count / max(1, total_reviews) * 100, 2),
        'negative': round(negative_count / max(1, total_reviews) * 100, 2),
    }
    time_series_data = (reviews_df
                        .groupBy(F.date_format("created_time", "yyyy-MM-dd").alias("date"))
                        .agg(F.avg("sentiment_score").alias("avg_sentiment"),
                             F.count("*").alias("review_count"))
                        .orderBy("date"))
    time_series_list = [{'date': row['date'],
                         'sentiment': round(row['avg_sentiment'], 3),
                         'count': row['review_count']}
                        for row in time_series_data.collect()]
    rating_sentiment_correlation = reviews_df.stat.corr("user_rating", "sentiment_score")
    all_reviews_text = ' '.join(row['review_text'] for row in reviews_df.select("review_text").collect())
    word_counts = {}
    for word in jieba.cut(all_reviews_text):
        # Skip single characters and generic movie words that dominate reviews.
        if len(word) > 1 and word not in ['电影', '影片', '这个', '觉得', '感觉']:
            word_counts[word] = word_counts.get(word, 0) + 1
    top_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)[:20]
    word_frequency_data = [{'word': word, 'frequency': count} for word, count in top_words]
    visualization_data = {
        'sentiment_distribution': sentiment_stats,
        'time_series': time_series_list,
        'word_frequency': word_frequency_data,
        # corr() can return None (e.g. zero variance); fall back to 0.
        'correlation_coefficient': round(rating_sentiment_correlation or 0, 3),
        'total_reviews': total_reviews,
    }
    return JsonResponse({'status': 'success', 'data': visualization_data})
影评情感分析可视化及推荐系统文档展示
💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐