💖💖作者:计算机编程小咖 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
@TOC
基于大数据的旅游上榜景点评论数据可视化分析系统介绍
《基于大数据的旅游上榜景点评论数据可视化分析系统》是一套采用先进大数据技术栈构建的综合性旅游数据分析平台,该系统以Hadoop分布式存储框架为基础,利用HDFS实现海量旅游景点评论数据的可靠存储,并结合Spark大数据处理引擎和Spark SQL进行高效的数据计算与分析处理。系统在技术架构上支持Python和Java双语言开发,后端分别采用Django和Spring Boot框架提供稳定的API服务,前端运用Vue.js结合ElementUI组件库构建用户界面,通过Echarts图表库实现丰富的数据可视化展示效果,并辅以HTML、CSS、JavaScript和jQuery技术确保良好的交互体验。系统核心功能涵盖完整的用户管理模块包括系统首页、个人信息管理、密码修改等基础功能,更重要的是提供了强大的数据分析能力,包括数据大屏可视化展示、总体评分分析帮助了解景点整体口碑情况、游客类型分析识别不同用户群体特征、用户排行分析发现活跃评论者、关联关系分析挖掘景点间的潜在联系、时序趋势分析追踪评论数据的时间变化规律、游客来源分析统计不同地区游客分布情况,以及文本内容分析深度挖掘评论文本中的情感倾向和关键信息。整个系统通过MySQL数据库存储结构化数据,结合Pandas和NumPy等Python数据处理库进行数据预处理和统计分析,形成了一套完整的从数据采集、存储、处理到可视化展示的大数据分析解决方案,为旅游行业的数据驱动决策提供了有力支撑。
基于大数据的旅游上榜景点评论数据可视化分析系统演示视频
基于大数据的旅游上榜景点评论数据可视化分析系统演示图片
基于大数据的旅游上榜景点评论数据可视化分析系统代码展示
# Shared Spark session for all analysis views. Adaptive query execution is
# enabled so Spark tunes shuffle partition counts at runtime.
spark = (
    SparkSession.builder
    .appName("TourismCommentAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
def analyze_overall_rating(request):
    """Django view: aggregate rating statistics across all scenic spots.

    Reads the comment and scenic-spot tables from MySQL through Spark,
    computes per-spot rating aggregates, the global rating distribution
    and a monthly average-rating trend, and returns it all as JSON.
    """
    jdbc_url = "jdbc:mysql://localhost:3306/tourism_db"
    # NOTE(review): the two tables are read with different passwords
    # ("123456" vs "password") in the original code — kept unchanged here;
    # confirm both credentials are actually correct.
    comment_props = {"user": "root", "password": "123456",
                     "driver": "com.mysql.cj.jdbc.Driver"}
    scenic_props = {"user": "root", "password": "password",
                    "driver": "com.mysql.cj.jdbc.Driver"}
    comment_df = spark.read.jdbc(url=jdbc_url, table="scenic_comments",
                                 properties=comment_props)
    # Per-spot aggregates: average / count / max / min of the rating column.
    rating_stats = (comment_df
                    .select("rating", "scenic_spot_id", "comment_date")
                    .groupBy("scenic_spot_id")
                    .agg(avg("rating").alias("avg_rating"),
                         count("*").alias("comment_count"),
                         max("rating").alias("max_rating"),
                         min("rating").alias("min_rating")))
    scenic_info_df = spark.read.jdbc(url=jdbc_url, table="scenic_spots",
                                     properties=scenic_props)
    # Left join so spots with comments keep their stats even if the spot
    # record is missing a name.
    result_df = (rating_stats
                 .join(scenic_info_df,
                       rating_stats.scenic_spot_id == scenic_info_df.id,
                       "left")
                 .select(scenic_info_df.name.alias("scenic_name"),
                         rating_stats.avg_rating,
                         rating_stats.comment_count,
                         rating_stats.max_rating,
                         rating_stats.min_rating))
    rating_distribution = comment_df.groupBy("rating").count().orderBy("rating")
    pandas_result = result_df.toPandas()
    pandas_distribution = rating_distribution.toPandas()
    # BUGFIX: avg() over an empty table yields NULL (Python None), and
    # float(None) raises TypeError. Fall back to 0.0 before rounding.
    raw_avg = comment_df.agg(avg("rating")).collect()[0][0]
    overall_avg = float(raw_avg) if raw_avg is not None else 0.0
    total_comments = comment_df.count()
    high_rating_spots = pandas_result[pandas_result['avg_rating'] >= 4.5].shape[0]
    low_rating_spots = pandas_result[pandas_result['avg_rating'] < 3.0].shape[0]
    # Monthly average rating, bucketed by "yyyy-MM".
    rating_trend = (comment_df
                    .select("rating",
                            date_format("comment_date", "yyyy-MM").alias("month"))
                    .groupBy("month")
                    .agg(avg("rating").alias("monthly_avg"))
                    .orderBy("month"))
    trend_pandas = rating_trend.toPandas()
    response_data = {
        "overall_average": round(overall_avg, 2),
        "total_comments": total_comments,
        "high_rating_count": high_rating_spots,
        "low_rating_count": low_rating_spots,
        "scenic_ratings": pandas_result.to_dict('records'),
        "rating_distribution": pandas_distribution.to_dict('records'),
        "monthly_trend": trend_pandas.to_dict('records'),
    }
    return JsonResponse(response_data)
def analyze_text_content(request):
    """Django view: mine comment text for sentiment, keywords and length stats.

    Tags every comment with a keyword-vote sentiment, then aggregates the
    sentiment distribution globally, per scenic spot and per month, plus a
    top-50 word list and average comment length per rating.
    """
    comment_df = spark.read.jdbc(
        url="jdbc:mysql://localhost:3306/tourism_db",
        table="scenic_comments",
        properties={"user": "root", "password": "123456",
                    "driver": "com.mysql.cj.jdbc.Driver"},
    )
    positive_keywords = ["好", "棒", "美", "推荐", "值得", "不错", "喜欢", "满意", "优秀", "完美"]
    negative_keywords = ["差", "烂", "坑", "失望", "后悔", "不好", "糟糕", "垃圾", "骗人", "浪费"]

    def analyze_sentiment(text):
        # Keyword-count voting: the polarity with more hits wins; ties
        # (including missing text) are neutral.
        if text is None:
            return "中性"
        pos_hits = sum(1 for kw in positive_keywords if kw in text)
        neg_hits = sum(1 for kw in negative_keywords if kw in text)
        if pos_hits > neg_hits:
            return "积极"
        if neg_hits > pos_hits:
            return "消极"
        return "中性"

    sentiment_udf = udf(analyze_sentiment, StringType())
    comment_with_sentiment = comment_df.withColumn(
        "sentiment", sentiment_udf(comment_df.content))
    sentiment_stats = comment_with_sentiment.groupBy("sentiment").count()
    # NOTE(review): splitting on whitespace ("\\s+") rarely segments Chinese
    # text into words, so most comments may surface as a single "word" —
    # confirm whether a proper tokenizer was intended.
    keyword_analysis = (comment_df
                        .select(explode(split(lower(col("content")), "\\s+")).alias("word"))
                        .filter(length(col("word")) >= 2)
                        .groupBy("word").count()
                        .orderBy(desc("count"))
                        .limit(50))
    # Average comment length per rating level.
    length_analysis = (comment_df
                       .withColumn("content_length", length(col("content")))
                       .select("content_length", "rating")
                       .groupBy("rating")
                       .agg(avg("content_length").alias("avg_length"),
                            count("*").alias("count")))
    scenic_spots_df = spark.read.jdbc(
        url="jdbc:mysql://localhost:3306/tourism_db",
        table="scenic_spots",
        properties={"user": "root", "password": "password",
                    "driver": "com.mysql.cj.jdbc.Driver"},
    )
    # Sentiment counts broken down by scenic spot name.
    scenic_sentiment = (comment_with_sentiment
                        .join(scenic_spots_df,
                              comment_with_sentiment.scenic_spot_id == col("id"),
                              "left")
                        .groupBy("name", "sentiment").count()
                        .orderBy("name", "sentiment"))
    # Sentiment counts broken down by calendar month ("yyyy-MM").
    monthly_sentiment = (comment_with_sentiment
                         .select("sentiment",
                                 date_format("comment_date", "yyyy-MM").alias("month"))
                         .groupBy("month", "sentiment").count()
                         .orderBy("month", "sentiment"))
    response_data = {
        "sentiment_distribution": sentiment_stats.toPandas().to_dict('records'),
        "top_keywords": keyword_analysis.toPandas().to_dict('records'),
        "length_analysis": length_analysis.toPandas().to_dict('records'),
        "scenic_sentiment": scenic_sentiment.toPandas().to_dict('records'),
        "monthly_sentiment": monthly_sentiment.toPandas().to_dict('records'),
    }
    return JsonResponse(response_data)
def analyze_time_trend(request):
    """Django view: comment volume and rating trends at several time scales.

    Buckets comments by day, month, weekday, quarter and hour, adds a
    per-spot monthly breakdown, and derives day-over-day growth, peak
    days and rating-change series from the daily trend.
    """
    comment_df = spark.read.jdbc(
        url="jdbc:mysql://localhost:3306/tourism_db",
        table="scenic_comments",
        properties={"user": "root", "password": "123456",
                    "driver": "com.mysql.cj.jdbc.Driver"},
    )

    def bucket_trend(bucket_expr, alias_name):
        # Shared shape for every time-bucket aggregation: comment count
        # plus average rating per bucket, ordered by bucket.
        return (comment_df
                .select(bucket_expr.alias(alias_name), "rating")
                .groupBy(alias_name)
                .agg(count("*").alias("comment_count"),
                     avg("rating").alias("avg_rating"))
                .orderBy(alias_name))

    daily_trend = bucket_trend(date_format("comment_date", "yyyy-MM-dd"), "date")
    monthly_trend = bucket_trend(date_format("comment_date", "yyyy-MM"), "month")
    weekly_trend = bucket_trend(date_format("comment_date", "u"), "day_of_week")
    seasonal_trend = bucket_trend(quarter("comment_date"), "quarter")
    hourly_trend = bucket_trend(hour("comment_date"), "hour")
    scenic_spots_df = spark.read.jdbc(
        url="jdbc:mysql://localhost:3306/tourism_db",
        table="scenic_spots",
        properties={"user": "root", "password": "password",
                    "driver": "com.mysql.cj.jdbc.Driver"},
    )
    # Monthly trend further broken down per scenic spot name.
    scenic_monthly_trend = (comment_df
                            .join(scenic_spots_df,
                                  comment_df.scenic_spot_id == col("id"),
                                  "left")
                            .select("name",
                                    date_format("comment_date", "yyyy-MM").alias("month"),
                                    "rating")
                            .groupBy("name", "month")
                            .agg(count("*").alias("comment_count"),
                                 avg("rating").alias("avg_rating"))
                            .orderBy("name", "month"))
    by_date = Window.orderBy("date")
    # Day-over-day percentage growth in comment volume; first day has no
    # predecessor and gets 0.
    growth_rate = (daily_trend
                   .withColumn("prev_count", lag("comment_count", 1).over(by_date))
                   .withColumn("growth_rate",
                               when(col("prev_count").isNotNull(),
                                    (col("comment_count") - col("prev_count"))
                                    / col("prev_count") * 100)
                               .otherwise(0)))
    # Ten busiest days by comment count.
    peak_analysis = daily_trend.orderBy(desc("comment_count")).limit(10)
    # Day-over-day change in the average rating.
    rating_trend_analysis = (daily_trend
                             .withColumn("prev_rating", lag("avg_rating", 1).over(by_date))
                             .withColumn("rating_change",
                                         col("avg_rating") - col("prev_rating")))
    response_data = {
        "daily_trend": daily_trend.toPandas().to_dict('records'),
        "monthly_trend": monthly_trend.toPandas().to_dict('records'),
        "weekly_pattern": weekly_trend.toPandas().to_dict('records'),
        "seasonal_pattern": seasonal_trend.toPandas().to_dict('records'),
        "hourly_pattern": hourly_trend.toPandas().to_dict('records'),
        "scenic_monthly_trend": scenic_monthly_trend.toPandas().to_dict('records'),
        "growth_analysis": growth_rate.toPandas().to_dict('records'),
        "peak_days": peak_analysis.toPandas().to_dict('records'),
        "rating_trend_analysis": rating_trend_analysis.toPandas().to_dict('records'),
    }
    return JsonResponse(response_data)
基于大数据的旅游上榜景点评论数据可视化分析系统文档展示
💖💖作者:计算机编程小咖 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目