💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
@TOC
基于大数据的北京旅游景点可视化分析系统介绍
本系统是一套基于大数据技术的北京旅游景点可视化分析系统,采用Hadoop+Spark大数据框架作为核心技术架构,实现对北京地区旅游景点数据的分布式存储、高效处理与深度分析。系统提供Python+Django和Java+Spring Boot两种技术实现方案,后端利用Spark SQL和Pandas、NumPy进行数据清洗、统计分析与挖掘,通过HDFS分布式文件系统存储海量旅游数据,保证数据的高可用性和处理效率;前端采用Vue+ElementUI构建响应式用户界面,结合Echarts图表库打造北京旅游景点数据大屏可视化模块,以柱状图、折线图、饼图、地图等多种图表形式直观展示景点热度分布、游客流量趋势、景点评分排名等核心数据指标;系统功能涵盖用户管理、北京旅游景点信息管理、个人信息维护、系统公告发布、轮播图配置等基础模块,同时提供独立的数据大屏可视化展示页面,支持管理员对景点数据进行增删改查操作,用户可以浏览景点详情、查看系统公告、了解系统简介等信息;底层数据存储采用MySQL关系型数据库,与Hadoop生态圈协同工作,既保证传统业务数据的事务性处理,又发挥大数据框架在海量数据分析场景下的性能优势,整个系统架构清晰、技术栈完整,特别适合作为计算机专业大数据方向的毕业设计项目,能够充分展示Hadoop分布式存储、Spark数据处理、前后端分离开发以及数据可视化等核心技术能力。
基于大数据的北京旅游景点可视化分析系统演示视频
基于大数据的北京旅游景点可视化分析系统演示图片
基于大数据的北京旅游景点可视化分析系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, avg, sum, desc, year, month, date_format, rank, dense_rank
from pyspark.sql.window import Window
import pandas as pd
import numpy as np
from datetime import datetime
# Build the shared SparkSession used by every analysis function below.
# NOTE(review): "local[*]" plus 2g driver/executor memory are development
# settings — presumably swapped for cluster settings in deployment; confirm.
spark = SparkSession.builder.appName("BeijingTourismAnalysis").master("local[*]").config("spark.sql.warehouse.dir", "/user/hive/warehouse").config("spark.driver.memory", "2g").config("spark.executor.memory", "2g").getOrCreate()
# Keep a SparkContext handle and silence INFO/WARN chatter for cleaner console output.
sc = spark.sparkContext
sc.setLogLevel("ERROR")
def analyze_scenic_spot_popularity():
    """Rank Beijing scenic spots by a composite popularity score.

    Returns:
        dict with keys:
            top_spots         -- top-20 spots by popularity_score (list of records)
            area_distribution -- per-area spot counts, visitor averages/totals
            total_spots       -- total scenic spot rows
            total_visitors    -- total visitor-record rows
    """
    def _read_table(table):
        # All business tables live in the same MySQL database; define the
        # JDBC connection options once instead of repeating them per table.
        return (spark.read.format("jdbc")
                .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "123456")
                .load())

    scenic_df = _read_table("scenic_spot")
    visitor_df = _read_table("visitor_record")
    comment_df = _read_table("scenic_comment")
    # BUG FIX: the original chained scenic->visitor->comment joins on spot_id,
    # creating a visitor x comment row fan-out per spot that inflated both
    # visitor_count and comment_count (each became visitors*comments).
    # Aggregate each fact table per spot FIRST, then join one row per spot.
    visitor_agg = visitor_df.groupBy("spot_id").agg(count("visitor_id").alias("visitor_count"))
    comment_agg = comment_df.groupBy("spot_id").agg(
        avg("rating").alias("avg_rating"),
        count("comment_id").alias("comment_count"))
    joined_df = (scenic_df
                 .join(visitor_agg, "spot_id", "left")
                 .join(comment_agg, "spot_id", "left")
                 # Spots with no visitors/comments would otherwise carry NULLs
                 # and produce a NULL popularity_score.
                 .na.fill({"visitor_count": 0, "avg_rating": 0.0, "comment_count": 0}))
    popularity_stats = joined_df.select("spot_name", "spot_area",
                                        "visitor_count", "avg_rating", "comment_count")
    # Composite score: weights kept from the original author
    # (visitors * 0.5 + rating * 10 + comments * 0.3).
    popularity_stats = popularity_stats.withColumn(
        "popularity_score",
        col("visitor_count") * 0.5 + col("avg_rating") * 10 + col("comment_count") * 0.3)
    window_spec = Window.orderBy(desc("popularity_score"))
    ranked_df = popularity_stats.withColumn("rank", dense_rank().over(window_spec))
    top_spots = ranked_df.filter(col("rank") <= 20).orderBy(desc("popularity_score"))
    area_stats = (popularity_stats.groupBy("spot_area")
                  .agg(count("spot_name").alias("spot_count"),
                       avg("visitor_count").alias("avg_visitors"),
                       avg("avg_rating").alias("area_avg_rating"),
                       sum("visitor_count").alias("total_visitors"))
                  .orderBy(desc("total_visitors")))
    top_spots_pandas = top_spots.toPandas()
    area_stats_pandas = area_stats.toPandas()
    result_dict = {
        "top_spots": top_spots_pandas.to_dict(orient="records"),
        "area_distribution": area_stats_pandas.to_dict(orient="records"),
        "total_spots": scenic_df.count(),
        "total_visitors": visitor_df.count(),
    }
    return result_dict
def analyze_visitor_flow_trend():
    """Analyze visitor-flow trends by month, by day, and per scenic spot.

    Returns:
        dict with keys:
            monthly_trend       -- visitor counts per (year, month)
            daily_trend_recent  -- the most recent 30 days of daily counts
            top_spots_by_month  -- top-10 spots within each (year, month)
            peak_months         -- the 5 busiest (year, month) pairs
            monthly_average     -- mean monthly visitor count (0.0 if empty)
            daily_average       -- mean daily visitor count over the window
    """
    def _read_table(table):
        # Single definition of the JDBC connection options for this job.
        return (spark.read.format("jdbc")
                .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "123456")
                .load())

    visitor_df = _read_table("visitor_record")
    scenic_df = _read_table("scenic_spot")
    visitor_df = (visitor_df
                  .withColumn("visit_year", year(col("visit_date")))
                  .withColumn("visit_month", month(col("visit_date")))
                  .withColumn("visit_date_str", date_format(col("visit_date"), "yyyy-MM-dd")))
    # BUG FIX: the original aggregated count("visitor_id") twice under two
    # aliases ("monthly_visitors" and "visit_count"); keep one count and
    # alias the duplicate from it so downstream consumers still see both keys.
    monthly_trend = (visitor_df.groupBy("visit_year", "visit_month")
                     .agg(count("visitor_id").alias("monthly_visitors"))
                     .withColumn("visit_count", col("monthly_visitors"))
                     .orderBy("visit_year", "visit_month"))
    daily_trend = (visitor_df.groupBy("visit_date_str")
                   .agg(count("visitor_id").alias("daily_visitors")))
    # BUG FIX: limit(30) after an ASCENDING date sort returned the EARLIEST
    # 30 days. Take the latest 30 dates, then re-sort ascending for display.
    recent_30days = (daily_trend.orderBy(desc("visit_date_str"))
                     .limit(30)
                     .orderBy("visit_date_str"))
    joined_flow = visitor_df.join(scenic_df, visitor_df.spot_id == scenic_df.spot_id, "left")
    spot_monthly_flow = (joined_flow.groupBy("spot_name", "visit_year", "visit_month")
                         .agg(count("visitor_id").alias("monthly_spot_visitors"))
                         .orderBy(desc("monthly_spot_visitors")))
    window_spec_spot = Window.partitionBy("visit_year", "visit_month").orderBy(desc("monthly_spot_visitors"))
    spot_monthly_ranked = spot_monthly_flow.withColumn("monthly_rank", rank().over(window_spec_spot))
    top_monthly_spots = spot_monthly_ranked.filter(col("monthly_rank") <= 10)
    peak_analysis = (visitor_df.groupBy("visit_year", "visit_month")
                     .agg(count("visitor_id").alias("flow_count"))
                     .orderBy(desc("flow_count")))
    peak_months = peak_analysis.limit(5)
    monthly_pandas = monthly_trend.toPandas()
    daily_pandas = recent_30days.toPandas()
    spot_monthly_pandas = top_monthly_spots.toPandas()
    peak_pandas = peak_months.toPandas()
    # Guard against empty result sets so mean() never returns NaN.
    monthly_avg = monthly_pandas['monthly_visitors'].mean() if len(monthly_pandas) > 0 else 0
    daily_avg = daily_pandas['daily_visitors'].mean() if len(daily_pandas) > 0 else 0
    trend_result = {
        "monthly_trend": monthly_pandas.to_dict(orient="records"),
        "daily_trend_recent": daily_pandas.to_dict(orient="records"),
        "top_spots_by_month": spot_monthly_pandas.to_dict(orient="records"),
        "peak_months": peak_pandas.to_dict(orient="records"),
        "monthly_average": float(monthly_avg),
        "daily_average": float(daily_avg),
    }
    return trend_result
def generate_visualization_dashboard_data():
    """Assemble every dataset the Echarts dashboard needs in one pass.

    Returns:
        dict of chart-ready record lists (type/area distribution, visitor
        ranking, rating distribution, top-rated spots, monthly trend, a
        comprehensive top-20 table) plus overall row counts.
    """
    def _read_table(table):
        # One place for the MySQL JDBC connection options.
        return (spark.read.format("jdbc")
                .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "123456")
                .load())

    scenic_df = _read_table("scenic_spot")
    visitor_df = _read_table("visitor_record")
    comment_df = _read_table("scenic_comment")
    spot_type_dist = (scenic_df.groupBy("spot_type")
                      .agg(count("spot_id").alias("type_count"))
                      .orderBy(desc("type_count")))
    spot_area_dist = (scenic_df.groupBy("spot_area")
                      .agg(count("spot_id").alias("area_count"))
                      .orderBy(desc("area_count")))
    joined_visitor = visitor_df.join(scenic_df, visitor_df.spot_id == scenic_df.spot_id, "left")
    visitor_spot_stats = (joined_visitor.groupBy("spot_name")
                          .agg(count("visitor_id").alias("total_visitors"))
                          .orderBy(desc("total_visitors"))
                          .limit(15))
    rating_distribution = (comment_df.groupBy("rating")
                           .agg(count("comment_id").alias("rating_count"))
                           .orderBy("rating"))
    # Minimum of 5 comments filters out spots whose average would be noise.
    avg_rating_by_spot = (comment_df.join(scenic_df, comment_df.spot_id == scenic_df.spot_id, "left")
                          .groupBy("spot_name")
                          .agg(avg("rating").alias("average_rating"),
                               count("comment_id").alias("comment_count"))
                          .filter(col("comment_count") >= 5)
                          .orderBy(desc("average_rating"))
                          .limit(15))
    visitor_monthly = (visitor_df.withColumn("month", month(col("visit_date")))
                       .groupBy("month")
                       .agg(count("visitor_id").alias("visitors_per_month"))
                       .orderBy("month"))
    # BUG FIX: the original joined scenic->visitor->comment on spot_id in one
    # chain; the visitor x comment fan-out inflated visitor_total and
    # comment_total and weighted rating_avg by visitor multiplicity.
    # Aggregate each fact table per spot FIRST, then join one row per spot.
    visitor_agg = visitor_df.groupBy("spot_id").agg(count("visitor_id").alias("visitor_total"))
    comment_agg = comment_df.groupBy("spot_id").agg(
        avg("rating").alias("rating_avg"),
        count("comment_id").alias("comment_total"))
    comprehensive_stats = (scenic_df
                           .join(visitor_agg, "spot_id", "left")
                           .join(comment_agg, "spot_id", "left")
                           .na.fill({"visitor_total": 0, "comment_total": 0})
                           .select("spot_name", "spot_area", "spot_type",
                                   "visitor_total", "rating_avg", "comment_total")
                           .orderBy(desc("visitor_total"))
                           .limit(20))
    type_pandas = spot_type_dist.toPandas()
    area_pandas = spot_area_dist.toPandas()
    visitor_spot_pandas = visitor_spot_stats.toPandas()
    rating_pandas = rating_distribution.toPandas()
    avg_rating_pandas = avg_rating_by_spot.toPandas()
    monthly_pandas = visitor_monthly.toPandas()
    comprehensive_pandas = comprehensive_stats.toPandas()
    dashboard_data = {
        "spot_type_chart": type_pandas.to_dict(orient="records"),
        "spot_area_chart": area_pandas.to_dict(orient="records"),
        "visitor_ranking_chart": visitor_spot_pandas.to_dict(orient="records"),
        "rating_distribution_chart": rating_pandas.to_dict(orient="records"),
        "top_rated_spots_chart": avg_rating_pandas.to_dict(orient="records"),
        "monthly_visitor_trend_chart": monthly_pandas.to_dict(orient="records"),
        "comprehensive_table": comprehensive_pandas.to_dict(orient="records"),
        "total_scenic_spots": scenic_df.count(),
        "total_visitor_records": visitor_df.count(),
        "total_comments": comment_df.count(),
    }
    return dashboard_data
基于大数据的北京旅游景点可视化分析系统文档展示
💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目