一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《旅游景点推荐与商业价值分析系统》基于Hadoop+Spark大数据框架构建,采用Python作为主要开发语言,后端使用Django框架,前端结合Vue、ElementUI和Echarts技术实现可视化展示。系统通过HDFS存储海量旅游景点数据,利用Spark SQL和Pandas进行数据清洗与分析,结合NumPy完成统计计算。功能涵盖用户管理、景点数据管理、可视化大屏展示等多个模块,重点实现了商业模式分析、地理分布分析、市场热度分析、价格商业价值分析、质量竞争力分析以及用户满意度分析等核心功能。系统通过多维度数据挖掘,为旅游景点的商业决策提供数据支撑,帮助景区管理者了解市场定位、优化定价策略、提升服务质量,同时为游客提供个性化的景点推荐服务,实现了大数据技术在旅游行业的实际应用。
三、视频解说
四、部分功能展示
五、部分代码展示
import json
from decimal import Decimal

import numpy as np
import pandas as pd
from django.http import JsonResponse
from django.views import View
from pyspark.sql import SparkSession
# NOTE: `round` and `sum` below shadow the Python builtins at module level.
from pyspark.sql.functions import (
    avg,
    col,
    count,
    countDistinct,
    desc,
    lit,
    round,
    row_number,
    sum,
    when,
)
from pyspark.sql.window import Window
# Shared SparkSession, created once at import time and reused by every view.
spark = (
    SparkSession.builder
    .appName("TourismAnalysis")
    .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
    .config("spark.driver.memory", "2g")
    .config("spark.executor.memory", "2g")
    .getOrCreate()
)
class MarketHeatAnalysisView(View):
    """Rank scenic spots by a composite market-heat score.

    POST body (JSON):
        start_date / end_date: inclusive date-string range for visits/reviews.
        region: optional region filter on the scenic-spot table.

    Heat score = visit_count*0.4 + review_count*0.3 + avg_rating*10*0.3;
    the top 50 spots are returned ordered by score, with a dense rank.
    """

    # NOTE(review): credentials are hard-coded here and in sibling views;
    # they should move to Django settings / environment variables.
    _JDBC_OPTS = {
        "url": "jdbc:mysql://localhost:3306/tourism_db",
        "driver": "com.mysql.cj.jdbc.Driver",
        "user": "root",
        "password": "123456",
    }

    def _load_table(self, table):
        """Read one MySQL table into a Spark DataFrame via JDBC."""
        reader = spark.read.format("jdbc").option("dbtable", table)
        for key, value in self._JDBC_OPTS.items():
            reader = reader.option(key, value)
        return reader.load()

    def post(self, request):
        params = json.loads(request.body)
        start_date = params.get('start_date')
        end_date = params.get('end_date')
        region = params.get('region', None)
        df = self._load_table("scenic_spots")
        review_df = self._load_table("user_reviews")
        visit_df = self._load_table("visit_records")
        if region:
            df = df.filter(col("region") == region)
        visit_filtered = visit_df.filter(
            (col("visit_date") >= start_date) & (col("visit_date") <= end_date)
        )
        review_filtered = review_df.filter(
            (col("review_date") >= start_date) & (col("review_date") <= end_date)
        )
        # BUG FIX: the original used count(col("user_id").isNotNull()), which
        # counts a boolean column that is never NULL — i.e. it equalled
        # count("*") instead of the number of distinct visitors.
        visit_stats = visit_filtered.groupBy("scenic_id").agg(
            count("*").alias("visit_count"),
            countDistinct("user_id").alias("unique_visitors"),
        )
        review_stats = review_filtered.groupBy("scenic_id").agg(
            count("*").alias("review_count"),
            avg("rating").alias("avg_rating"),
        )
        merged_df = (
            df.join(visit_stats, df.id == visit_stats.scenic_id, "left")
              .join(review_stats, df.id == review_stats.scenic_id, "left")
        )
        merged_df = merged_df.fillna(
            {"visit_count": 0, "unique_visitors": 0, "review_count": 0, "avg_rating": 0}
        )
        # Rating is scaled x10 to the same order of magnitude as the counts.
        merged_df = merged_df.withColumn(
            "heat_score",
            col("visit_count") * 0.4 + col("review_count") * 0.3 + col("avg_rating") * 10 * 0.3,
        )
        window_spec = Window.orderBy(desc("heat_score"))
        merged_df = merged_df.withColumn("heat_rank", row_number().over(window_spec))
        result_df = merged_df.select(
            "id", "name", "region", "visit_count", "review_count",
            "avg_rating", "heat_score", "heat_rank",
        ).orderBy(desc("heat_score")).limit(50)
        pandas_df = result_df.toPandas()
        result_list = pandas_df.to_dict('records')
        for item in result_list:
            # JDBC decimals arrive as Decimal; coerce to JSON-friendly floats.
            item['avg_rating'] = float(item['avg_rating']) if item['avg_rating'] else 0
            item['heat_score'] = float(item['heat_score']) if item['heat_score'] else 0
        return JsonResponse({"code": 200, "data": result_list, "message": "市场热度分析完成"})
class PriceValueAnalysisView(View):
    """Analyze ticket-price commercial value for scenic spots.

    POST body (JSON):
        category: category filter, or 'all' for no filter.
        price_range: [min, max] ticket-price bounds (default [0, 1000]).

    Returns per-spot value/price-efficiency scores plus a summary grouped
    by price tier (低价/中价/中高价/高价).
    """

    def post(self, request):
        params = json.loads(request.body)
        category = params.get('category', 'all')
        price_range = params.get('price_range', [0, 1000])
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/tourism_db")
              .option("driver", "com.mysql.cj.jdbc.Driver")
              .option("dbtable", "scenic_spots")
              .option("user", "root").option("password", "123456").load())
        review_df = (spark.read.format("jdbc")
                     .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                     .option("driver", "com.mysql.cj.jdbc.Driver")
                     .option("dbtable", "user_reviews")
                     .option("user", "root").option("password", "123456").load())
        if category != 'all':
            df = df.filter(col("category") == category)
        df = df.filter(
            (col("ticket_price") >= price_range[0]) & (col("ticket_price") <= price_range[1])
        )
        review_stats = review_df.groupBy("scenic_id").agg(
            avg("rating").alias("avg_rating"),
            count("*").alias("review_count"),
            avg("cost_performance_score").alias("avg_cost_performance"),
        )
        merged_df = df.join(review_stats, df.id == review_stats.scenic_id, "left")
        merged_df = merged_df.fillna(
            {"avg_rating": 0, "review_count": 0, "avg_cost_performance": 0}
        )
        merged_df = merged_df.withColumn(
            "price_level",
            when(col("ticket_price") <= 50, "低价")
            .when((col("ticket_price") > 50) & (col("ticket_price") <= 150), "中价")
            .when((col("ticket_price") > 150) & (col("ticket_price") <= 300), "中高价")
            .otherwise("高价"),
        )
        price_level_stats = merged_df.groupBy("price_level").agg(
            avg("ticket_price").alias("avg_price"),
            avg("avg_rating").alias("level_avg_rating"),
            count("*").alias("scenic_count"),
            avg("avg_cost_performance").alias("level_cost_performance"),
        )
        merged_df = merged_df.withColumn(
            "value_score",
            (col("avg_rating") * 0.4 + col("avg_cost_performance") * 0.6) * 10,
        )
        merged_df = merged_df.withColumn(
            "price_efficiency_ratio",
            when(col("ticket_price") > 0, col("value_score") / col("ticket_price") * 10).otherwise(0),
        )
        # BUG FIX: rename the group key before the self-derived join. The
        # original selected col("merged_df.category"), but "merged_df" is a
        # Python variable, not a registered DataFrame alias, so that column
        # reference fails at runtime (and "category" alone is ambiguous).
        category_avg = merged_df.groupBy("category").agg(
            avg("ticket_price").alias("category_avg_price"),
            avg("value_score").alias("category_avg_value"),
        ).withColumnRenamed("category", "category_key")
        final_df = merged_df.join(
            category_avg, merged_df.category == category_avg.category_key, "left"
        )
        final_df = final_df.withColumn(
            "price_competitiveness",
            round((col("category_avg_price") - col("ticket_price")) / col("category_avg_price") * 100, 2),
        )
        final_df = final_df.withColumn(
            "value_competitiveness",
            round((col("value_score") - col("category_avg_value")) / col("category_avg_value") * 100, 2),
        )
        result_df = final_df.select(
            "id", "name", "category", "ticket_price", "price_level",
            "avg_rating", "avg_cost_performance", "value_score",
            "price_efficiency_ratio", "price_competitiveness", "value_competitiveness",
        ).orderBy(desc("price_efficiency_ratio"))
        pandas_df = result_df.toPandas()
        price_level_pandas = price_level_stats.toPandas()
        result_data = {
            "scenic_analysis": pandas_df.to_dict('records'),
            "price_level_summary": price_level_pandas.to_dict('records'),
        }
        numeric_keys = [
            'avg_rating', 'avg_cost_performance', 'value_score',
            'price_efficiency_ratio', 'price_competitiveness', 'value_competitiveness',
        ]
        for item in result_data['scenic_analysis']:
            for key in numeric_keys:
                # pd.isna catches both None and NaN (nulls from the left join
                # become NaN after toPandas and are truthy, so the old falsy
                # check let them leak into the JSON response).
                item[key] = 0 if pd.isna(item[key]) else float(item[key])
        return JsonResponse({"code": 200, "data": result_data, "message": "价格商业价值分析完成"})
class QualityCompetitiveAnalysisView(View):
    """Compare one target scenic spot's quality against chosen competitors.

    POST body (JSON):
        target_id: id of the spot being evaluated.
        competitor_ids: list of competitor spot ids.

    Builds weighted service-quality and facility scores for every spot,
    then reports the target's gap vs each competitor (优势/劣势).
    Responds 404 when the target id matches no row.
    """

    def post(self, request):
        # BUG FIX: `round` is shadowed at module level by
        # pyspark.sql.functions.round (see the imports), so calling it on a
        # plain Python float raised a TypeError. Bind the builtin explicitly.
        import builtins
        params = json.loads(request.body)
        competitor_ids = params.get('competitor_ids', [])
        target_id = params.get('target_id')
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/tourism_db")
              .option("driver", "com.mysql.cj.jdbc.Driver")
              .option("dbtable", "scenic_spots")
              .option("user", "root").option("password", "123456").load())
        review_df = (spark.read.format("jdbc")
                     .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                     .option("driver", "com.mysql.cj.jdbc.Driver")
                     .option("dbtable", "user_reviews")
                     .option("user", "root").option("password", "123456").load())
        facility_df = (spark.read.format("jdbc")
                       .option("url", "jdbc:mysql://localhost:3306/tourism_db")
                       .option("driver", "com.mysql.cj.jdbc.Driver")
                       .option("dbtable", "facility_scores")
                       .option("user", "root").option("password", "123456").load())
        all_ids = [target_id] + competitor_ids
        df_filtered = df.filter(col("id").isin(all_ids))
        review_stats = review_df.groupBy("scenic_id").agg(
            avg("rating").alias("avg_rating"),
            avg("service_score").alias("avg_service"),
            avg("environment_score").alias("avg_environment"),
            avg("experience_score").alias("avg_experience"),
            count("*").alias("total_reviews"),
        )
        facility_stats = facility_df.groupBy("scenic_id").agg(
            avg("parking_score").alias("avg_parking"),
            avg("restroom_score").alias("avg_restroom"),
            avg("dining_score").alias("avg_dining"),
            avg("shopping_score").alias("avg_shopping"),
        )
        merged_df = (
            df_filtered
            .join(review_stats, df_filtered.id == review_stats.scenic_id, "left")
            .join(facility_stats, df_filtered.id == facility_stats.scenic_id, "left")
        )
        merged_df = merged_df.fillna({
            "avg_rating": 0, "avg_service": 0, "avg_environment": 0,
            "avg_experience": 0, "total_reviews": 0, "avg_parking": 0,
            "avg_restroom": 0, "avg_dining": 0, "avg_shopping": 0,
        })
        # Weighted sub-scores, each scaled x10 to a 0-100-ish range.
        merged_df = merged_df.withColumn(
            "service_quality_score",
            (col("avg_service") * 0.35 + col("avg_environment") * 0.25
             + col("avg_experience") * 0.25 + col("avg_rating") * 0.15) * 10,
        )
        merged_df = merged_df.withColumn(
            "facility_score",
            (col("avg_parking") * 0.25 + col("avg_restroom") * 0.3
             + col("avg_dining") * 0.25 + col("avg_shopping") * 0.2) * 10,
        )
        merged_df = merged_df.withColumn(
            "comprehensive_quality_score",
            col("service_quality_score") * 0.6 + col("facility_score") * 0.4,
        )
        target_row = merged_df.filter(col("id") == target_id).first()
        if not target_row:
            return JsonResponse({"code": 404, "message": "目标景点不存在"})
        target_quality = float(target_row['comprehensive_quality_score'])
        competitor_df = merged_df.filter(col("id") != target_id)
        comparison_result = competitor_df.select(
            "id", "name", "service_quality_score", "facility_score",
            "comprehensive_quality_score", "total_reviews",
        ).collect()
        comparison_list = []
        for row in comparison_result:
            competitor_quality = float(row['comprehensive_quality_score'])
            quality_gap = target_quality - competitor_quality
            comparison_list.append({
                "competitor_id": row['id'],
                "competitor_name": row['name'],
                "competitor_service_quality": float(row['service_quality_score']),
                "competitor_facility_score": float(row['facility_score']),
                "competitor_comprehensive_quality": competitor_quality,
                "competitor_reviews": row['total_reviews'],
                "quality_gap": builtins.round(quality_gap, 2),
                "competitive_advantage": "优势" if quality_gap > 0 else "劣势",
            })
        result_data = {
            "target_scenic": {
                "id": target_id,
                "name": target_row['name'],
                "service_quality_score": float(target_row['service_quality_score']),
                "facility_score": float(target_row['facility_score']),
                "comprehensive_quality_score": target_quality,
                "total_reviews": target_row['total_reviews'],
            },
            "competitor_comparison": comparison_list,
        }
        return JsonResponse({"code": 200, "data": result_data, "message": "质量竞争力分析完成"})
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊