💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
@TOC
基于大数据的北京高档酒店数据可视化分析系统介绍
本系统是一套基于大数据技术的北京高档酒店数据可视化分析系统,采用Hadoop+Spark大数据框架作为核心技术架构,利用HDFS分布式文件系统存储海量酒店数据,通过Spark SQL进行高效的数据清洗、转换和分析处理,结合Pandas和NumPy进行深度数据挖掘,后端提供Python+Django和Java+SpringBoot双版本实现方案,前端采用Vue+ElementUI搭建管理界面,集成Echarts图表组件实现丰富的数据可视化展示效果,系统功能涵盖用户管理、高档酒店数据管理、数据大屏看板、用户画像特征分析、酒店设施水平分析、酒店空间分布分析、酒店价格影响分析以及酒店口碑评价分析等核心模块,通过对北京地区高档酒店的地理位置、价格区间、设施配置、用户评价等多维度数据进行采集和分析,运用大数据技术挖掘酒店行业的潜在规律和发展趋势,为用户提供直观的数据可视化大屏展示和多角度的分析报告,系统底层采用MySQL数据库存储结构化数据,整体架构既满足大数据毕业设计对技术深度的要求,又具备完整的业务逻辑和实用价值,适合作为计算机专业本科毕业设计项目,技术栈涵盖大数据处理、数据分析、数据可视化等多个核心知识点,能够充分展示学生对大数据技术体系的掌握程度和实际应用能力。
基于大数据的北京高档酒店数据可视化分析系统演示视频
基于大数据的北京高档酒店数据可视化分析系统演示图片
基于大数据的北京高档酒店数据可视化分析系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, avg, sum, when, round, dense_rank, row_number, desc, asc
from pyspark.sql.window import Window
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
import json
spark = SparkSession.builder.appName("BeijingHotelAnalysis").config("spark.sql.warehouse.dir", "/user/hive/warehouse").config("spark.executor.memory", "2g").config("spark.driver.memory", "1g").getOrCreate()
@require_http_methods(["GET"])
def analyze_hotel_spatial_distribution(request):
    """Return hotel spatial-distribution statistics as JSON.

    Reads the ``hotel_data`` table over JDBC, optionally scopes it to the
    ``district`` query parameter, then aggregates per-district counts,
    per-street counts (top 50 rows) and a four-tier price-level breakdown.
    """
    district = request.GET.get('district', None)
    # Hard-coded JDBC credentials; NOTE(review): move to settings/env config.
    hotels = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/hotel_db")
              .option("driver", "com.mysql.jdbc.Driver")
              .option("dbtable", "hotel_data")
              .option("user", "root")
              .option("password", "123456")
              .load())
    # Empty/missing district means "whole city".
    scoped = hotels.filter(col("district") == district) if district else hotels
    by_district = (scoped.groupBy("district")
                   .agg(count("hotel_id").alias("hotel_count"),
                        avg("price").alias("avg_price"),
                        avg("rating").alias("avg_rating"))
                   .orderBy(desc("hotel_count")))
    by_street = (scoped.groupBy("district", "street")
                 .agg(count("hotel_id").alias("street_hotel_count"))
                 .orderBy("district", desc("street_hotel_count")))
    # Bucket nightly price into four named tiers.
    leveled = scoped.withColumn(
        "price_level",
        when(col("price") < 500, "经济型")
        .when((col("price") >= 500) & (col("price") < 1000), "舒适型")
        .when((col("price") >= 1000) & (col("price") < 2000), "高档型")
        .otherwise("豪华型"))
    by_level = (leveled.groupBy("district", "price_level")
                .agg(count("hotel_id").alias("level_count"))
                .orderBy("district", "price_level"))
    response_data = {
        'total_hotels': scoped.count(),
        'total_districts': scoped.select("district").distinct().count(),
        'district_distribution': by_district.toPandas().to_dict(orient='records'),
        # Street list is capped at 50 rows before collecting to the driver.
        'street_distribution': by_street.limit(50).toPandas().to_dict(orient='records'),
        'price_distribution': by_level.toPandas().to_dict(orient='records'),
        'status': 'success',
    }
    return JsonResponse(response_data, safe=False)
@require_http_methods(["GET"])
def analyze_price_influence_factors(request):
    """Return price-influence-factor statistics as JSON.

    Aggregates average price against facility level, rating range, district,
    star level and facility combinations, plus a Pearson correlation between
    facility level and average price.
    """
    # Hard-coded JDBC credentials; NOTE(review): move to settings/env config.
    hotel_df = (spark.read.format("jdbc")
                .option("url", "jdbc:mysql://localhost:3306/hotel_db")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("dbtable", "hotel_data")
                .option("user", "root")
                .option("password", "123456")
                .load())
    facility_price_corr = (hotel_df.groupBy("facility_level")
                           .agg(avg("price").alias("avg_price"),
                                count("hotel_id").alias("hotel_count"),
                                avg("rating").alias("avg_rating"))
                           .orderBy("facility_level"))
    rating_price_corr = (hotel_df.withColumn(
        "rating_range",
        when(col("rating") < 3.5, "3.5分以下")
        .when((col("rating") >= 3.5) & (col("rating") < 4.0), "3.5-4.0分")
        .when((col("rating") >= 4.0) & (col("rating") < 4.5), "4.0-4.5分")
        .otherwise("4.5分以上"))
        .groupBy("rating_range")
        .agg(avg("price").alias("avg_price"), count("hotel_id").alias("count"))
        .orderBy("rating_range"))
    district_price_analysis = (hotel_df.groupBy("district")
                               .agg(avg("price").alias("avg_price"),
                                    count("hotel_id").alias("hotel_count"),
                                    round(avg("room_area"), 2).alias("avg_room_area"))
                               .orderBy(desc("avg_price")))
    star_level_price = (hotel_df.groupBy("star_level")
                        .agg(avg("price").alias("avg_price"),
                             count("hotel_id").alias("count"),
                             avg("rating").alias("avg_rating"))
                        .orderBy("star_level"))
    # Normalize the three facility flags to strict 0/1 before grouping.
    facility_counts = (hotel_df
                       .withColumn("has_gym", when(col("has_gym") == 1, 1).otherwise(0))
                       .withColumn("has_pool", when(col("has_pool") == 1, 1).otherwise(0))
                       .withColumn("has_parking", when(col("has_parking") == 1, 1).otherwise(0)))
    facility_impact = (facility_counts.groupBy("has_gym", "has_pool", "has_parking")
                       .agg(avg("price").alias("avg_price"),
                            count("hotel_id").alias("count"))
                       .orderBy(desc("avg_price"))
                       .limit(10))
    pandas_facility = facility_price_corr.toPandas()
    pandas_rating = rating_price_corr.toPandas()
    pandas_district = district_price_analysis.toPandas()
    pandas_star = star_level_price.toPandas()
    pandas_impact = facility_impact.toPandas()
    # Pearson correlation between facility level and its average price.
    # BUG FIX: the original rounded this float with `round(...)`, but `round`
    # here is pyspark's Column function (it shadows the builtin at module
    # level), which raises TypeError on a plain float — round via NumPy
    # instead. Also guard against non-numeric facility_level values.
    correlation_coefficient = 0.0
    if len(pandas_facility) > 1:
        try:
            correlation_coefficient = float(np.corrcoef(
                pandas_facility['facility_level'].astype(float),
                pandas_facility['avg_price'])[0, 1])
        except (ValueError, TypeError):
            correlation_coefficient = 0.0
    response_data = {
        'facility_price_correlation': pandas_facility.to_dict(orient='records'),
        'rating_price_correlation': pandas_rating.to_dict(orient='records'),
        'district_price_analysis': pandas_district.to_dict(orient='records'),
        'star_level_price': pandas_star.to_dict(orient='records'),
        'facility_combination_impact': pandas_impact.to_dict(orient='records'),
        'correlation_coefficient': float(np.round(correlation_coefficient, 4)),
        'status': 'success',
    }
    return JsonResponse(response_data, safe=False)
@require_http_methods(["GET"])
def analyze_user_portrait_features(request):
    """Return user-portrait statistics (age, gender, booking time, spending) as JSON.

    Joins ``user_behavior`` with ``hotel_data`` on ``hotel_id`` and derives
    age-group, gender, booking-hour, spending-level and district-preference
    aggregations, plus the top-20 spenders.
    """
    # Hard-coded JDBC credentials; NOTE(review): move to settings/env config.
    user_behavior_df = (spark.read.format("jdbc")
                        .option("url", "jdbc:mysql://localhost:3306/hotel_db")
                        .option("driver", "com.mysql.jdbc.Driver")
                        .option("dbtable", "user_behavior")
                        .option("user", "root")
                        .option("password", "123456")
                        .load())
    hotel_df = (spark.read.format("jdbc")
                .option("url", "jdbc:mysql://localhost:3306/hotel_db")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("dbtable", "hotel_data")
                .option("user", "root")
                .option("password", "123456")
                .load())
    # BUG FIX: joining on the column *expression* kept two `hotel_id` columns
    # in the result, so the later count("hotel_id") aggregations raised an
    # ambiguous-column AnalysisException. Joining on the column name keeps a
    # single, unambiguous hotel_id column.
    joined_df = user_behavior_df.join(hotel_df, "hotel_id", "left")
    age_group_analysis = (joined_df.withColumn(
        "age_group",
        when(col("age") < 25, "25岁以下")
        .when((col("age") >= 25) & (col("age") < 35), "25-35岁")
        .when((col("age") >= 35) & (col("age") < 45), "35-45岁")
        .otherwise("45岁以上"))
        .groupBy("age_group")
        .agg(count("user_id").alias("user_count"),
             avg("price").alias("avg_booking_price"),
             avg("stay_days").alias("avg_stay_days"))
        .orderBy("age_group"))
    gender_preference = (joined_df.groupBy("gender")
                         .agg(count("user_id").alias("user_count"),
                              avg("price").alias("avg_price"),
                              avg("rating").alias("avg_rating_preference"))
                         .orderBy("gender"))
    # NOTE(review): booking_time is compared as a numeric hour (0-24) —
    # confirm the column stores an hour-of-day value, not a timestamp.
    booking_time_pattern = (joined_df.withColumn(
        "booking_hour",
        when(col("booking_time") < 6, "凌晨0-6点")
        .when((col("booking_time") >= 6) & (col("booking_time") < 12), "上午6-12点")
        .when((col("booking_time") >= 12) & (col("booking_time") < 18), "下午12-18点")
        .otherwise("晚上18-24点"))
        .groupBy("booking_hour")
        .agg(count("user_id").alias("booking_count"))
        .orderBy("booking_hour"))
    # Per-user totals, then bucket into three spending tiers.
    user_spending_level = (joined_df.groupBy("user_id")
                           .agg(sum("price").alias("total_spending"),
                                count("hotel_id").alias("booking_times"),
                                avg("price").alias("avg_spending"))
                           .withColumn(
                               "spending_level",
                               when(col("total_spending") < 2000, "低消费")
                               .when((col("total_spending") >= 2000) & (col("total_spending") < 5000), "中等消费")
                               .otherwise("高消费")))
    spending_distribution = (user_spending_level.groupBy("spending_level")
                             .agg(count("user_id").alias("user_count"))
                             .orderBy("spending_level"))
    district_preference = (joined_df.groupBy("district")
                           .agg(count("user_id").alias("visit_count"))
                           .orderBy(desc("visit_count"))
                           .limit(10))
    # Top-20 spenders via a global (single-partition) ranking window.
    windowSpec = Window.orderBy(desc("total_spending"))
    top_users = (user_spending_level
                 .withColumn("rank", row_number().over(windowSpec))
                 .filter(col("rank") <= 20)
                 .select("user_id", "total_spending", "booking_times",
                         "avg_spending", "spending_level"))
    response_data = {
        'age_group_features': age_group_analysis.toPandas().to_dict(orient='records'),
        'gender_preference': gender_preference.toPandas().to_dict(orient='records'),
        'booking_time_pattern': booking_time_pattern.toPandas().to_dict(orient='records'),
        'spending_distribution': spending_distribution.toPandas().to_dict(orient='records'),
        'district_preference': district_preference.toPandas().to_dict(orient='records'),
        'top_users': top_users.toPandas().to_dict(orient='records'),
        'total_users': user_behavior_df.select("user_id").distinct().count(),
        'status': 'success',
    }
    return JsonResponse(response_data, safe=False)
基于大数据的北京高档酒店数据可视化分析系统文档展示
💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目