一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
三、视频解说
四、部分功能展示
五、部分代码展示
from django.http import JsonResponse
from django.views import View
import numpy as np
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    asc,
    avg,
    col,
    count,
    desc,
    percentile_approx,
    stddev,
    when,
)
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
spark = SparkSession.builder.appName("HotpotAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
def city_density_analysis(request):
    """Aggregate hotpot store data per city and return density metrics as JSON.

    Buckets cities into density tiers by store count, ranks the top-20 cities,
    groups cities into region tiers, and flags under-served high-rating
    cities plus a simple growth-opportunity score.

    Args:
        request: Django HttpRequest; not inspected beyond routing.

    Returns:
        JsonResponse with keys density_distribution, city_ranking,
        region_stats, high_potential and growth_opportunities, each a list of
        record dicts.
    """
    # NOTE(review): JDBC credentials are hardcoded — move to Django settings
    # or environment variables before deployment.
    hotpot_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/hotpot_db")
        .option("dbtable", "hotpot_stores")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    # Per-city aggregates reused by several derived frames below.
    city_stats = hotpot_df.groupBy("city").agg(
        count("store_id").alias("store_count"),
        avg("rating").alias("avg_rating"),
        avg("price").alias("avg_price"),
    )
    # Tier cities by absolute store count.
    city_density = city_stats.withColumn(
        "density_level",
        when(col("store_count") > 100, "高密度")
        .when(col("store_count") > 50, "中密度")
        .otherwise("低密度"),
    )
    density_distribution = city_density.groupBy("density_level").agg(
        count("city").alias("city_count"),
        avg("avg_rating").alias("level_avg_rating"),
    )
    city_ranking = city_stats.orderBy(desc("store_count")).limit(20)
    # Map cities onto first-tier / new-first-tier / other region buckets.
    regional_analysis = hotpot_df.withColumn(
        "region",
        when(col("city").isin(["北京", "上海", "广州", "深圳"]), "一线城市")
        .when(col("city").isin(["成都", "重庆", "杭州", "南京"]), "新一线城市")
        .otherwise("其他城市"),
    )
    region_stats = regional_analysis.groupBy("region").agg(
        count("store_id").alias("total_stores"),
        avg("rating").alias("region_avg_rating"),
        avg("price").alias("region_avg_price"),
    )
    # Few stores but a high average rating => potential to expand.
    high_potential_cities = city_stats.filter(
        (col("store_count") < 30) & (col("avg_rating") > 4.0)
    ).orderBy(desc("avg_rating"))
    # (Removed dead market_saturation / competitive_cities frames — they were
    # computed but never included in the response.)
    growth_opportunity = city_stats.withColumn(
        "opportunity_index",
        (col("avg_rating") * 0.6) + ((100 - col("store_count")) * 0.004),
    )
    top_opportunities = growth_opportunity.orderBy(desc("opportunity_index")).limit(15)
    result_data = {
        "density_distribution": density_distribution.toPandas().to_dict('records'),
        "city_ranking": city_ranking.toPandas().to_dict('records'),
        "region_stats": region_stats.toPandas().to_dict('records'),
        "high_potential": high_potential_cities.toPandas().to_dict('records'),
        "growth_opportunities": top_opportunities.toPandas().to_dict('records'),
    }
    return JsonResponse(result_data)
def price_distribution_analysis(request):
    """Analyze hotpot store price segmentation and return the results as JSON.

    Buckets stores into price ranges, computes price quartiles, per-city
    average prices, rating quality per price segment, value-for-money
    rankings, premium-market counts and a simple price-elasticity screen.

    Bug fixes vs. the original:
      * removed `price_trend_analysis` — it grouped `hotpot_df` by
        "price_range", a column that only exists on `price_ranges`, which
        raises an AnalysisException as soon as the plan is analyzed;
      * removed `market_positioning` — it called the Python builtin
        `sum("segment_count").over()` (TypeError) instead of the Spark
        aggregate over a Window; neither frame was ever returned;
      * removed unused `budget_analysis`.

    Args:
        request: Django HttpRequest; not inspected beyond routing.

    Returns:
        JsonResponse with range_distribution, price_percentiles (single
        record), city_price_analysis, price_rating_segments,
        best_value_stores, premium_analysis and optimal_pricing.
    """
    # NOTE(review): JDBC credentials are hardcoded — move to settings/env.
    hotpot_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/hotpot_db")
        .option("dbtable", "hotpot_stores")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    # Price-tier labels; boundaries are per-person spend in CNY.
    price_ranges = hotpot_df.withColumn(
        "price_range",
        when(col("price") < 50, "经济型(50元以下)")
        .when(col("price") < 100, "中档型(50-100元)")
        .when(col("price") < 200, "高档型(100-200元)")
        .otherwise("奢华型(200元以上)"),
    )
    range_distribution = price_ranges.groupBy("price_range").agg(
        count("store_id").alias("store_count"),
        avg("rating").alias("avg_rating"),
        avg("popularity").alias("avg_popularity"),
    )
    # Approximate quartiles of the overall price distribution.
    price_percentiles = hotpot_df.select(
        percentile_approx("price", 0.25).alias("q1"),
        percentile_approx("price", 0.5).alias("median"),
        percentile_approx("price", 0.75).alias("q3"),
    )
    # Only cities with a reasonable sample (>20 stores).
    city_price_analysis = (
        hotpot_df.groupBy("city")
        .agg(avg("price").alias("city_avg_price"), count("store_id").alias("store_count"))
        .filter(col("store_count") > 20)
    )
    # Conditional counts: count(when(cond, 1)) only counts non-null rows.
    price_rating_segments = price_ranges.groupBy("price_range").agg(
        count(when(col("rating") >= 4.5, 1)).alias("excellent_count"),
        count(when((col("rating") >= 4.0) & (col("rating") < 4.5), 1)).alias("good_count"),
        count(when(col("rating") < 4.0, 1)).alias("average_count"),
    )
    # Rating per 100 CNY as a value-for-money proxy.
    competitive_pricing = hotpot_df.withColumn(
        "value_score", col("rating") / (col("price") / 100)
    )
    best_value_stores = competitive_pricing.orderBy(desc("value_score")).limit(20)
    premium_analysis = (
        hotpot_df.filter(col("price") > 150)
        .groupBy("city")
        .agg(count("store_id").alias("premium_count"), avg("rating").alias("premium_rating"))
    )
    # Popularity per yuan as a crude elasticity proxy.
    price_elasticity = hotpot_df.select(
        col("price"),
        col("popularity"),
        (col("popularity") / col("price")).alias("elasticity_ratio"),
    )
    optimal_pricing = price_elasticity.filter(
        (col("elasticity_ratio") > 1.5) & (col("price") < 120)
    )
    result_data = {
        "range_distribution": range_distribution.toPandas().to_dict('records'),
        "price_percentiles": price_percentiles.toPandas().to_dict('records')[0],
        "city_price_analysis": city_price_analysis.toPandas().to_dict('records'),
        "price_rating_segments": price_rating_segments.toPandas().to_dict('records'),
        "best_value_stores": best_value_stores.toPandas().to_dict('records'),
        "premium_analysis": premium_analysis.toPandas().to_dict('records'),
        "optimal_pricing": optimal_pricing.toPandas().to_dict('records'),
    }
    return JsonResponse(result_data)
def comprehensive_rating_analysis(request):
    """Analyze hotpot store rating quality across cities and return JSON.

    Computes rating-level distribution, per-city excellence rates, sampled
    rating/price aggregates, rating-stability (stddev) markets, underperforming
    high-traffic stores, benchmark (>=4.7) stores, and a composite quality
    index per city.

    Bug fix vs. the original: `stddev` was used without being imported
    (NameError at call time) — it is now imported at the top of the file.
    Also removed `rating_price_correlation` and the
    `rating_volatility`/`volatility_analysis` frames, which were computed but
    never included in the response.

    Args:
        request: Django HttpRequest; not inspected beyond routing.

    Returns:
        JsonResponse with level_stats, city_excellence_rate,
        correlation_analysis, stable_markets, improvement_potential,
        excellence_benchmarks and top_quality_markets record lists.
    """
    # NOTE(review): JDBC credentials are hardcoded — move to settings/env.
    hotpot_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/hotpot_db")
        .option("dbtable", "hotpot_stores")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    # Bucket stores by rating level.
    rating_distribution = hotpot_df.withColumn(
        "rating_level",
        when(col("rating") >= 4.5, "优秀(4.5+)")
        .when(col("rating") >= 4.0, "良好(4.0-4.5)")
        .when(col("rating") >= 3.5, "一般(3.5-4.0)")
        .otherwise("较差(3.5以下)"),
    )
    level_stats = rating_distribution.groupBy("rating_level").agg(
        count("store_id").alias("count"),
        avg("price").alias("avg_price"),
        avg("popularity").alias("avg_popularity"),
    )
    city_rating_analysis = hotpot_df.groupBy("city").agg(
        avg("rating").alias("city_avg_rating"),
        count("store_id").alias("total_stores"),
        count(when(col("rating") >= 4.5, 1)).alias("excellent_stores"),
    )
    # Share of excellent (>=4.5) stores per city, as a percentage.
    city_excellence_rate = city_rating_analysis.withColumn(
        "excellence_rate", (col("excellent_stores") / col("total_stores") * 100)
    )
    # Only cities with enough samples (>15) to be meaningful.
    correlation_by_city = (
        hotpot_df.groupBy("city")
        .agg(
            avg("rating").alias("avg_rating"),
            avg("price").alias("avg_price"),
            count("store_id").alias("sample_size"),
        )
        .filter(col("sample_size") > 15)
    )
    # Low rating spread => consistent quality across the city's stores.
    quality_consistency = hotpot_df.groupBy("city").agg(
        avg("rating").alias("avg_rating"),
        stddev("rating").alias("rating_stddev"),
    )
    stable_markets = quality_consistency.filter(col("rating_stddev") < 0.3).orderBy(
        desc("avg_rating")
    )
    # Popular (>1000) but low-rated stores: likely improvement candidates.
    rating_improvement_potential = hotpot_df.filter(
        (col("rating") < 4.0) & (col("popularity") > 1000)
    )
    underperforming_analysis = rating_improvement_potential.groupBy("city").agg(
        count("store_id").alias("underperforming_count"),
        avg("rating").alias("avg_underperform_rating"),
    )
    excellence_benchmarks = (
        hotpot_df.filter(col("rating") >= 4.7)
        .groupBy("city")
        .agg(count("store_id").alias("benchmark_count"), avg("price").alias("benchmark_price"))
    )
    # Composite index: weighted average rating plus excellence-rate bonus.
    market_quality_index = city_rating_analysis.withColumn(
        "quality_index",
        (col("city_avg_rating") * 0.7)
        + ((col("excellent_stores") / col("total_stores") * 100) * 0.003),
    )
    top_quality_markets = market_quality_index.orderBy(desc("quality_index")).limit(15)
    result_data = {
        "level_stats": level_stats.toPandas().to_dict('records'),
        "city_excellence_rate": city_excellence_rate.toPandas().to_dict('records'),
        "correlation_analysis": correlation_by_city.toPandas().to_dict('records'),
        "stable_markets": stable_markets.toPandas().to_dict('records'),
        "improvement_potential": underperforming_analysis.toPandas().to_dict('records'),
        "excellence_benchmarks": excellence_benchmarks.toPandas().to_dict('records'),
        "top_quality_markets": top_quality_markets.toPandas().to_dict('records'),
    }
    return JsonResponse(result_data)
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊