一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《餐饮外卖平台数据分析系统》是一个基于大数据技术构建的综合性数据分析平台,采用Hadoop+Spark分布式计算框架作为核心数据处理引擎,结合Python语言的强大数据处理能力,构建了完整的餐饮外卖业务数据分析体系。系统以Django作为后端服务框架,提供稳定的API接口服务,前端采用Vue.js配合ElementUI组件库构建用户界面,通过Echarts图表库实现数据的可视化展现。在数据存储方面,系统利用HDFS分布式文件系统存储海量原始数据,MySQL数据库管理结构化业务数据,通过Spark SQL进行高效的数据查询和分析。系统核心功能涵盖用户行为分析、菜品销售统计、商家经营状况监控、市场竞争态势分析等多个维度,能够为餐饮外卖平台的运营决策提供数据支撑。通过Pandas和NumPy等数据科学库的深度集成,系统实现了从数据采集、清洗、分析到可视化展示的完整业务闭环,为餐饮外卖行业的数字化运营提供了实用的技术解决方案。
三、视频解说
四、部分功能展示
五、部分代码展示
import json
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
# Shared SparkSession for every analysis view in this module.
# Adaptive query execution is enabled so Spark can coalesce shuffle
# partitions at runtime instead of using a fixed partition count.
_spark_builder = SparkSession.builder.appName("RestaurantDataAnalysis")
_spark_builder = _spark_builder.config("spark.sql.adaptive.enabled", "true")
_spark_builder = _spark_builder.config("spark.sql.adaptive.coalescePartitions.enabled", "true")
spark = _spark_builder.getOrCreate()
@csrf_exempt
def analyze_user_consumption_behavior(request):
    """Analyze per-user consumption behavior over completed orders.

    Computes per-user aggregates (frequency, spend, restaurant diversity,
    lifetime), ordering time patterns (peak hours / weekdays), a
    value-based user segmentation and a simple loyalty score.

    Returns:
        JsonResponse: {'status': 'success', 'data': {...}} with
        record-oriented dicts for each analysis section.
    """
    # Only completed orders contribute to consumption statistics.
    user_orders_df = spark.sql("SELECT user_id, order_time, total_amount, restaurant_id, food_items FROM orders WHERE order_status = 'completed'")
    # Per-user aggregates. min()/max() give the true first/last order
    # timestamps; the original first() is non-deterministic without an
    # explicit ordering and did not return the actual first order.
    user_behavior_stats = user_orders_df.groupBy("user_id").agg(
        count("*").alias("order_frequency"),
        avg("total_amount").alias("avg_order_value"),
        sum("total_amount").alias("total_consumption"),
        countDistinct("restaurant_id").alias("restaurant_diversity"),
        min("order_time").alias("first_order_time"),
        max("order_time").alias("last_order_time")
    )
    user_behavior_stats = user_behavior_stats.withColumn(
        "customer_lifetime_days",
        datediff(col("last_order_time"), col("first_order_time")))
    # "+ 1" guards against division by zero for single-day customers.
    user_behavior_stats = user_behavior_stats.withColumn(
        "order_frequency_per_month",
        col("order_frequency") * 30 / (col("customer_lifetime_days") + 1))
    time_pattern_df = user_orders_df.withColumn("order_hour", hour("order_time")).withColumn("order_weekday", dayofweek("order_time"))
    peak_hours = time_pattern_df.groupBy("order_hour").count().orderBy(desc("count")).limit(3)
    peak_weekdays = time_pattern_df.groupBy("order_weekday").count().orderBy(desc("count")).limit(2)
    # Value segmentation driven by order frequency and average basket size.
    user_segments = user_behavior_stats.withColumn(
        "user_segment",
        when((col("order_frequency") >= 20) & (col("avg_order_value") >= 80), "高价值客户")
        .when((col("order_frequency") >= 10) & (col("avg_order_value") >= 50), "中等价值客户")
        .when(col("order_frequency") >= 5, "潜力客户")
        .otherwise("新客户"))
    segment_distribution = user_segments.groupBy("user_segment").agg(
        count("*").alias("user_count"),
        avg("total_consumption").alias("avg_total_consumption"),
        avg("restaurant_diversity").alias("avg_restaurant_diversity")
    )
    # Loyalty: many orders concentrated on few distinct restaurants.
    repeat_purchase_rate = user_orders_df.groupBy("user_id").agg(
        count("*").alias("total_orders"),
        countDistinct("restaurant_id").alias("unique_restaurants")
    ).withColumn("loyalty_score", col("total_orders") / col("unique_restaurants"))
    result_data = {
        'user_behavior_summary': user_behavior_stats.toPandas().to_dict('records'),
        'peak_hours': peak_hours.toPandas().to_dict('records'),
        'peak_weekdays': peak_weekdays.toPandas().to_dict('records'),
        'user_segments': segment_distribution.toPandas().to_dict('records'),
        # Loyalty is only meaningful for users with at least 3 orders.
        'loyalty_analysis': repeat_purchase_rate.filter(col("total_orders") >= 3).toPandas().to_dict('records')
    }
    return JsonResponse({'status': 'success', 'data': result_data})
@csrf_exempt
def analyze_food_sales_performance(request):
    """Analyze dish-level sales performance over completed orders.

    Produces daily sales stats, per-dish performance metrics, category
    market share, week-over-week trending dishes and seasonal patterns.

    Returns:
        JsonResponse: {'status': 'success', 'data': {...}} with
        record-oriented dicts for each analysis section.
    """
    food_sales_df = spark.sql("SELECT f.food_id, f.food_name, f.category, f.price, oi.quantity, oi.order_id, o.order_time, o.restaurant_id FROM food_items f JOIN order_items oi ON f.food_id = oi.food_id JOIN orders o ON oi.order_id = o.order_id WHERE o.order_status = 'completed'")
    daily_sales_stats = food_sales_df.withColumn("order_date", to_date("order_time")).groupBy("food_id", "food_name", "order_date").agg(
        sum("quantity").alias("daily_quantity_sold"),
        sum(col("quantity") * col("price")).alias("daily_revenue"),
        count("order_id").alias("daily_order_count")
    )
    food_performance_metrics = food_sales_df.groupBy("food_id", "food_name", "category", "price").agg(
        sum("quantity").alias("total_quantity_sold"),
        sum(col("quantity") * col("price")).alias("total_revenue"),
        count("order_id").alias("total_orders"),
        countDistinct("restaurant_id").alias("restaurant_count"),
        avg("quantity").alias("avg_quantity_per_order")
    )
    # popularity_score weights order count (0.4) and units sold (0.6).
    food_performance_metrics = food_performance_metrics.withColumn(
        "revenue_per_order", col("total_revenue") / col("total_orders")
    ).withColumn(
        "popularity_score", col("total_orders") * 0.4 + col("total_quantity_sold") * 0.6)
    # Global window (empty partitionBy) gives the grand-total revenue; the
    # original called .over() with no WindowSpec, which raises TypeError.
    grand_total_window = Window.partitionBy()
    category_analysis = food_sales_df.groupBy("category").agg(
        sum("quantity").alias("category_total_quantity"),
        sum(col("quantity") * col("price")).alias("category_total_revenue"),
        countDistinct("food_id").alias("unique_food_items"),
        avg("price").alias("avg_category_price")
    ).withColumn(
        "category_market_share",
        col("category_total_revenue") / sum("category_total_revenue").over(grand_total_window))
    # Week-over-week trend: compare each day with the same dish 7 days earlier.
    trend_window = Window.partitionBy("food_id").orderBy("order_date")
    trending_foods = daily_sales_stats.withColumn(
        "sales_trend", lag("daily_quantity_sold", 7).over(trend_window))
    # Null lag (first week) or zero baseline yields NULL growth_rate,
    # which the > 20 filter below silently drops.
    trending_foods = trending_foods.withColumn(
        "growth_rate",
        (col("daily_quantity_sold") - col("sales_trend")) / col("sales_trend") * 100)
    top_trending = trending_foods.filter(col("growth_rate") > 20).orderBy(desc("growth_rate")).limit(10)
    seasonal_patterns = food_sales_df.withColumn("month", month("order_time")).withColumn(
        "season",
        when(col("month").isin(3, 4, 5), "春季")
        .when(col("month").isin(6, 7, 8), "夏季")
        .when(col("month").isin(9, 10, 11), "秋季")
        .otherwise("冬季"))
    seasonal_sales = seasonal_patterns.groupBy("season", "category").agg(
        sum("quantity").alias("seasonal_quantity"),
        avg(col("quantity") * col("price")).alias("avg_seasonal_revenue")
    )
    result_data = {
        'food_performance_metrics': food_performance_metrics.orderBy(desc("popularity_score")).toPandas().to_dict('records'),
        'category_analysis': category_analysis.orderBy(desc("category_market_share")).toPandas().to_dict('records'),
        'trending_foods': top_trending.toPandas().to_dict('records'),
        'seasonal_patterns': seasonal_sales.toPandas().to_dict('records'),
        # Only the trailing 30 days are returned for the daily trend chart.
        'daily_sales_trends': daily_sales_stats.filter(col("order_date") >= date_sub(current_date(), 30)).toPandas().to_dict('records')
    }
    return JsonResponse({'status': 'success', 'data': result_data})
@csrf_exempt
def analyze_restaurant_operation_performance(request):
    """Analyze restaurant operations.

    Covers revenue/efficiency metrics, the busiest hour per restaurant,
    delivery-time-based satisfaction, cuisine competition, per-location
    totals, and a composite performance ranking (top 20).

    Returns:
        JsonResponse: {'status': 'success', 'data': {...}} with
        record-oriented dicts for each analysis section.
    """
    restaurant_orders_df = spark.sql("SELECT r.restaurant_id, r.restaurant_name, r.cuisine_type, r.location, o.order_id, o.order_time, o.total_amount, o.delivery_time, o.order_status FROM restaurants r JOIN orders o ON r.restaurant_id = o.restaurant_id")
    restaurant_performance = restaurant_orders_df.filter(col("order_status") == "completed").groupBy("restaurant_id", "restaurant_name", "cuisine_type", "location").agg(
        count("order_id").alias("total_orders"),
        sum("total_amount").alias("total_revenue"),
        avg("total_amount").alias("avg_order_value"),
        avg("delivery_time").alias("avg_delivery_time"),
        countDistinct(to_date("order_time")).alias("active_days")
    )
    # efficiency_score rewards fast delivery and high daily throughput.
    restaurant_performance = restaurant_performance.withColumn(
        "daily_avg_orders", col("total_orders") / col("active_days")
    ).withColumn(
        "revenue_per_day", col("total_revenue") / col("active_days")
    ).withColumn(
        "efficiency_score", (100 - col("avg_delivery_time")) * col("daily_avg_orders") / 100)
    peak_hours_analysis = restaurant_orders_df.withColumn("order_hour", hour("order_time")).groupBy("restaurant_id", "restaurant_name", "order_hour").agg(
        count("order_id").alias("hourly_orders"),
        avg("total_amount").alias("hourly_avg_revenue")
    )
    # max(struct(orders, hour)) selects the row with the most orders, so
    # busiest_hour really matches peak_hour_orders; the original paired
    # max() with an unordered first(), which returned an arbitrary hour.
    restaurant_peak_hours = peak_hours_analysis.groupBy("restaurant_id", "restaurant_name").agg(
        max(struct(col("hourly_orders"), col("order_hour"))).alias("peak")
    ).select(
        "restaurant_id",
        "restaurant_name",
        col("peak.hourly_orders").alias("peak_hour_orders"),
        col("peak.order_hour").alias("busiest_hour"))
    # Delivery ratings only make sense for completed deliveries; the
    # original also rated cancelled/pending orders, whose null
    # delivery_time fell through to rating 1 and skewed satisfaction.
    customer_satisfaction = restaurant_orders_df.filter(col("order_status") == "completed").withColumn(
        "delivery_rating",
        when(col("delivery_time") <= 30, 5)
        .when(col("delivery_time") <= 45, 4)
        .when(col("delivery_time") <= 60, 3)
        .when(col("delivery_time") <= 75, 2)
        .otherwise(1))
    satisfaction_metrics = customer_satisfaction.groupBy("restaurant_id", "restaurant_name").agg(
        avg("delivery_rating").alias("avg_delivery_rating"),
        count(when(col("delivery_rating") >= 4, 1)).alias("good_delivery_count"),
        count("order_id").alias("total_delivery_count")
    ).withColumn("satisfaction_rate", col("good_delivery_count") / col("total_delivery_count") * 100)
    cuisine_competition = restaurant_performance.groupBy("cuisine_type").agg(
        count("restaurant_id").alias("restaurant_count"),
        avg("total_revenue").alias("avg_cuisine_revenue"),
        max("total_revenue").alias("top_revenue_in_cuisine"),
        avg("avg_order_value").alias("cuisine_avg_order_value")
    )
    location_analysis = restaurant_performance.groupBy("location").agg(
        count("restaurant_id").alias("restaurants_in_location"),
        sum("total_revenue").alias("location_total_revenue"),
        avg("daily_avg_orders").alias("location_avg_daily_orders")
    )
    # Composite rank: revenue/day (0.4) + throughput (0.3) + efficiency (0.3).
    restaurant_ranking = restaurant_performance.withColumn(
        "performance_score",
        col("revenue_per_day") * 0.4 + col("daily_avg_orders") * 0.3 + col("efficiency_score") * 0.3)
    result_data = {
        'restaurant_performance': restaurant_performance.orderBy(desc("total_revenue")).toPandas().to_dict('records'),
        'peak_hours_analysis': restaurant_peak_hours.toPandas().to_dict('records'),
        'satisfaction_metrics': satisfaction_metrics.orderBy(desc("satisfaction_rate")).toPandas().to_dict('records'),
        'cuisine_competition': cuisine_competition.orderBy(desc("avg_cuisine_revenue")).toPandas().to_dict('records'),
        'location_analysis': location_analysis.orderBy(desc("location_total_revenue")).toPandas().to_dict('records'),
        'restaurant_ranking': restaurant_ranking.orderBy(desc("performance_score")).limit(20).toPandas().to_dict('records')
    }
    return JsonResponse({'status': 'success', 'data': result_data})
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊