Preface
💖💖 Author: 计算机程序员小杨
💙💙 About me: I work in a computer-related field and specialize in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT areas. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing text-similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code. If you have questions about code or technology, feel free to ask me!
💛💛 A few words: thank you all for your attention and support!
💕💕 Contact 计算机程序员小杨 at the end of the article to get the source code
💜💜 Practical website projects | Android/Mini-Program projects | Big-data projects | Deep-learning projects | Computer graduation project topics 💜💜
I. Development Tools Overview
Big-data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
Development languages: Python + Java (both versions are supported)
Backend frameworks: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are supported)
Frontend: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
II. System Overview
The Big-Data-Based Store Sales Analysis and Visualization System is a comprehensive platform for in-depth mining and intelligent presentation of store sales data. It builds a complete data-processing pipeline on the Hadoop + Spark big-data stack, using Python and the Django backend framework. The frontend is a modern interface built with Vue + ElementUI + Echarts, and core business data is stored in MySQL, giving end-to-end automation from data collection and cleaning through analysis and visualization. Core features cover user management, store sales data management, overall sales performance analysis, product-dimension analysis, regional store performance analysis, and customer purchasing-behavior analysis. Using Spark SQL together with data-science tools such as Pandas and NumPy, the system can run real-time computation and statistical analysis over large volumes of sales data and generate a variety of business reports and trend charts. A dedicated visualization dashboard renders the analysis results as intuitive Echarts charts, helping store managers grasp operating conditions quickly and make better-informed business decisions, which puts big-data technology to practical use in a retail setting.
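To make the data-processing chain described above concrete, here is a minimal sketch of one common way to wire Spark to MySQL directly over JDBC and run a Spark SQL aggregation. Note that the source code in section V instead pulls rows through Django's database connection; the connection URL, credentials, driver class, and the presence of the MySQL JDBC driver on the Spark classpath below are illustrative assumptions, not the project's actual configuration.

from pyspark.sql import SparkSession

# Pipeline sketch: MySQL -> Spark SQL -> Pandas (all names/credentials assumed)
spark = SparkSession.builder.appName("StoreSalesPipelineSketch").getOrCreate()

# Hypothetical JDBC connection; requires the MySQL connector JAR on the classpath
sales_df = (spark.read.format("jdbc")
            .option("url", "jdbc:mysql://localhost:3306/store_db")
            .option("driver", "com.mysql.cj.jdbc.Driver")
            .option("dbtable", "store_sales_data")
            .option("user", "root")
            .option("password", "password")
            .load())

# Example Spark SQL aggregation: revenue per product category
sales_df.createOrReplaceTempView("sales")
category_revenue = spark.sql("""
    SELECT category, SUM(total_amount) AS revenue
    FROM sales
    GROUP BY category
    ORDER BY revenue DESC
""")

# Hand off to Pandas for further statistics or JSON serialization
category_revenue_pd = category_revenue.toPandas()
print(category_revenue_pd.head())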
III. System Feature Demonstration
A simple scenario built on complex technology: decoding the technical depth of the Big-Data-Based Store Sales Analysis and Visualization System | Django | Python | Graduation Project
IV. System Interface Showcase
V. System Source Code
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum as spark_sum, avg, count, desc, asc, lag, when
from pyspark.sql.window import Window
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.db import connection

# Shared Spark session for all analysis views
spark = SparkSession.builder.appName("StoreDataAnalysis").getOrCreate()
def overall_sales_performance_analysis(request):
    """Aggregate daily sales KPIs for a date range and compute growth trends."""
    query = """
        SELECT
            DATE(sale_date) AS date,
            SUM(total_amount) AS daily_revenue,
            COUNT(DISTINCT order_id) AS order_count,
            AVG(total_amount) AS avg_order_value,
            SUM(quantity) AS total_quantity
        FROM store_sales_data
        WHERE sale_date >= %s AND sale_date <= %s
        GROUP BY DATE(sale_date)
        ORDER BY date
    """
    start_date = request.GET.get('start_date')
    end_date = request.GET.get('end_date')
    with connection.cursor() as cursor:
        cursor.execute(query, [start_date, end_date])
        raw_data = cursor.fetchall()
    df = spark.createDataFrame(
        raw_data,
        ['date', 'daily_revenue', 'order_count', 'avg_order_value', 'total_quantity'])
    # Overall KPIs, aggregated on the Spark side
    total_revenue = df.agg(spark_sum('daily_revenue')).collect()[0][0]
    total_orders = df.agg(spark_sum('order_count')).collect()[0][0]
    avg_daily_revenue = df.agg(avg('daily_revenue')).collect()[0][0]
    # Day-over-day growth rate: lag() must run over an ordered window
    window = Window.orderBy('date')
    growth_df = df.withColumn('prev_revenue', lag('daily_revenue', 1).over(window))
    growth_df = growth_df.withColumn(
        'growth_rate',
        (col('daily_revenue') - col('prev_revenue')) / col('prev_revenue') * 100)
    peak_day = df.orderBy(desc('daily_revenue')).first()
    low_day = df.orderBy(asc('daily_revenue')).first()
    # Week-over-week trend, computed with NumPy on the collected data;
    # this needs at least two full weeks of history
    trend_analysis = []
    pandas_df = df.toPandas()
    revenue_values = pandas_df['daily_revenue'].values
    if len(revenue_values) >= 14:
        recent_week = np.mean(revenue_values[-7:])
        previous_week = np.mean(revenue_values[-14:-7])
        weekly_trend = float((recent_week - previous_week) / previous_week * 100)
        trend_analysis.append({'period': 'weekly', 'trend': weekly_trend})
    return JsonResponse({
        'total_revenue': float(total_revenue),
        'total_orders': int(total_orders),
        'avg_daily_revenue': float(avg_daily_revenue),
        'peak_day': {'date': str(peak_day['date']), 'revenue': float(peak_day['daily_revenue'])},
        'low_day': {'date': str(low_day['date']), 'revenue': float(low_day['daily_revenue'])},
        # Include the per-day growth rates (previously computed but never returned)
        'daily_growth': [row.asDict() for row in growth_df.collect()],
        'trend_analysis': trend_analysis,
        'daily_data': pandas_df.to_dict('records')
    })
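For reference, here is a minimal way the Vue frontend or a quick test script might call this view. The URL path /api/overall-sales/ and the dev-server address are assumptions, since the project's URL configuration is not shown in this article (a routing sketch appears after the source code below).

import requests

# Hypothetical endpoint path and dev-server address; adjust to the real routing
resp = requests.get(
    "http://localhost:8000/api/overall-sales/",
    params={"start_date": "2024-01-01", "end_date": "2024-03-31"},
)
data = resp.json()
print(data["total_revenue"], data["trend_analysis"])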
def product_dimension_analysis(request):
    """Analyze sales by product: top sellers, category rollups, price segments."""
    product_query = """
        SELECT
            product_id,
            product_name,
            category,
            SUM(quantity) AS total_sold,
            SUM(total_amount) AS total_revenue,
            AVG(unit_price) AS avg_price,
            COUNT(DISTINCT DATE(sale_date)) AS sales_days
        FROM store_sales_data
        WHERE sale_date >= %s AND sale_date <= %s
        GROUP BY product_id, product_name, category
    """
    start_date = request.GET.get('start_date')
    end_date = request.GET.get('end_date')
    category_filter = request.GET.get('category', '')
    with connection.cursor() as cursor:
        cursor.execute(product_query, [start_date, end_date])
        product_data = cursor.fetchall()
    product_df = spark.createDataFrame(
        product_data,
        ['product_id', 'product_name', 'category',
         'total_sold', 'total_revenue', 'avg_price', 'sales_days'])
    # Optional category filter, applied on the Spark side
    if category_filter:
        product_df = product_df.filter(col('category') == category_filter)
    # Derived per-product metrics
    product_df = product_df.withColumn('daily_avg_sales', col('total_sold') / col('sales_days'))
    product_df = product_df.withColumn('revenue_per_unit', col('total_revenue') / col('total_sold'))
    top_selling_products = product_df.orderBy(desc('total_sold')).limit(10)
    top_revenue_products = product_df.orderBy(desc('total_revenue')).limit(10)
    # Category-level rollup
    category_summary = product_df.groupBy('category').agg(
        spark_sum('total_sold').alias('category_quantity'),
        spark_sum('total_revenue').alias('category_revenue'),
        avg('avg_price').alias('avg_category_price'),
        count('product_id').alias('product_count')
    ).orderBy(desc('category_revenue'))
    # Pearson correlation among volume, revenue, and price, computed in Pandas
    pandas_product_df = product_df.toPandas()
    correlation_matrix = pandas_product_df[['total_sold', 'total_revenue', 'avg_price']].corr()
    # Bucket products into price segments (thresholds are business-defined)
    price_segments = product_df.withColumn(
        'price_segment',
        when(col('avg_price') < 50, 'Low')
        .when(col('avg_price') < 200, 'Medium')
        .otherwise('High'))
    segment_performance = price_segments.groupBy('price_segment').agg(
        spark_sum('total_sold').alias('segment_quantity'),
        spark_sum('total_revenue').alias('segment_revenue'),
        avg('daily_avg_sales').alias('avg_daily_sales')
    )
    return JsonResponse({
        'top_selling_products': [row.asDict() for row in top_selling_products.collect()],
        'top_revenue_products': [row.asDict() for row in top_revenue_products.collect()],
        'category_summary': [row.asDict() for row in category_summary.collect()],
        'price_segment_analysis': [row.asDict() for row in segment_performance.collect()],
        'correlation_data': correlation_matrix.to_dict()
    })
def regional_store_performance_analysis(request):
    """Compare store and regional performance, with dispersion stats and a ranking."""
    regional_query = """
        SELECT
            s.store_id,
            si.store_name,
            si.region,
            si.city,
            SUM(s.total_amount) AS store_revenue,
            COUNT(DISTINCT s.order_id) AS store_orders,
            AVG(s.total_amount) AS avg_order_value,
            SUM(s.quantity) AS total_items_sold,
            COUNT(DISTINCT s.customer_id) AS unique_customers
        FROM store_sales_data s
        JOIN store_info si ON s.store_id = si.id
        WHERE s.sale_date >= %s AND s.sale_date <= %s
        GROUP BY s.store_id, si.store_name, si.region, si.city
    """
    start_date = request.GET.get('start_date')
    end_date = request.GET.get('end_date')
    region_filter = request.GET.get('region', '')
    with connection.cursor() as cursor:
        cursor.execute(regional_query, [start_date, end_date])
        regional_data = cursor.fetchall()
    regional_df = spark.createDataFrame(
        regional_data,
        ['store_id', 'store_name', 'region', 'city',
         'store_revenue', 'store_orders', 'avg_order_value',
         'total_items_sold', 'unique_customers'])
    if region_filter:
        regional_df = regional_df.filter(col('region') == region_filter)
    # Per-store efficiency metrics
    regional_df = regional_df.withColumn(
        'revenue_per_customer', col('store_revenue') / col('unique_customers'))
    regional_df = regional_df.withColumn(
        'items_per_order', col('total_items_sold') / col('store_orders'))
    # Region-level rollup
    region_summary = regional_df.groupBy('region').agg(
        spark_sum('store_revenue').alias('region_revenue'),
        spark_sum('store_orders').alias('region_orders'),
        avg('avg_order_value').alias('region_avg_order'),
        count('store_id').alias('store_count'),
        spark_sum('unique_customers').alias('region_customers')
    ).orderBy(desc('region_revenue'))
    top_performing_stores = regional_df.orderBy(desc('store_revenue')).limit(5)
    bottom_performing_stores = regional_df.orderBy(asc('store_revenue')).limit(5)
    city_performance = regional_df.groupBy('city', 'region').agg(
        spark_sum('store_revenue').alias('city_revenue'),
        avg('avg_order_value').alias('city_avg_order'),
        spark_sum('unique_customers').alias('city_customers')
    ).orderBy(desc('city_revenue'))
    # Revenue dispersion per region, computed with Pandas
    pandas_regional_df = regional_df.toPandas()
    regional_stats = {}
    for region in pandas_regional_df['region'].unique():
        region_data = pandas_regional_df[pandas_regional_df['region'] == region]
        regional_stats[region] = {
            'revenue_std': float(region_data['store_revenue'].std()),
            'revenue_variance': float(region_data['store_revenue'].var()),
            'max_revenue': float(region_data['store_revenue'].max()),
            'min_revenue': float(region_data['store_revenue'].min())
        }
    # Composite performance score; the 0.4/0.3/0.3 weights are a business heuristic
    performance_ranking = regional_df.withColumn(
        'performance_score',
        col('store_revenue') * 0.4
        + col('unique_customers') * 100 * 0.3
        + col('avg_order_value') * col('store_orders') * 0.3)
    final_ranking = performance_ranking.orderBy(desc('performance_score'))
    return JsonResponse({
        'region_summary': [row.asDict() for row in region_summary.collect()],
        'top_stores': [row.asDict() for row in top_performing_stores.collect()],
        'bottom_stores': [row.asDict() for row in bottom_performing_stores.collect()],
        'city_performance': [row.asDict() for row in city_performance.collect()],
        'regional_statistics': regional_stats,
        'performance_ranking': [row.asDict() for row in final_ranking.collect()]
    })
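The article does not include the URL configuration, so the following is only a minimal routing sketch for exposing the three views above as JSON APIs that the Vue frontend can call; the module layout and path names are assumptions.

# urls.py - a minimal routing sketch (module path and URL names are assumed)
from django.urls import path

from . import views  # assumes the three views above live in this app's views.py

urlpatterns = [
    path('api/overall-sales/', views.overall_sales_performance_analysis),
    path('api/product-analysis/', views.product_dimension_analysis),
    path('api/regional-performance/', views.regional_store_performance_analysis),
]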
VI. System Documentation
Conclusion
💕💕 To get the source code, contact 计算机程序员小杨