前言
- 💖💖作者:计算机程序员小杨
- 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💕💕文末获取源码联系 计算机程序员小杨
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 深度学习实战项目
- 计算机毕业设计选题
- 💜💜
一.开发工具简介
- 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
- 开发语言:Python+Java(两个版本都支持)
- 后端框架:Django+Spring Boot(Spring+SpringMVC+MyBatis)(两个版本都支持)
- 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery
- 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
- 数据库:MySQL
二.系统内容简介
基于大数据的客户购物订单数据分析与可视化系统是一个集数据采集、处理、分析与展示于一体的综合性平台。该系统采用Hadoop+Spark大数据框架作为核心技术架构,支持海量订单数据的高效处理与实时分析。系统前端采用Vue+ElementUI+Echarts技术栈构建用户交互界面,后端基于SpringBoot框架提供稳定的服务支撑,数据存储层使用MySQL数据库确保数据的可靠性。系统具备完整的用户管理体系,支持购物订单数据的全生命周期管理,通过Spark SQL和Pandas进行深度数据挖掘,实现产品关联规则分析、客户价值分层分析、销售市场分布分析等多维度智能分析功能。平台运用Echarts可视化组件将复杂的数据分析结果以直观的图表形式展现,为企业决策提供数据支撑。整个系统架构设计合理,功能模块清晰,既体现了大数据技术的先进性,又兼顾了实际业务场景的应用需求。
三.系统功能演示
毕设技术含量低被质疑?基于大数据的客户购物订单分析系统一站式解决
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.clustering import KMeans
from pyspark.ml.fpm import FPGrowth
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
# Module-level SparkSession shared by all analysis views below; adaptive
# query execution is enabled so Spark can re-tune shuffle plans at runtime.
spark = SparkSession.builder.appName("OrderDataAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
def customer_value_analysis(request):
    """Django view: segment customers into four groups via K-Means on RFM metrics.

    Reads the shopping_order table over JDBC, computes recency/frequency/
    monetary per customer, clusters customers with K-Means (k=4), labels
    each cluster relative to the cross-cluster averages, and returns a
    per-segment summary as JSON.
    """
    orders = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/order_system")
              .option("dbtable", "shopping_order")
              .option("user", "root")
              .option("password", "123456")
              .load())
    # RFM metrics: order count, total spend, and days since the last order.
    rfm = orders.groupBy("customer_id").agg(
        count("order_id").alias("frequency"),
        sum("total_amount").alias("monetary"),
        datediff(current_date(), max("order_date")).alias("recency"),
    )
    vectorized = VectorAssembler(
        inputCols=["frequency", "monetary", "recency"], outputCol="features"
    ).transform(rfm)
    clusterer = KMeans(k=4, seed=42, featuresCol="features",
                       predictionCol="customer_segment")
    segmented = clusterer.fit(vectorized).transform(vectorized)
    stats = segmented.groupBy("customer_segment").agg(
        avg("frequency").alias("avg_frequency"),
        avg("monetary").alias("avg_monetary"),
        avg("recency").alias("avg_recency"),
        count("customer_id").alias("customer_count"),
    ).toPandas()
    # Cross-cluster means are invariant during labeling; compute them once.
    mean_monetary = stats['avg_monetary'].mean()
    mean_frequency = stats['avg_frequency'].mean()
    mean_recency = stats['avg_recency'].mean()

    def _label(row):
        # Same precedence as the original nested conditional expression.
        if row['avg_monetary'] > mean_monetary and row['avg_frequency'] > mean_frequency:
            return '高价值客户'
        if row['avg_monetary'] > mean_monetary:
            return '潜力客户'
        if row['avg_recency'] > mean_recency:
            return '流失风险客户'
        return '普通客户'

    stats['segment_label'] = stats.apply(_label, axis=1)
    result_data = [
        {
            'segment_id': int(row['customer_segment']),
            'segment_name': row['segment_label'],
            'customer_count': int(row['customer_count']),
            'avg_frequency': round(float(row['avg_frequency']), 2),
            'avg_monetary': round(float(row['avg_monetary']), 2),
            'avg_recency': round(float(row['avg_recency']), 2),
        }
        for _, row in stats.iterrows()
    ]
    return JsonResponse({'code': 200, 'message': '客户价值分层分析完成', 'data': result_data})
def product_association_analysis(request):
    """Django view: mine frequent itemsets and association rules with FP-Growth.

    Joins order lines with product names, groups each order into a basket
    of product names, drops single-item baskets, fits FP-Growth, and
    returns the top association rules and frequent itemsets as JSON.
    """
    jdbc_url = "jdbc:mysql://localhost:3306/order_system"
    details = (spark.read.format("jdbc")
               .option("url", jdbc_url)
               .option("dbtable", "order_detail")
               .option("user", "root")
               .option("password", "123456")
               .load())
    products = (spark.read.format("jdbc")
                .option("url", jdbc_url)
                .option("dbtable", "product")
                .option("user", "root")
                .option("password", "123456")
                .load())
    # One basket per order; baskets of size < 2 carry no association signal.
    baskets = (details.join(products, "product_id")
               .select("order_id", "product_name")
               .groupBy("order_id")
               .agg(collect_list("product_name").alias("items"))
               .filter(size(col("items")) >= 2))
    model = FPGrowth(itemsCol="items", minSupport=0.01, minConfidence=0.1).fit(baskets)
    rules_pd = model.associationRules.toPandas()
    itemsets_pd = model.freqItemsets.toPandas()
    association_results = [
        {
            'antecedent': ', '.join(rule['antecedent']),
            'consequent': ', '.join(rule['consequent']),
            'confidence': round(float(rule['confidence']), 4),
            'lift': round(float(rule['lift']), 4),
            'support': round(float(rule['support']), 4),
        }
        for _, rule in rules_pd.iterrows()
    ]
    frequent_results = [
        {
            'itemset': ', '.join(itemset['items']),
            'frequency': int(itemset['freq']),
            'item_count': len(itemset['items']),
        }
        for _, itemset in itemsets_pd.iterrows()
    ]
    return JsonResponse({
        'code': 200,
        'message': '产品关联规则分析完成',
        'data': {
            'association_rules': association_results[:20],
            'frequent_itemsets': frequent_results[:30]
        }
    })
def _load_sales_table(table_name):
    """Load one table from the order_system MySQL schema as a Spark DataFrame.

    Extracted because the identical 5-option JDBC chain was repeated three
    times in the view below.
    NOTE(review): credentials are hard-coded here (as in the original);
    they should come from Django settings or the environment.
    """
    return (spark.read.format("jdbc")
            .option("url", "jdbc:mysql://localhost:3306/order_system")
            .option("dbtable", table_name)
            .option("user", "root")
            .option("password", "123456")
            .load())


def sales_performance_analysis(request):
    """Django view: sales performance along three axes — month, category, region.

    Joins orders, order lines and products, then aggregates revenue,
    quantity, and distinct order/customer counts per month, per product
    category and per customer region, returning all three summaries as JSON.
    """
    combined_df = (_load_sales_table("shopping_order")
                   .join(_load_sales_table("order_detail"), "order_id")
                   .join(_load_sales_table("product"), "product_id"))
    # NOTE(review): after the order<->detail join, order-level columns such as
    # total_amount repeat once per detail line, so sum("total_amount") may
    # overcount multi-line orders — confirm against the table schema.
    monthly_sales = (combined_df
                     .withColumn("month", date_format(col("order_date"), "yyyy-MM"))
                     .groupBy("month")
                     .agg(sum("total_amount").alias("total_revenue"),
                          sum("quantity").alias("total_quantity"),
                          countDistinct("order_id").alias("order_count"),
                          countDistinct("customer_id").alias("customer_count"))
                     .orderBy("month"))
    category_sales = (combined_df.groupBy("category")
                      .agg(sum("total_amount").alias("category_revenue"),
                           sum("quantity").alias("category_quantity"),
                           avg("unit_price").alias("avg_price"),
                           countDistinct("product_id").alias("product_variety"))
                      .orderBy(desc("category_revenue")))
    region_sales = (combined_df.groupBy("customer_region")
                    .agg(sum("total_amount").alias("region_revenue"),
                         countDistinct("customer_id").alias("region_customers"),
                         avg("total_amount").alias("avg_order_value"))
                    .orderBy(desc("region_revenue")))
    monthly_data = [
        {'month': row['month'],
         'revenue': round(float(row['total_revenue']), 2),
         'quantity': int(row['total_quantity']),
         'orders': int(row['order_count']),
         'customers': int(row['customer_count'])}
        for _, row in monthly_sales.toPandas().iterrows()
    ]
    category_data = [
        {'category': row['category'],
         'revenue': round(float(row['category_revenue']), 2),
         'quantity': int(row['category_quantity']),
         'avg_price': round(float(row['avg_price']), 2),
         'product_count': int(row['product_variety'])}
        for _, row in category_sales.toPandas().iterrows()
    ]
    region_data = [
        {'region': row['customer_region'],
         'revenue': round(float(row['region_revenue']), 2),
         'customers': int(row['region_customers']),
         'avg_order_value': round(float(row['avg_order_value']), 2)}
        for _, row in region_sales.toPandas().iterrows()
    ]
    return JsonResponse({
        'code': 200,
        'message': '销售绩效分析完成',
        'data': {
            'monthly_trend': monthly_data,
            'category_performance': category_data,
            'regional_distribution': region_data
        }
    })