一、个人简介
- 💖💖作者:计算机编程果茶熊
- 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 计算机毕业设计选题
- 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
- 大数据框架:Hadoop+Spark(Hive需要定制修改)
- 开发语言:Java+Python(两个版本都支持)
- 数据库:MySQL
- 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持)
- 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
- 基于大数据的农产品交易数据分析与可视化系统是一个集数据处理、分析和可视化展示于一体的综合性平台,该系统采用Hadoop分布式存储架构和Spark大数据处理引擎作为核心技术底座,能够高效处理海量农产品交易数据。系统后端基于Python语言开发,采用Django框架构建RESTful API接口,前端使用Vue.js框架结合ElementUI组件库打造现代化用户界面,通过Echarts图表库实现丰富的数据可视化效果。在数据处理层面,系统利用Spark SQL进行大规模数据查询和分析,结合Pandas和NumPy进行数据预处理和统计计算,将处理结果存储于MySQL数据库中供前端调用。系统功能涵盖用户管理模块(包括个人信息维护和密码修改)、农产品交易数据管理模块、实时数据大屏展示、客户群体画像分析、营销活动效果评估、产品运营指标监控以及整体销售业绩统计分析等九大核心功能模块。通过HDFS分布式文件系统存储原始交易数据,利用Spark的内存计算优势实现秒级数据分析响应,为农产品交易决策提供科学的数据支撑和直观的可视化展现,帮助管理者全面掌握市场动态、客户行为特征和销售趋势变化。
三、基于大数据的农产品交易数据分析与可视化系统-视频解说
毕设没思路?想用大数据技术却无从下手?农产品交易数据分析系统完美解决
四、基于大数据的农产品交易数据分析与可视化系统-功能展示
五、基于大数据的农产品交易数据分析与可视化系统-代码展示
# Core feature 1: customer segmentation (RFM profile analysis)
def analyze_customer_profile(request):
    """Build an RFM-based customer profile from the HDFS transaction data.

    Reads the raw transactions CSV, computes Recency/Frequency/Monetary
    metrics per customer via Spark SQL, scores and segments each customer,
    then returns per-segment summary statistics as a JSON response.
    """
    # getOrCreate() reuses an already-running session, so per-request calls are cheap.
    spark = SparkSession.builder.appName("CustomerProfileAnalysis").getOrCreate()
    # Load raw agricultural transaction records from HDFS.
    df = spark.read.format("csv").option("header", "true").load(
        "hdfs://localhost:9000/agricultural_data/transactions.csv")
    # Cleaning: drop rows missing the key fields, then normalise column types
    # (CSV loads every column as string).
    df = df.filter(df.customer_id.isNotNull() & df.transaction_amount.isNotNull())
    df = df.withColumn("transaction_amount", df.transaction_amount.cast("double"))
    df = df.withColumn("purchase_date", to_date(df.purchase_date, "yyyy-MM-dd"))
    # RFM metrics per customer via Spark SQL.
    df.createOrReplaceTempView("transactions")
    rfm_analysis = spark.sql("""
        SELECT customer_id,
               DATEDIFF(CURRENT_DATE(), MAX(purchase_date)) as recency,
               COUNT(transaction_id) as frequency,
               SUM(transaction_amount) as monetary
        FROM transactions
        GROUP BY customer_id
    """)
    # Recency score: the more recently a customer bought, the higher the score.
    rfm_analysis = rfm_analysis.withColumn("r_score",
        when(col("recency") <= 30, 5)
        .when(col("recency") <= 90, 4)
        .when(col("recency") <= 180, 3)
        .when(col("recency") <= 365, 2)
        .otherwise(1))
    # Frequency score: more purchases -> higher score.
    rfm_analysis = rfm_analysis.withColumn("f_score",
        when(col("frequency") >= 10, 5)
        .when(col("frequency") >= 7, 4)
        .when(col("frequency") >= 5, 3)
        .when(col("frequency") >= 3, 2)
        .otherwise(1))
    # Segment label derived from the combined R/F scores.
    rfm_with_labels = rfm_analysis.withColumn("customer_segment",
        when((col("r_score") >= 4) & (col("f_score") >= 4), "高价值客户")
        .when((col("r_score") >= 3) & (col("f_score") >= 3), "潜力客户")
        .when((col("r_score") <= 2) & (col("f_score") >= 3), "流失风险客户")
        .otherwise("新客户"))
    # The per-customer result is already aggregated, so it is safe to pull
    # into pandas for the summary step.
    profile_data = rfm_with_labels.toPandas()
    segment_summary = profile_data.groupby('customer_segment').agg({
        'customer_id': 'count',
        'monetary': ['mean', 'sum'],
        'frequency': 'mean'
    }).round(2)
    # BUGFIX: the multi-stat agg above yields MultiIndex (tuple) columns;
    # tuple dict keys are not JSON-serialisable, so JsonResponse raised
    # TypeError. Flatten them to plain strings such as "monetary_mean".
    segment_summary.columns = [
        '_'.join(c).rstrip('_') if isinstance(c, tuple) else c
        for c in segment_summary.columns
    ]
    return JsonResponse({
        'segment_distribution': segment_summary.to_dict(),
        'total_customers': len(profile_data),
        'analysis_timestamp': timezone.now().isoformat()
    })
# Core feature 2: marketing campaign effectiveness analysis
def analyze_marketing_effectiveness(request):
    """Measure campaign ROI and conversions by joining campaigns to transactions.

    A transaction is attributed to a campaign when its purchase date falls
    inside the campaign window and its product category matches the
    campaign's target category. Returns per-campaign metrics, an overall
    summary, and the baseline (non-campaign) average order value.
    """
    spark = SparkSession.builder.appName("MarketingAnalysis").getOrCreate()
    # Load campaign metadata and raw transactions from HDFS.
    marketing_df = spark.read.format("csv").option("header", "true").load(
        "hdfs://localhost:9000/agricultural_data/marketing_campaigns.csv")
    transaction_df = spark.read.format("csv").option("header", "true").load(
        "hdfs://localhost:9000/agricultural_data/transactions.csv")
    # Normalise column types (CSV loads every column as string).
    marketing_df = marketing_df.withColumn("campaign_start", to_date(col("campaign_start"), "yyyy-MM-dd"))
    marketing_df = marketing_df.withColumn("campaign_end", to_date(col("campaign_end"), "yyyy-MM-dd"))
    marketing_df = marketing_df.withColumn("campaign_budget", col("campaign_budget").cast("double"))
    transaction_df = transaction_df.withColumn("purchase_date", to_date(col("purchase_date"), "yyyy-MM-dd"))
    transaction_df = transaction_df.withColumn("transaction_amount", col("transaction_amount").cast("double"))
    # Attribute transactions to campaigns (date window + target category).
    joined_df = transaction_df.join(
        marketing_df,
        (transaction_df.purchase_date >= marketing_df.campaign_start) &
        (transaction_df.purchase_date <= marketing_df.campaign_end) &
        (transaction_df.product_category == marketing_df.target_category),
        "inner")
    # Per-campaign order counts, revenue, and reach.
    campaign_performance = joined_df.groupBy("campaign_id", "campaign_name", "campaign_budget").agg(
        count("transaction_id").alias("generated_orders"),
        sum("transaction_amount").alias("total_revenue"),
        countDistinct("customer_id").alias("unique_customers"),
        avg("transaction_amount").alias("avg_order_value")
    )
    # Composite metrics via Spark SQL; the LEFT JOIN keeps campaigns that
    # attracted zero attributed transactions in the result.
    joined_df.createOrReplaceTempView("campaign_transactions")
    marketing_df.createOrReplaceTempView("campaigns")
    detailed_metrics = spark.sql("""
        SELECT c.campaign_id,
               c.campaign_name,
               c.campaign_budget,
               COUNT(ct.transaction_id) as conversion_count,
               SUM(ct.transaction_amount) as total_sales,
               AVG(ct.transaction_amount) as avg_transaction,
               (SUM(ct.transaction_amount) - c.campaign_budget)
                   / NULLIF(c.campaign_budget, 0) * 100 as roi_percentage,
               COUNT(DISTINCT ct.customer_id) as customer_reach
        FROM campaigns c
        LEFT JOIN campaign_transactions ct ON c.campaign_id = ct.campaign_id
        GROUP BY c.campaign_id, c.campaign_name, c.campaign_budget
    """)
    # BUGFIX: the original baseline used `~exists(...collect())`, which is
    # not a valid DataFrame filter (exists() is an array higher-order
    # function) and raised at runtime. A left-anti join keeps exactly the
    # transactions that fall outside every campaign window.
    outside_campaign = transaction_df.join(
        marketing_df,
        (transaction_df.purchase_date >= marketing_df.campaign_start) &
        (transaction_df.purchase_date <= marketing_df.campaign_end),
        "left_anti")
    baseline_row = outside_campaign.agg(
        avg("transaction_amount").alias("baseline_avg")).collect()[0]
    baseline_sales = baseline_row["baseline_avg"]
    # Pandas post-processing: bucket each campaign by its ROI.
    performance_data = detailed_metrics.toPandas()
    performance_data['roi_category'] = pd.cut(
        performance_data['roi_percentage'],
        bins=[-float('inf'), 0, 50, 100, float('inf')],
        labels=['亏损', '低回报', '中等回报', '高回报'])
    total_spend = float(performance_data['campaign_budget'].sum())
    total_revenue = float(performance_data['total_sales'].sum())
    effectiveness_summary = {
        'total_campaigns': len(performance_data),
        'profitable_campaigns': len(performance_data[performance_data['roi_percentage'] > 0]),
        'total_marketing_spend': total_spend,
        'total_marketing_revenue': total_revenue,
        # Guard against a zero total budget instead of ZeroDivisionError.
        'overall_roi': round((total_revenue - total_spend) / total_spend * 100, 2)
                       if total_spend else None
    }
    return JsonResponse({
        'campaign_performance': performance_data.to_dict('records'),
        'effectiveness_summary': effectiveness_summary,
        'baseline_comparison': baseline_sales
    })
# Core feature 3: overall sales performance analysis
def analyze_overall_sales_performance(request):
    """Aggregate overall / monthly / category / seasonal sales KPIs.

    Returns global metrics, month-over-month trends with growth rates,
    per-category performance, quarterly patterns, and derived insights.
    """
    spark = SparkSession.builder.appName("SalesPerformanceAnalysis").getOrCreate()
    sales_df = spark.read.format("csv").option("header", "true").load(
        "hdfs://localhost:9000/agricultural_data/transactions.csv")
    # Type normalisation plus calendar breakdown columns.
    sales_df = sales_df.withColumn("transaction_amount", col("transaction_amount").cast("double"))
    sales_df = sales_df.withColumn("purchase_date", to_date(col("purchase_date"), "yyyy-MM-dd"))
    sales_df = sales_df.withColumn("year", year(col("purchase_date")))
    sales_df = sales_df.withColumn("month", month(col("purchase_date")))
    sales_df = sales_df.withColumn("quarter", quarter(col("purchase_date")))
    # Monthly revenue / order / customer trend.
    monthly_sales = sales_df.groupBy("year", "month").agg(
        sum("transaction_amount").alias("monthly_revenue"),
        count("transaction_id").alias("monthly_orders"),
        countDistinct("customer_id").alias("monthly_customers"),
        avg("transaction_amount").alias("avg_order_value")
    ).orderBy("year", "month")
    # Revenue ranking by product category.
    category_performance = sales_df.groupBy("product_category").agg(
        sum("transaction_amount").alias("category_revenue"),
        count("transaction_id").alias("category_orders"),
        avg("transaction_amount").alias("avg_category_price"),
        countDistinct("customer_id").alias("category_customers")
    ).orderBy(desc("category_revenue"))
    # Global composite metrics via Spark SQL.
    sales_df.createOrReplaceTempView("sales_data")
    comprehensive_metrics = spark.sql("""
        SELECT
            COUNT(DISTINCT customer_id) as total_customers,
            COUNT(transaction_id) as total_orders,
            SUM(transaction_amount) as total_revenue,
            AVG(transaction_amount) as overall_avg_order_value,
            MAX(transaction_amount) as highest_single_order,
            MIN(transaction_amount) as lowest_single_order,
            STDDEV(transaction_amount) as revenue_volatility
        FROM sales_data
    """)
    # NOTE(review): the original also built an unused year-over-year LAG
    # query (never collected nor returned); removed as dead code.
    # Repeat-purchase behaviour per customer.
    customer_purchase_analysis = spark.sql("""
        SELECT customer_id,
               COUNT(transaction_id) as purchase_count,
               DATEDIFF(MAX(purchase_date), MIN(purchase_date)) as customer_lifetime_days,
               SUM(transaction_amount) as customer_total_value
        FROM sales_data
        GROUP BY customer_id
    """)
    # BUGFIX: guard the division — an empty dataset previously raised
    # ZeroDivisionError.
    total_customer_count = customer_purchase_analysis.count()
    repeat_customer_rate = (
        customer_purchase_analysis.filter(col("purchase_count") > 1).count()
        / total_customer_count
    ) if total_customer_count else 0.0
    # Quarterly seasonality.
    seasonal_analysis = sales_df.groupBy("quarter").agg(
        sum("transaction_amount").alias("quarter_revenue"),
        avg("transaction_amount").alias("quarter_avg_order"),
        count("transaction_id").alias("quarter_orders")
    ).orderBy("quarter")
    # Pandas post-processing for growth rates.
    # BUGFIX: the original created these columns only when len > 1 yet read
    # them unconditionally below, raising KeyError for a single-month
    # dataset. pct_change handles any length (first row becomes NaN);
    # fill with 0 so the emitted JSON stays valid.
    monthly_data = monthly_sales.toPandas()
    monthly_data['revenue_growth_rate'] = monthly_data['monthly_revenue'].pct_change().fillna(0) * 100
    monthly_data['order_growth_rate'] = monthly_data['monthly_orders'].pct_change().fillna(0) * 100
    overall_metrics = comprehensive_metrics.collect()[0].asDict()
    category_data = category_performance.toPandas()
    seasonal_data = seasonal_analysis.toPandas()
    # Derived insights, each guarded against an empty frame; int() cast
    # because numpy integers are not JSON-serialisable.
    performance_insights = {
        'revenue_trend': 'positive' if monthly_data['revenue_growth_rate'].mean() > 0 else 'negative',
        'top_performing_category': category_data.iloc[0]['product_category'] if len(category_data) > 0 else None,
        'peak_sales_quarter': int(seasonal_data.loc[seasonal_data['quarter_revenue'].idxmax(), 'quarter'])
                              if len(seasonal_data) > 0 else None,
        'customer_retention_rate': repeat_customer_rate * 100
    }
    return JsonResponse({
        'overall_metrics': overall_metrics,
        'monthly_trends': monthly_data.to_dict('records'),
        'category_performance': category_data.to_dict('records'),
        'seasonal_patterns': seasonal_data.to_dict('records'),
        'performance_insights': performance_insights
    })
六、基于大数据的农产品交易数据分析与可视化系统-文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊