前言
💖💖作者:计算机程序员小杨 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💕💕文末获取源码联系 计算机程序员小杨 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题 💜💜
一.开发工具简介
大数据框架:Hadoop+Spark(本次没用Hive,支持定制) 开发语言:Python+Java(两个版本都支持) 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持) 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy 数据库:MySQL
二.系统内容简介
《农产品供应价格数据可视化分析系统》是一个基于大数据技术的农产品市场分析平台,采用Hadoop+Spark大数据框架构建分布式数据处理环境,通过Django后端框架提供稳定的API服务,前端使用Vue+ElementUI+Echarts技术栈实现交互式数据可视化界面。系统利用HDFS进行海量农产品价格数据的分布式存储,运用Spark SQL进行高效的数据查询与计算,结合Pandas和NumPy进行深度数据分析处理,MySQL数据库确保结构化数据的持久化存储。系统核心功能涵盖产品维度分析、价格维度分析、地域维度分析、商家维度分析以及可视化大屏展示,能够从多个角度深入挖掘农产品市场的价格变化规律、地域分布特征、商家竞争态势等关键信息,为农产品供应链管理、市场预测、价格监控提供数据支撑和决策参考,助力农业产业数字化转型和智能化发展。
三.系统功能演示
四.系统界面展示
五.系统源码展示
import json
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from django.http import JsonResponse
from django.views import View
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.window import Window
# Module-level SparkSession shared by every analysis view below.
# Adaptive Query Execution (AQE) and partition coalescing are enabled so
# Spark can tune shuffle partitions for the small-to-medium result sets
# these endpoints produce.
spark = (
    SparkSession.builder
    .appName("AgriculturePriceAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
class ProductDimensionAnalysis(View):
    """Product-dimension price analysis endpoint.

    POST body (JSON): ``product_name``, ``start_date``, ``end_date``.
    Dates are compared as raw values against ``record_date``, so the caller
    must use the same format the table stores — presumably 'YYYY-MM-DD';
    TODO confirm against the schema.

    Returns a JsonResponse containing basic price statistics, daily/weekly/
    monthly trends, a low/medium/high price-band distribution, supplier
    concentration, a per-calendar-month seasonal pattern, and day-over-day
    price change percentages.
    """

    def post(self, request):
        data = json.loads(request.body)
        product_name = data.get('product_name')
        start_date = data.get('start_date')
        end_date = data.get('end_date')
        # NOTE(review): JDBC credentials are hard-coded; move them into
        # Django settings / environment variables before deployment.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/agriculture_db")
              .option("dbtable", "price_records")
              .option("user", "root")
              .option("password", "password")
              .load())
        filtered_df = df.filter(
            (col("product_name") == product_name)
            & (col("record_date") >= start_date)
            & (col("record_date") <= end_date)
        )
        # Overall statistics for the selected product/date window.
        price_stats = filtered_df.groupBy("product_name").agg(
            avg("price").alias("avg_price"),
            max("price").alias("max_price"),
            min("price").alias("min_price"),
            stddev("price").alias("price_volatility"),
            count("*").alias("record_count"),
        )
        daily_trend = (filtered_df.groupBy("record_date")
                       .agg(avg("price").alias("daily_avg_price"))
                       .orderBy("record_date"))
        weekly_trend = (filtered_df
                        .withColumn("week_start", date_trunc("week", col("record_date")))
                        .groupBy("week_start")
                        .agg(avg("price").alias("weekly_avg_price"))
                        .orderBy("week_start"))
        monthly_trend = (filtered_df
                         .withColumn("month_start", date_trunc("month", col("record_date")))
                         .groupBy("month_start")
                         .agg(avg("price").alias("monthly_avg_price"))
                         .orderBy("month_start"))
        # Bucket prices into low (<10) / medium (<50) / high bands.
        price_range_analysis = (filtered_df
                                .withColumn("price_range",
                                            when(col("price") < 10, "low")
                                            .when(col("price") < 50, "medium")
                                            .otherwise("high"))
                                .groupBy("price_range")
                                .agg(count("*").alias("count"),
                                     avg("price").alias("avg_price_in_range")))
        supplier_count = filtered_df.select("supplier_id").distinct().count()
        market_concentration = (filtered_df.groupBy("supplier_id")
                                .agg(count("*").alias("supply_frequency"))
                                .orderBy(desc("supply_frequency"))
                                .limit(5))
        seasonal_pattern = (filtered_df
                            .withColumn("month", month(col("record_date")))
                            .groupBy("month")
                            .agg(avg("price").alias("monthly_avg"),
                                 count("*").alias("monthly_records"))
                            .orderBy("month"))
        # BUG FIX: Window was referenced without ever being imported, which
        # raised NameError at request time; it is now imported at module
        # level. The window spec is hoisted so lag() is defined only once.
        date_window = Window.orderBy("record_date")
        prev_price = lag("daily_avg_price").over(date_window)
        price_change_rate = daily_trend.withColumn(
            "price_change",
            (col("daily_avg_price") - prev_price) / prev_price * 100,
        )
        # PERF FIX: the original called price_stats.count() and then
        # price_stats.collect(), executing the aggregation job twice;
        # collect once and test the resulting list instead.
        stats_rows = price_stats.collect()
        result_data = {
            "basic_stats": stats_rows[0].asDict() if stats_rows else {},
            "daily_trend": [row.asDict() for row in daily_trend.collect()],
            "weekly_trend": [row.asDict() for row in weekly_trend.collect()],
            "monthly_trend": [row.asDict() for row in monthly_trend.collect()],
            "price_range_distribution": [row.asDict() for row in price_range_analysis.collect()],
            "supplier_count": supplier_count,
            "top_suppliers": [row.asDict() for row in market_concentration.collect()],
            "seasonal_pattern": [row.asDict() for row in seasonal_pattern.collect()],
            "price_volatility_analysis": [
                row.asDict()
                for row in price_change_rate.filter(col("price_change").isNotNull()).collect()
            ],
        }
        return JsonResponse(result_data, safe=False)
class RegionalAnalysis(View):
    """Regional price-comparison analysis endpoint.

    POST body (JSON): ``regions`` (list), ``product_category``, and optional
    ``analysis_period`` in days (default 30, counted back from now).

    Returns a JsonResponse with per-region price statistics, a per-date
    region pivot of daily averages, transport-cost-adjusted prices, a price
    index relative to the cross-region mean, market share by quantity,
    price dispersion, monthly seasonal patterns, high cross-region
    arbitrage opportunities, and a weekly-volatility stability ranking.
    """

    def post(self, request):
        data = json.loads(request.body)
        region_list = data.get('regions', [])
        product_category = data.get('product_category')
        analysis_period = data.get('analysis_period', 30)
        end_date = datetime.now()
        start_date = end_date - timedelta(days=analysis_period)
        # NOTE(review): JDBC credentials are hard-coded; move to settings.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/agriculture_db")
              .option("dbtable", "price_records")
              .option("user", "root")
              .option("password", "password")
              .load())
        region_df = df.filter(
            (col("region").isin(region_list))
            & (col("product_category") == product_category)
            & (col("record_date") >= start_date.strftime('%Y-%m-%d'))
        )
        regional_comparison = region_df.groupBy("region").agg(
            avg("price").alias("avg_price"),
            max("price").alias("max_price"),
            min("price").alias("min_price"),
            count("*").alias("supply_volume"),
            countDistinct("supplier_id").alias("supplier_count"),
        )
        # One column per region, rows keyed by date, for correlation study.
        price_correlation = (region_df
                             .groupBy("record_date", "region")
                             .agg(avg("price").alias("daily_avg_price"))
                             .groupBy("record_date")
                             .pivot("region")
                             .agg(first("daily_avg_price")))
        transport_costs_df = (spark.read.format("jdbc")
                              .option("url", "jdbc:mysql://localhost:3306/agriculture_db")
                              .option("dbtable", "transport_costs")
                              .option("user", "root")
                              .option("password", "password")
                              .load())
        transport_cost_impact = (region_df
                                 .join(transport_costs_df, "region", "left")
                                 .withColumn("adjusted_price",
                                             col("price") - col("transport_cost_per_unit")))
        # Hoist the cross-region mean (a driver-side scalar) out of the
        # column expression instead of burying collect() inside withColumn.
        overall_avg_price = regional_comparison.agg(avg("avg_price")).collect()[0][0]
        regional_price_index = regional_comparison.withColumn(
            "price_index", (col("avg_price") / overall_avg_price) * 100
        )
        # BUG FIX: Window was used here without being imported (NameError at
        # request time); it is now imported at module level.
        market_share_analysis = (region_df
                                 .groupBy("region")
                                 .agg(sum("quantity").alias("total_quantity"))
                                 .withColumn("market_share",
                                             col("total_quantity")
                                             / sum("total_quantity").over(Window.partitionBy())
                                             * 100))
        price_dispersion = region_df.groupBy("region").agg(
            stddev("price").alias("price_std"),
            (stddev("price") / avg("price") * 100).alias("coefficient_of_variation"),
        )
        seasonal_regional_pattern = (region_df
                                     .withColumn("month", month(col("record_date")))
                                     .groupBy("region", "month")
                                     .agg(avg("price").alias("monthly_avg_price"))
                                     .orderBy("region", "month"))
        # Pairwise region price gaps; thresholds 5 / 2 classify opportunity.
        cross_regional_arbitrage = (regional_comparison.alias("r1")
                                    .crossJoin(regional_comparison.alias("r2"))
                                    .filter(col("r1.region") != col("r2.region"))
                                    .withColumn("price_diff",
                                                col("r1.avg_price") - col("r2.avg_price"))
                                    .withColumn("arbitrage_opportunity",
                                                when(abs(col("price_diff")) > 5, "High")
                                                .when(abs(col("price_diff")) > 2, "Medium")
                                                .otherwise("Low")))
        regional_stability = (region_df
                              .withColumn("week", weekofyear(col("record_date")))
                              .groupBy("region", "week")
                              .agg(avg("price").alias("weekly_avg"))
                              .groupBy("region")
                              .agg(stddev("weekly_avg").alias("weekly_volatility")))
        result_data = {
            "regional_comparison": [row.asDict() for row in regional_comparison.collect()],
            "price_correlation_matrix": [row.asDict() for row in price_correlation.collect()],
            "transport_adjusted_prices": [
                row.asDict()
                for row in (transport_cost_impact
                            .select("region", "adjusted_price")
                            .groupBy("region")
                            .agg(avg("adjusted_price").alias("avg_adjusted_price"))
                            .collect())
            ],
            "regional_price_index": [row.asDict() for row in regional_price_index.collect()],
            "market_share": [row.asDict() for row in market_share_analysis.collect()],
            "price_dispersion": [row.asDict() for row in price_dispersion.collect()],
            "seasonal_patterns": [row.asDict() for row in seasonal_regional_pattern.collect()],
            "arbitrage_opportunities": [
                row.asDict()
                for row in cross_regional_arbitrage
                .filter(col("arbitrage_opportunity") == "High").collect()
            ],
            "regional_stability_ranking": [
                row.asDict()
                for row in regional_stability.orderBy("weekly_volatility").collect()
            ],
        }
        return JsonResponse(result_data, safe=False)
class SupplierAnalysis(View):
    """Supplier performance and risk analysis endpoint.

    POST body (JSON): ``supplier_ids`` (list), optional ``metrics`` and
    ``time_period`` in days (default 90, counted back from now).

    Returns a JsonResponse with per-supplier performance stats, price
    competitiveness vs. the market average, supply consistency, product
    diversity, market penetration, month-over-month growth, quality
    ratings, delivery reliability, and a composite risk score.
    """

    def post(self, request):
        data = json.loads(request.body)
        supplier_ids = data.get('supplier_ids', [])
        # NOTE(review): 'metrics' is accepted but never used below — either
        # filter the response by it or drop it from the API contract.
        analysis_metrics = data.get('metrics', ['price', 'volume', 'stability'])
        time_period = data.get('time_period', 90)
        end_date = datetime.now()
        start_date = end_date - timedelta(days=time_period)
        # NOTE(review): JDBC credentials are hard-coded; move to settings.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/agriculture_db")
              .option("dbtable", "price_records")
              .option("user", "root")
              .option("password", "password")
              .load())
        supplier_df = df.filter(
            (col("supplier_id").isin(supplier_ids))
            & (col("record_date") >= start_date.strftime('%Y-%m-%d'))
        )
        supplier_performance = supplier_df.groupBy("supplier_id").agg(
            avg("price").alias("avg_price"),
            sum("quantity").alias("total_supply"),
            count("*").alias("transaction_count"),
            stddev("price").alias("price_volatility"),
            min("record_date").alias("first_supply_date"),
            max("record_date").alias("last_supply_date"),
        )
        # Positive score = supplier is cheaper than the market average.
        price_competitiveness = (supplier_df
                                 .groupBy("supplier_id", "product_name")
                                 .agg(avg("price").alias("supplier_avg_price"))
                                 .join(supplier_df.groupBy("product_name")
                                       .agg(avg("price").alias("market_avg_price")),
                                       "product_name")
                                 .withColumn("price_competitiveness_score",
                                             (col("market_avg_price") - col("supplier_avg_price"))
                                             / col("market_avg_price") * 100))
        supply_consistency = (supplier_df
                              .withColumn("week", weekofyear(col("record_date")))
                              .groupBy("supplier_id", "week")
                              .agg(sum("quantity").alias("weekly_supply"))
                              .groupBy("supplier_id")
                              .agg(avg("weekly_supply").alias("avg_weekly_supply"),
                                   stddev("weekly_supply").alias("supply_volatility"),
                                   count("*").alias("active_weeks")))
        product_diversity = supplier_df.groupBy("supplier_id").agg(
            countDistinct("product_name").alias("product_count"),
            countDistinct("product_category").alias("category_count"),
        )
        market_penetration = supplier_df.groupBy("supplier_id").agg(
            countDistinct("region").alias("region_coverage"),
            countDistinct("market_id").alias("market_coverage"),
        )
        # BUG FIX: Window was used here without being imported (NameError at
        # request time); it is now imported at module level. The per-supplier
        # month ordering is hoisted so lag() is defined only once.
        month_window = Window.partitionBy("supplier_id").orderBy("month")
        prev_supply = lag("monthly_supply").over(month_window)
        supplier_growth_trend = (supplier_df
                                 .withColumn("month", date_trunc("month", col("record_date")))
                                 .groupBy("supplier_id", "month")
                                 .agg(sum("quantity").alias("monthly_supply"),
                                      avg("price").alias("monthly_avg_price"))
                                 .withColumn("supply_growth",
                                             (col("monthly_supply") - prev_supply)
                                             / prev_supply * 100))
        quality_ratings_df = (spark.read.format("jdbc")
                              .option("url", "jdbc:mysql://localhost:3306/agriculture_db")
                              .option("dbtable", "quality_ratings")
                              .option("user", "root")
                              .option("password", "password")
                              .load())
        quality_indicators = (supplier_df
                              .join(quality_ratings_df, ["supplier_id", "product_name"], "left")
                              .groupBy("supplier_id")
                              .agg(avg("quality_score").alias("avg_quality_score"),
                                   count("quality_score").alias("rated_transactions")))
        # delivery_delay <= 0 means delivered on or before the promised date.
        supplier_reliability = (supplier_df
                                .withColumn("delivery_delay",
                                            datediff(col("actual_delivery_date"),
                                                     col("promised_delivery_date")))
                                .groupBy("supplier_id")
                                .agg(avg("delivery_delay").alias("avg_delivery_delay"),
                                     countDistinct("record_date").alias("delivery_frequency"),
                                     (count(when(col("delivery_delay") <= 0, 1))
                                      / count("*") * 100).alias("on_time_delivery_rate")))
        competitive_positioning = price_competitiveness.groupBy("supplier_id").agg(
            avg("price_competitiveness_score").alias("overall_competitiveness"),
            count("*").alias("competing_products"),
        )
        # Composite risk score: price volatility + supply volatility +
        # on-time-delivery penalties (higher = riskier).
        supplier_risk_assessment = (supplier_performance
                                    .join(supply_consistency, "supplier_id")
                                    .join(supplier_reliability, "supplier_id", "left")
                                    .withColumn("risk_score",
                                                when(col("price_volatility") > 10, 3)
                                                .when(col("price_volatility") > 5, 2)
                                                .otherwise(1)
                                                + when(col("supply_volatility")
                                                       > col("avg_weekly_supply") * 0.5, 2)
                                                .otherwise(1)
                                                + when(col("on_time_delivery_rate") < 80, 2)
                                                .otherwise(0)))
        result_data = {
            "supplier_performance_overview": [row.asDict() for row in supplier_performance.collect()],
            "price_competitiveness_analysis": [row.asDict() for row in competitive_positioning.collect()],
            "supply_consistency_metrics": [row.asDict() for row in supply_consistency.collect()],
            "product_portfolio_diversity": [row.asDict() for row in product_diversity.collect()],
            "market_penetration_analysis": [row.asDict() for row in market_penetration.collect()],
            "growth_trend_analysis": [
                row.asDict()
                for row in supplier_growth_trend.filter(col("supply_growth").isNotNull()).collect()
            ],
            "quality_performance": [row.asDict() for row in quality_indicators.collect()],
            "reliability_metrics": [row.asDict() for row in supplier_reliability.collect()],
            "risk_assessment": [row.asDict() for row in supplier_risk_assessment.orderBy("risk_score").collect()],
        }
        return JsonResponse(result_data, safe=False)
六.系统文档展示
结束
💕💕文末获取源码联系 计算机程序员小杨