A Big-Data-Based Crop Yield Data Analysis System | Analyzing Crop Data in Excel vs. Hadoop + Spark Big-Data Processing: Worlds Apart in Technical Depth


💖💖 Author: 计算机毕业设计杰瑞 💙💙 About me: I spent years teaching computer science training courses and genuinely enjoy teaching. I work mainly in Java, WeChat Mini Programs, Python, Golang, and Android, and my projects cover big data, deep learning, websites, mini programs, Android apps, and algorithms. I regularly take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I know some techniques for reducing similarity-check scores. I also like sharing solutions to problems I run into during development and talking about technology, so feel free to ask me anything about code! 💛💛 One more thing: thank you all for your attention and support! 💜💜 Website projects | Android/Mini-Program projects | Big-data projects | Deep-learning projects | Recommended graduation-project topics

Introduction to the Big-Data-Based Crop Yield Data Analysis System

The crop yield data analysis system is a comprehensive agricultural data processing platform built on a big-data technology stack. It uses the Hadoop Distributed File System (HDFS) as its storage layer and the Spark compute engine for efficient processing and analysis of large-scale agricultural data. The front end is built with the Vue framework and the ElementUI component library, with data visualization rendered by the Echarts charting library; the back end exposes RESTful API services through the Django framework, and data is persisted in a MySQL relational database. The core modules cover crop yield data collection and management, multi-dimensional comprehensive analysis, climate-factor correlation studies, yield variation across the crop growth cycle, assessment of geographic and environmental influences, and evaluation of the effectiveness of production measures. By running large-scale queries through Spark SQL and using Pandas and NumPy for data cleaning and statistical analysis, the system can process crop yield data spanning different regions and time periods, providing a scientific basis and technical support for agricultural production decisions.
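As a rough sketch of how this division of labor might look in practice, the snippet below keeps the heavy aggregation in Spark SQL and hands the (small) result to Pandas for cleaning and summary statistics. The HDFS path, CSV format, and column names here are illustrative assumptions, not the project's actual layout.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("CropYieldSketch").getOrCreate()

# Raw yield records on HDFS (path and schema are assumed for illustration)
raw = (spark.read
       .option("header", "true")
       .option("inferSchema", "true")
       .csv("hdfs:///data/crop_yield/*.csv"))

# Large-scale aggregation stays in Spark SQL
raw.createOrReplaceTempView("crop_yield")
regional_avg = spark.sql("""
    SELECT region, crop_type, AVG(yield_per_hectare) AS avg_yield
    FROM crop_yield
    GROUP BY region, crop_type
""")

# The aggregated result is small enough to clean and summarize in Pandas
summary = regional_avg.toPandas().dropna(subset=["avg_yield"])
print(summary.describe())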

Demo Video of the Big-Data-Based Crop Yield Data Analysis System

Demo video

Screenshots of the Big-Data-Based Crop Yield Data Analysis System


Code Walkthrough of the Big-Data-Based Crop Yield Data Analysis System

from pyspark.sql import SparkSession
from pyspark.sql.functions import *  # provides col, avg, sum, max, min, etc. (intentionally shadows the Python builtins)
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
from datetime import datetime

# Shared Spark session; adaptive query execution coalesces small shuffle partitions
spark = (SparkSession.builder
         .appName("CropYieldAnalysis")
         .config("spark.sql.adaptive.enabled", "true")
         .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
         .getOrCreate())

JDBC_URL = "jdbc:mysql://localhost:3306/crop_system"

def read_table(table_name):
    # Load a MySQL table into a Spark DataFrame over JDBC
    return (spark.read.format("jdbc")
            .option("url", JDBC_URL)
            .option("dbtable", table_name)
            .option("user", "root")
            .option("password", "123456")
            .load())

def rows_to_dicts(df):
    # Spark Row objects are not JSON-serializable; convert them to plain dicts
    return [row.asDict() for row in df.collect()]

@csrf_exempt
def crop_yield_multi_analysis(request):
    if request.method != 'POST':
        return JsonResponse({"status": "error", "message": "POST required"}, status=405)
    data = json.loads(request.body)
    crop_types = data.get('crop_types', [])
    start_date = data.get('start_date')
    end_date = data.get('end_date')
    regions = data.get('regions', [])
    # Load yield records and apply the requested filters
    crop_df = read_table("crop_yield_data")
    filtered_df = crop_df.filter(col("harvest_date").between(start_date, end_date))
    if crop_types:
        filtered_df = filtered_df.filter(col("crop_type").isin(crop_types))
    if regions:
        filtered_df = filtered_df.filter(col("region").isin(regions))
    # Aggregate by crop, region, and harvest year/month
    multi_dim_result = filtered_df.groupBy(
        "crop_type", "region",
        year("harvest_date").alias("year"),
        month("harvest_date").alias("month")
    ).agg(
        avg("yield_per_hectare").alias("avg_yield"),
        sum("total_yield").alias("total_yield"),
        count("*").alias("sample_count"),
        stddev("yield_per_hectare").alias("yield_std")
    )
    # Year-over-year trend per crop
    trend_analysis = multi_dim_result.groupBy("crop_type", "year").agg(
        avg("avg_yield").alias("yearly_avg_yield"),
        sum("total_yield").alias("yearly_total_yield")
    ).orderBy("crop_type", "year")
    # Regional comparison per crop
    regional_comparison = multi_dim_result.groupBy("region", "crop_type").agg(
        avg("avg_yield").alias("region_avg_yield"),
        max("avg_yield").alias("region_max_yield"),
        min("avg_yield").alias("region_min_yield")
    )
    # Seasonal (monthly) pattern per crop
    seasonal_pattern = multi_dim_result.groupBy("crop_type", "month").agg(
        avg("avg_yield").alias("monthly_avg_yield")
    ).orderBy("crop_type", "month")
    # Pearson correlations between yield and environmental factors, computed in Pandas
    correlation_data = filtered_df.select(
        "yield_per_hectare", "temperature", "rainfall", "soil_ph", "fertilizer_amount"
    ).toPandas()
    correlation_matrix = correlation_data.corr().fillna(0).to_dict()
    # Yield variability per crop
    yield_variability = multi_dim_result.groupBy("crop_type").agg(
        avg("yield_std").alias("avg_variability"),
        (max("avg_yield") - min("avg_yield")).alias("yield_range")
    )
    performance_ranking = regional_comparison.orderBy(desc("region_avg_yield")).limit(10)
    result_data = {
        "trend_analysis": rows_to_dicts(trend_analysis),
        "regional_comparison": rows_to_dicts(regional_comparison),
        "seasonal_pattern": rows_to_dicts(seasonal_pattern),
        "correlation_matrix": correlation_matrix,
        "yield_variability": rows_to_dicts(yield_variability),
        "performance_ranking": rows_to_dicts(performance_ranking),
        "total_samples": filtered_df.count()
    }
    return JsonResponse({"status": "success", "data": result_data, "message": "Multi-dimensional analysis complete"})

@csrf_exempt
def climate_correlation_analysis(request):
    if request.method != 'POST':
        return JsonResponse({"status": "error", "message": "POST required"}, status=405)
    data = json.loads(request.body)
    crop_type = data.get('crop_type')
    climate_factors = data.get('climate_factors', ['temperature', 'rainfall', 'humidity', 'sunshine_hours'])
    climate_df = read_table("climate_data")
    yield_df = read_table("crop_yield_data")
    if crop_type:
        yield_df = yield_df.filter(col("crop_type") == crop_type)
    # Join yield and climate records on region and harvest year/month.
    # Note the parentheses around each comparison: Python's & binds tighter than ==.
    joined_df = yield_df.join(
        climate_df,
        (yield_df.region == climate_df.region)
        & (year(yield_df.harvest_date) == year(climate_df.record_date))
        & (month(yield_df.harvest_date) == month(climate_df.record_date)),
        "inner"
    ).drop(climate_df.region)  # drop the duplicate column so "region" stays unambiguous
    climate_impact_analysis = joined_df.groupBy("region", year("harvest_date").alias("year")).agg(
        *[avg(factor).alias(f"avg_{factor}") for factor in climate_factors],
        avg("yield_per_hectare").alias("avg_yield"),
        count("*").alias("data_points")
    )
    # Temperature-yield correlation on a two-column sample pulled into Pandas
    temperature_yield_corr = joined_df.select("temperature", "yield_per_hectare").toPandas()
    temp_correlation = float(temperature_yield_corr.corr().iloc[0, 1]) if len(temperature_yield_corr) > 0 else 0
    # Bucket rainfall and temperature, then compare average yields per bucket
    rainfall_segments = joined_df.withColumn(
        "rainfall_range",
        when(col("rainfall") < 500, "low").when(col("rainfall") < 1000, "medium").otherwise("high")
    ).groupBy("rainfall_range").agg(
        avg("yield_per_hectare").alias("avg_yield_by_rainfall"),
        count("*").alias("sample_count")
    )
    temperature_segments = joined_df.withColumn(
        "temp_range",
        when(col("temperature") < 15, "cool").when(col("temperature") < 25, "moderate").otherwise("warm")
    ).groupBy("temp_range").agg(
        avg("yield_per_hectare").alias("avg_yield_by_temp"),
        count("*").alias("sample_count")
    )
    # Climate ranges observed among the top 20% of yields
    yield_p80 = joined_df.select(percentile_approx("yield_per_hectare", 0.8)).collect()[0][0]
    optimal_conditions = joined_df.filter(col("yield_per_hectare") >= yield_p80).select(*climate_factors).toPandas()
    optimal_climate_ranges = {
        factor: {
            "min": float(optimal_conditions[factor].min()),
            "max": float(optimal_conditions[factor].max()),
            "mean": float(optimal_conditions[factor].mean())
        }
        for factor in climate_factors if factor in optimal_conditions.columns
    }
    # Average yield under extreme vs. normal weather; avg() returns None on empty sets
    extreme_weather_impact = joined_df.filter(
        (col("temperature") > 35) | (col("temperature") < 5)
        | (col("rainfall") > 2000) | (col("rainfall") < 100)
    ).agg(avg("yield_per_hectare").alias("extreme_weather_yield")).collect()[0]["extreme_weather_yield"] or 0
    normal_weather_yield = joined_df.filter(
        col("temperature").between(5, 35) & col("rainfall").between(100, 2000)
    ).agg(avg("yield_per_hectare").alias("normal_weather_yield")).collect()[0]["normal_weather_yield"] or 0
    weather_impact_ratio = extreme_weather_impact / normal_weather_yield if normal_weather_yield > 0 else 0
    result_data = {
        "climate_impact_analysis": rows_to_dicts(climate_impact_analysis),
        "temperature_correlation": temp_correlation,
        "rainfall_yield_segments": rows_to_dicts(rainfall_segments),
        "temperature_yield_segments": rows_to_dicts(temperature_segments),
        "optimal_climate_ranges": optimal_climate_ranges,
        "extreme_weather_impact": extreme_weather_impact,
        "normal_weather_yield": normal_weather_yield,
        "weather_impact_ratio": weather_impact_ratio
    }
    return JsonResponse({"status": "success", "data": result_data, "message": "Climate correlation analysis complete"})

@csrf_exempt
def crop_cycle_yield_analysis(request):
    if request.method != 'POST':
        return JsonResponse({"status": "error", "message": "POST required"}, status=405)
    data = json.loads(request.body)
    crop_type = data.get('crop_type')
    analysis_years = data.get('analysis_years', 5)
    cycle_df = read_table("crop_cycle_data")
    yield_df = read_table("crop_yield_data")
    if crop_type:
        cycle_df = cycle_df.filter(col("crop_type") == crop_type)
        yield_df = yield_df.filter(col("crop_type") == crop_type)
    current_year = datetime.now().year
    recent_data = yield_df.filter(year("harvest_date") >= (current_year - analysis_years))
    # Join yield and cycle records on field and planting year; drop the duplicate
    # columns from cycle_df so later references stay unambiguous
    cycle_yield_joined = recent_data.join(
        cycle_df,
        (recent_data.field_id == cycle_df.field_id)
        & (year(recent_data.harvest_date) == year(cycle_df.planting_date)),
        "inner"
    ).drop(cycle_df.field_id).drop(cycle_df.crop_type)
    # Length of each growth stage in days (datediff takes end date first)
    cycle_stage_analysis = (cycle_yield_joined
        .withColumn("germination_days", datediff("growth_start_date", "planting_date"))
        .withColumn("growth_days", datediff("flowering_start_date", "growth_start_date"))
        .withColumn("flowering_days", datediff("maturity_date", "flowering_start_date"))
        .withColumn("maturity_days", datediff("harvest_date", "maturity_date"))
        .withColumn("total_cycle_days", datediff("harvest_date", "planting_date")))
    stage_performance = cycle_stage_analysis.groupBy("crop_type").agg(
        avg("germination_days").alias("avg_germination_days"),
        avg("growth_days").alias("avg_growth_days"),
        avg("flowering_days").alias("avg_flowering_days"),
        avg("maturity_days").alias("avg_maturity_days"),
        avg("total_cycle_days").alias("avg_total_cycle_days"),
        avg("yield_per_hectare").alias("avg_yield"),
        corr("total_cycle_days", "yield_per_hectare").alias("cycle_yield_correlation")
    )
    # Stage lengths among the top quartile of yields
    yield_p75 = cycle_stage_analysis.select(percentile_approx("yield_per_hectare", 0.75)).collect()[0][0]
    optimal_cycle_analysis = cycle_stage_analysis.filter(col("yield_per_hectare") >= yield_p75).groupBy("crop_type").agg(
        avg("germination_days").alias("optimal_germination_days"),
        avg("growth_days").alias("optimal_growth_days"),
        avg("flowering_days").alias("optimal_flowering_days"),
        avg("maturity_days").alias("optimal_maturity_days"),
        avg("total_cycle_days").alias("optimal_total_cycle_days")
    )
    # How the planting month affects yield and cycle length
    seasonal_planting_impact = cycle_yield_joined.withColumn(
        "planting_month", month("planting_date")
    ).groupBy("planting_month", "crop_type").agg(
        avg("yield_per_hectare").alias("avg_yield_by_planting_month"),
        avg(datediff("harvest_date", "planting_date")).alias("avg_cycle_length"),
        count("*").alias("planting_count")
    ).orderBy("crop_type", "planting_month")
    cycle_variability = cycle_stage_analysis.groupBy("crop_type").agg(
        stddev("total_cycle_days").alias("cycle_length_std"),
        stddev("yield_per_hectare").alias("yield_std"),
        (max("total_cycle_days") - min("total_cycle_days")).alias("cycle_length_range")
    )
    # Yield per cycle day as a crude growth-efficiency measure
    growth_rate_analysis = cycle_stage_analysis.withColumn(
        "growth_rate", col("yield_per_hectare") / col("total_cycle_days")
    ).groupBy("crop_type").agg(
        avg("growth_rate").alias("avg_growth_rate"),
        max("growth_rate").alias("max_growth_rate"),
        min("growth_rate").alias("min_growth_rate")
    )
    result_data = {
        "stage_performance": rows_to_dicts(stage_performance),
        "optimal_cycle_analysis": rows_to_dicts(optimal_cycle_analysis),
        "seasonal_planting_impact": rows_to_dicts(seasonal_planting_impact),
        "cycle_variability": rows_to_dicts(cycle_variability),
        "growth_rate_analysis": rows_to_dicts(growth_rate_analysis),
        "analysis_period_years": analysis_years
    }
    return JsonResponse({"status": "success", "data": result_data, "message": "Crop cycle yield analysis complete"})
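
For context, a client call against the first endpoint might look like the sketch below. The URL path is hypothetical; the actual route depends on how the project's urls.py maps these views.

import requests

payload = {
    "crop_types": ["wheat", "corn"],
    "start_date": "2020-01-01",
    "end_date": "2023-12-31",
    "regions": ["North", "South"],
}
# Hypothetical route; adjust to the project's actual URL configuration
resp = requests.post("http://localhost:8000/api/crop/multi_analysis/", json=payload)
print(resp.json()["data"]["trend_analysis"][:3])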

Documentation of the Big-Data-Based Crop Yield Data Analysis System

