Preface
💖💖Author: 计算机程序员小杨 💙💙About me: I work in the computer field and specialize in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT areas. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for lowering plagiarism-check similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code, so feel free to ask me anything about code or technology! 💛💛A word of thanks: thank you all for your attention and support! 💕💕Contact 计算机程序员小杨 at the end of this article to get the source code 💜💜 Website projects · Android/Mini Program projects · Big data projects · Deep learning projects · Graduation project topic selection 💜💜
1. Development Tools Overview
Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
Development language: Python + Java (both versions are supported)
Backend framework: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are supported)
Frontend: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
2. System Overview
The Urban Residents' Food Consumption Data Analysis and Visualization System is a comprehensive data processing platform built on a big data architecture, designed for in-depth mining and intelligent analysis of urban residents' food consumption behavior. It combines Hadoop distributed storage with the Spark computing engine to process large volumes of consumption data efficiently, and uses Spark SQL for complex queries and statistical analysis. The backend exposes RESTful APIs built with Django; the frontend uses Vue.js with the ElementUI component library to deliver a responsive interface and integrates the Echarts charting library for multi-dimensional visualization. Core modules cover consumption data collection and management, consumer behavior trend analysis, multi-dimensional statistics, cross-category consumption comparison, regional consumption difference studies, and year-over-year trend prediction. Pandas and NumPy handle data preprocessing and scientific computing, while MySQL stores the structured data, giving a complete technical solution for studying urban residents' food consumption patterns and demonstrating the practical value of modern big data technology in livelihood data analysis.
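To make the request flow concrete, here is a minimal sketch of how the three analysis views shown in section 5 might be wired into Django's URL routing. The module path and endpoint names are illustrative assumptions, not the project's actual configuration:

# urls.py -- minimal routing sketch (module path and endpoint names are assumed
# for illustration; the real project layout may differ).
from django.urls import path
from analysis.views import (
    ConsumerTrendAnalysisView,
    FoodCategoryComparisonView,
    RegionalConsumptionDifferenceView,
)

urlpatterns = [
    # Each view accepts a JSON POST body and returns aggregated results as JSON.
    path('api/analysis/trend/', ConsumerTrendAnalysisView.as_view()),
    path('api/analysis/category-comparison/', FoodCategoryComparisonView.as_view()),
    path('api/analysis/regional-difference/', RegionalConsumptionDifferenceView.as_view()),
]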
3. System Feature Demo
2026 big data technology trend: the Urban Residents' Food Consumption Data Analysis and Visualization System catches the Hadoop+Spark wave
4. System Interface Showcase
5. System Source Code Showcase
from pyspark.sql import SparkSession
from django.http import JsonResponse
from django.views import View
import pandas as pd
import numpy as np
from datetime import datetime
import json

# Shared SparkSession for all views; adaptive query execution is enabled so
# Spark can tune shuffle partitions at runtime.
spark = SparkSession.builder.appName("FoodConsumptionAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
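# Note (illustrative assumption, not part of the project's code): Spark's JDBC
# reader used below needs the MySQL connector on its classpath. If the cluster
# does not already provide it, one way is to request it when building the session:
#   spark = (SparkSession.builder.appName("FoodConsumptionAnalysis")
#            .config("spark.sql.adaptive.enabled", "true")
#            .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.33")
#            .getOrCreate())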
class ConsumerTrendAnalysisView(View):
    """Consumption trend analysis: monthly aggregates, month-over-month growth, and seasonal patterns."""

    def post(self, request):
        try:
            data = json.loads(request.body)
            start_date = data.get('start_date')
            end_date = data.get('end_date')
            region = data.get('region', 'all')
            # Load the consumption table from MySQL through Spark's JDBC reader.
            food_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/food_consumption").option("dbtable", "food_consumption_data").option("user", "root").option("password", "123456").load()
            if region != 'all':
                food_df = food_df.filter(food_df.region == region)
            food_df = food_df.filter((food_df.consumption_date >= start_date) & (food_df.consumption_date <= end_date))
            food_df.createOrReplaceTempView("consumption_data")
            # Monthly totals, averages, and distinct-consumer counts per food category.
            trend_sql = """
                SELECT
                    DATE_FORMAT(consumption_date, 'yyyy-MM') as month,
                    food_category,
                    SUM(consumption_amount) as total_consumption,
                    AVG(consumption_amount) as avg_consumption,
                    COUNT(DISTINCT consumer_id) as consumer_count
                FROM consumption_data
                GROUP BY DATE_FORMAT(consumption_date, 'yyyy-MM'), food_category
                ORDER BY month, food_category
            """
            trend_result = spark.sql(trend_sql)
            trend_pandas = trend_result.toPandas()
            # Month-over-month growth rate of the latest month, per category.
            growth_rate_data = []
            for category in trend_pandas['food_category'].unique():
                category_data = trend_pandas[trend_pandas['food_category'] == category].sort_values('month')
                if len(category_data) > 1:
                    current_consumption = category_data.iloc[-1]['total_consumption']
                    previous_consumption = category_data.iloc[-2]['total_consumption']
                    growth_rate = ((current_consumption - previous_consumption) / previous_consumption) * 100 if previous_consumption > 0 else 0
                    growth_rate_data.append({
                        'category': category,
                        'current_month_consumption': float(current_consumption),
                        'growth_rate': round(float(growth_rate), 2)
                    })
            # Average consumption per calendar month exposes seasonal patterns.
            seasonal_analysis_sql = """
                SELECT
                    MONTH(consumption_date) as month_num,
                    food_category,
                    AVG(consumption_amount) as seasonal_avg
                FROM consumption_data
                GROUP BY MONTH(consumption_date), food_category
                ORDER BY month_num
            """
            seasonal_result = spark.sql(seasonal_analysis_sql)
            seasonal_pandas = seasonal_result.toPandas()
            seasonal_trend = seasonal_pandas.groupby('month_num')['seasonal_avg'].mean().to_dict()
            return JsonResponse({
                'success': True,
                'trend_data': trend_pandas.to_dict('records'),
                'growth_analysis': growth_rate_data,
                'seasonal_pattern': seasonal_trend,
                # len() on the collected frame avoids re-running the Spark job
                # that trend_result.count() would trigger.
                'total_records': len(trend_pandas)
            })
        except Exception as e:
            return JsonResponse({'success': False, 'error': str(e)})
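For illustration, a client call to the trend endpoint might look like the following. The URL assumes the hypothetical routing sketched in section 2, and the request fields match the JSON body the view reads above:

# Client-side sketch (the endpoint URL is an assumption from the routing
# example in section 2; it also assumes the endpoint is CSRF-exempt, as
# JSON APIs commonly are).
import requests

resp = requests.post(
    'http://localhost:8000/api/analysis/trend/',
    json={'start_date': '2023-01-01', 'end_date': '2023-12-31', 'region': 'all'},
)
result = resp.json()
if result['success']:
    # growth_analysis carries the month-over-month growth rate per food category.
    for item in result['growth_analysis']:
        print(item['category'], item['growth_rate'], '%')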
class FoodCategoryComparisonView(View):
    """Cross-category comparison: per-period aggregates, ranking, correlation, and volatility."""

    def post(self, request):
        try:
            data = json.loads(request.body)
            comparison_period = data.get('period', 'month')
            selected_categories = data.get('categories', [])
            food_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/food_consumption").option("dbtable", "food_consumption_data").option("user", "root").option("password", "123456").load()
            if selected_categories:
                # Column.isin avoids hand-building a SQL string (and the injection risk that goes with it).
                food_df = food_df.filter(food_df.food_category.isin(selected_categories))
            food_df.createOrReplaceTempView("category_comparison")
            # Build the time-bucket expression. A 'qq' quarter pattern is only honored by
            # Spark 3's datetime parser, so quarters are assembled explicitly from
            # YEAR() and QUARTER() for portability.
            if comparison_period == 'month':
                period_expr = "DATE_FORMAT(consumption_date, 'yyyy-MM')"
            elif comparison_period == 'quarter':
                period_expr = "CONCAT(YEAR(consumption_date), '-Q', QUARTER(consumption_date))"
            else:
                period_expr = "DATE_FORMAT(consumption_date, 'yyyy')"
            comparison_sql = f"""
                SELECT
                    food_category,
                    {period_expr} as time_period,
                    SUM(consumption_amount) as total_amount,
                    AVG(consumption_amount) as avg_amount,
                    MAX(consumption_amount) as max_amount,
                    MIN(consumption_amount) as min_amount,
                    STDDEV(consumption_amount) as std_deviation,
                    COUNT(*) as consumption_frequency
                FROM category_comparison
                GROUP BY food_category, {period_expr}
                ORDER BY time_period, food_category
            """
            comparison_result = spark.sql(comparison_sql)
            comparison_pandas = comparison_result.toPandas()
            # STDDEV over a single record is NULL; replace NaN so the JSON payload stays valid.
            comparison_pandas = comparison_pandas.fillna(0)
            # Totals, averages, distinct consumers, and market share per category.
            category_ranking_sql = """
                SELECT
                    food_category,
                    SUM(consumption_amount) as category_total,
                    AVG(consumption_amount) as category_avg,
                    COUNT(DISTINCT consumer_id) as unique_consumers,
                    ROUND(SUM(consumption_amount) * 100.0 / (SELECT SUM(consumption_amount) FROM category_comparison), 2) as market_share
                FROM category_comparison
                GROUP BY food_category
                ORDER BY category_total DESC
            """
            ranking_result = spark.sql(category_ranking_sql)
            ranking_pandas = ranking_result.toPandas()
            # Correlate the categories' per-period totals to see which move together.
            correlation_matrix = comparison_pandas.pivot(index='time_period', columns='food_category', values='total_amount').corr()
            correlation_data = correlation_matrix.fillna(0).to_dict()
            # Standard deviation of per-period totals as a simple volatility measure.
            consumption_volatility = comparison_pandas.groupby('food_category')['total_amount'].std().fillna(0).to_dict()
            return JsonResponse({
                'success': True,
                'comparison_data': comparison_pandas.to_dict('records'),
                'category_ranking': ranking_pandas.to_dict('records'),
                'correlation_analysis': correlation_data,
                'volatility_analysis': consumption_volatility,
                'analysis_period': comparison_period
            })
        except Exception as e:
            return JsonResponse({'success': False, 'error': str(e)})
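One detail in FoodCategoryComparisonView worth unpacking is the correlation step: pivoting to a time_period × food_category matrix and calling .corr() yields the pairwise correlation of each category's consumption series over time. A self-contained toy version of the same pattern, on synthetic data:

# Toy illustration of the pivot-then-correlate pattern used above (synthetic data).
import pandas as pd

df = pd.DataFrame({
    'time_period':   ['2023-01', '2023-01', '2023-02', '2023-02', '2023-03', '2023-03'],
    'food_category': ['grain', 'meat', 'grain', 'meat', 'grain', 'meat'],
    'total_amount':  [100.0, 80.0, 110.0, 95.0, 120.0, 105.0],
})

# Rows become time periods, columns become categories, values are period totals.
matrix = df.pivot(index='time_period', columns='food_category', values='total_amount')
# Pearson correlation between each pair of category time series.
print(matrix.corr())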
class RegionalConsumptionDifferenceView(View):
    """Regional difference analysis: per-region distributions, diversity, similarity, and outliers."""

    def post(self, request):
        try:
            data = json.loads(request.body)
            analysis_type = data.get('analysis_type', 'comprehensive')  # reserved for future analysis modes
            target_food_category = data.get('food_category', 'all')
            regional_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/food_consumption").option("dbtable", "food_consumption_data").option("user", "root").option("password", "123456").load()
            if target_food_category != 'all':
                regional_df = regional_df.filter(regional_df.food_category == target_food_category)
            regional_df.createOrReplaceTempView("regional_analysis")
            # Per-region, per-category totals plus approximate median and 95th percentile.
            regional_summary_sql = """
                SELECT
                    region,
                    food_category,
                    SUM(consumption_amount) as total_consumption,
                    AVG(consumption_amount) as avg_consumption,
                    COUNT(DISTINCT consumer_id) as consumer_count,
                    MAX(consumption_amount) as max_single_consumption,
                    PERCENTILE_APPROX(consumption_amount, 0.5) as median_consumption,
                    PERCENTILE_APPROX(consumption_amount, 0.95) as p95_consumption
                FROM regional_analysis
                GROUP BY region, food_category
                ORDER BY region, total_consumption DESC
            """
            summary_result = spark.sql(regional_summary_sql)
            summary_pandas = summary_result.toPandas()
            # How many categories each region consumes, and at what overall volume.
            regional_diversity_sql = """
                SELECT
                    region,
                    COUNT(DISTINCT food_category) as category_diversity,
                    SUM(consumption_amount) as region_total_consumption,
                    AVG(consumption_amount) as region_avg_consumption
                FROM regional_analysis
                GROUP BY region
                ORDER BY region_total_consumption DESC
            """
            diversity_result = spark.sql(regional_diversity_sql)
            diversity_pandas = diversity_result.toPandas()
            # Break each region's total down into per-category shares.
            consumption_intensity_analysis = []
            for region in summary_pandas['region'].unique():
                region_data = summary_pandas[summary_pandas['region'] == region]
                total_regional_consumption = region_data['total_consumption'].sum()
                category_distribution = {}
                for _, row in region_data.iterrows():
                    category_distribution[row['food_category']] = {
                        'consumption_amount': float(row['total_consumption']),
                        'percentage': round((row['total_consumption'] / total_regional_consumption) * 100, 2),
                        'avg_per_consumer': round(float(row['avg_consumption']), 2)
                    }
                consumption_intensity_analysis.append({
                    'region': region,
                    'total_consumption': float(total_regional_consumption),
                    'category_breakdown': category_distribution
                })
            # Pivot to a region x category matrix of averages, then correlate the
            # transpose so similarity is measured between regions' category profiles
            # (corr() works column-wise, so without .T it would compare categories).
            regional_comparison_matrix = summary_pandas.pivot(index='region', columns='food_category', values='avg_consumption').fillna(0)
            regional_similarity = regional_comparison_matrix.T.corr().fillna(0).to_dict()
            # Flag records above their own (region, category) group's 95th percentile
            # using a correlated scalar subquery.
            outlier_detection_sql = """
                SELECT
                    region,
                    food_category,
                    consumption_amount,
                    consumer_id,
                    consumption_date
                FROM regional_analysis
                WHERE consumption_amount > (
                    SELECT PERCENTILE_APPROX(consumption_amount, 0.95)
                    FROM regional_analysis r2
                    WHERE r2.region = regional_analysis.region
                    AND r2.food_category = regional_analysis.food_category
                )
                ORDER BY consumption_amount DESC
                LIMIT 20
            """
            outlier_result = spark.sql(outlier_detection_sql)
            outlier_pandas = outlier_result.toPandas()
            return JsonResponse({
                'success': True,
                'regional_summary': summary_pandas.to_dict('records'),
                'diversity_analysis': diversity_pandas.to_dict('records'),
                'consumption_intensity': consumption_intensity_analysis,
                'regional_similarity': regional_similarity,
                'consumption_outliers': outlier_pandas.to_dict('records'),
                'analysis_timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            })
        except Exception as e:
            return JsonResponse({'success': False, 'error': str(e)})
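A closing note on the source code: ConsumerTrendAnalysisView computes month-over-month growth by collecting results to the driver and looping in pandas. For larger datasets the same calculation can stay inside Spark with a LAG window function. The sketch below is an alternative formulation under that assumption, run against the same consumption_data temp view; it is not the project's actual implementation:

# Alternative sketch (assumption, not the project's code): month-over-month
# growth computed entirely in Spark SQL via the LAG window function.
growth_sql = """
    SELECT month, food_category, total_consumption,
           ROUND((total_consumption - prev_consumption) / prev_consumption * 100, 2) AS growth_rate
    FROM (
        SELECT DATE_FORMAT(consumption_date, 'yyyy-MM') AS month,
               food_category,
               SUM(consumption_amount) AS total_consumption,
               LAG(SUM(consumption_amount)) OVER (
                   PARTITION BY food_category
                   ORDER BY DATE_FORMAT(consumption_date, 'yyyy-MM')
               ) AS prev_consumption
        FROM consumption_data
        GROUP BY DATE_FORMAT(consumption_date, 'yyyy-MM'), food_category
    ) AS monthly
    WHERE prev_consumption IS NOT NULL AND prev_consumption > 0
"""
growth_df = spark.sql(growth_sql)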
6. System Documentation Showcase
Closing