基于大数据的全球经济指标数据分析系统 | 计算机专业的高光时刻:用Hadoop+Spark做全球经济指标分析的那些日子

53 阅读 · 约 6 分钟

💖💖作者:计算机毕业设计江挽 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目

基于大数据的全球经济指标数据分析系统介绍

全球经济指标数据分析系统是一套基于大数据技术架构的综合性经济数据分析平台,采用Hadoop+Spark分布式计算框架作为核心数据处理引擎,结合Python数据科学生态和Django Web框架构建后端服务,前端使用Vue+ElementUI+Echarts技术栈实现数据可视化展示。系统通过HDFS分布式文件系统存储海量经济指标数据,利用Spark SQL进行高效的数据查询和分析,配合Pandas、NumPy等科学计算库完成复杂的统计分析任务。系统功能涵盖经济指标数据管理、国家经济画像分析、全球经济格局分析、政府财政健康分析以及宏观经济健康分析等核心模块,为用户提供从数据采集、存储、处理到可视化展示的完整解决方案。通过交互式图表和动态仪表板,用户可以直观地观察全球各国经济发展趋势,深入分析经济指标间的关联性,为经济研究和决策提供数据支撑。

基于大数据的全球经济指标数据分析系统演示视频

演示视频

基于大数据的全球经济指标数据分析系统演示图片

在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述

基于大数据的全球经济指标数据分析系统代码展示

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum as spark_sum, avg, max as spark_max, min as spark_min, when, desc
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views import View
import json

def create_spark_session():
    """Build (or fetch the already-running) SparkSession for the
    economic-indicator analysis jobs.

    Adaptive query execution and partition coalescing are switched on so
    Spark can tune shuffle partitioning at runtime.
    """
    builder = SparkSession.builder.appName("GlobalEconomicIndicatorAnalysis")
    builder = builder.config("spark.sql.adaptive.enabled", "true")
    builder = builder.config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    return builder.getOrCreate()

def economic_indicator_management(request):
    """Django view: list economic-indicator records from HDFS with optional
    filtering, enriched with per-capita GDP and a coarse health label.

    GET params:
        country -- substring match against country_name (optional)
        year    -- exact year match (optional; non-numeric values ignored)

    Returns:
        JsonResponse {'status': 'success', 'data': [...], 'total': int}
    """
    spark = create_spark_session()
    try:
        # Explicit schema: avoids an extra inference pass over the CSV and
        # keeps numeric columns typed even when sparsely populated.
        schema = StructType([
            StructField("country_code", StringType(), True),
            StructField("country_name", StringType(), True),
            StructField("year", IntegerType(), True),
            StructField("gdp", DoubleType(), True),
            StructField("inflation_rate", DoubleType(), True),
            StructField("unemployment_rate", DoubleType(), True),
            StructField("trade_balance", DoubleType(), True),
            StructField("government_debt", DoubleType(), True),
            StructField("population", IntegerType(), True)
        ])
        df = spark.read.format("csv").option("header", "true").schema(schema).load("hdfs://localhost:9000/economic_data/indicators.csv")
        country_filter = request.GET.get('country', '')
        year_filter = request.GET.get('year', '')
        # SECURITY: filter through the DataFrame API instead of splicing raw
        # request values into a SQL string -- the original f-string query
        # ("... LIKE '%{country_filter}%'") was SQL-injectable.
        result_df = df
        if country_filter:
            result_df = result_df.filter(col("country_name").contains(country_filter))
        if year_filter:
            try:
                result_df = result_df.filter(col("year") == int(year_filter))
            except ValueError:
                # A non-numeric year used to blow up the Spark SQL parse;
                # ignore the malformed filter instead of crashing the view.
                pass
        result_df = result_df.orderBy(desc("year"), desc("gdp"))
        enriched_df = result_df.withColumn("gdp_per_capita", col("gdp") / col("population"))
        enriched_df = enriched_df.withColumn("economic_health_score",
            when((col("inflation_rate") < 3) & (col("unemployment_rate") < 5), "Healthy")
            .when((col("inflation_rate") < 5) & (col("unemployment_rate") < 8), "Moderate")
            .otherwise("Concerning"))

        def _num(value):
            # NULL-safe conversion. The original `if value else 0` also
            # collapsed a legitimate 0.0; test identity with None instead.
            return float(value) if value is not None else 0

        data_list = [{
            'country_code': row['country_code'],
            'country_name': row['country_name'],
            'year': row['year'],
            'gdp': _num(row['gdp']),
            'inflation_rate': _num(row['inflation_rate']),
            'unemployment_rate': _num(row['unemployment_rate']),
            'trade_balance': _num(row['trade_balance']),
            'government_debt': _num(row['government_debt']),
            'population': row['population'],
            'gdp_per_capita': _num(row['gdp_per_capita']),
            'economic_health_score': row['economic_health_score']
        } for row in enriched_df.collect()]
    finally:
        # Original leaked the session on any exception before spark.stop().
        spark.stop()
    return JsonResponse({'status': 'success', 'data': data_list, 'total': len(data_list)})

def country_economic_portrait_analysis(request):
    """Django view: build an economic portrait for one country — summary
    statistics, year-over-year trend, indicator correlations, a stability
    score and the country's global GDP ranking (years >= 2020).

    GET params:
        country_code -- country code to analyse, defaults to 'CHN'

    Returns:
        JsonResponse {'status': 'success', 'data': {...}}
    """
    spark = create_spark_session()
    country_code = request.GET.get('country_code', 'CHN')
    # SECURITY: double any single quote before embedding the value in SQL --
    # the original interpolated the raw request value into three f-string
    # queries and was SQL-injectable.
    safe_code = country_code.replace("'", "''")
    try:
        df = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load("hdfs://localhost:9000/economic_data/detailed_indicators.csv")
        df.createOrReplaceTempView("country_indicators")
        basic_stats = spark.sql(f"""
            SELECT country_name, 
                   AVG(gdp) as avg_gdp,
                   AVG(inflation_rate) as avg_inflation,
                   AVG(unemployment_rate) as avg_unemployment,
                   AVG(trade_balance) as avg_trade_balance,
                   MAX(gdp) as max_gdp,
                   MIN(gdp) as min_gdp,
                   COUNT(*) as data_points
            FROM country_indicators 
            WHERE country_code = '{safe_code}'
            GROUP BY country_name
        """).collect()
        # LAG over year gives the previous year's value for growth deltas.
        trend_analysis = spark.sql(f"""
            SELECT year, gdp, inflation_rate, unemployment_rate, trade_balance,
                   LAG(gdp) OVER (ORDER BY year) as prev_gdp,
                   LAG(inflation_rate) OVER (ORDER BY year) as prev_inflation
            FROM country_indicators 
            WHERE country_code = '{safe_code}'
            ORDER BY year
        """)
        trend_df = trend_analysis.withColumn("gdp_growth_rate",
            when(col("prev_gdp").isNotNull(), ((col("gdp") - col("prev_gdp")) / col("prev_gdp") * 100)).otherwise(0))
        trend_df = trend_df.withColumn("inflation_change",
            when(col("prev_inflation").isNotNull(), (col("inflation_rate") - col("prev_inflation"))).otherwise(0))
        trend_data = trend_df.select("year", "gdp", "gdp_growth_rate", "inflation_rate", "inflation_change", "unemployment_rate", "trade_balance").collect()
        pandas_df = pd.DataFrame([row.asDict() for row in trend_data])
        correlation_matrix = pandas_df[['gdp_growth_rate', 'inflation_rate', 'unemployment_rate']].corr()
        stability_score = calculate_economic_stability(pandas_df)
        # Ranking is computed over every country, then narrowed to this one;
        # the equality filter uses the DataFrame API so no escaping is needed.
        competitiveness_ranking = spark.sql("""
            SELECT country_code, country_name, AVG(gdp) as avg_gdp,
                   RANK() OVER (ORDER BY AVG(gdp) DESC) as gdp_ranking
            FROM country_indicators 
            WHERE year >= 2020
            GROUP BY country_code, country_name
        """).filter(col("country_code") == country_code).collect()
        portrait_result = {
            'basic_statistics': basic_stats[0].asDict() if basic_stats else {},
            'trend_analysis': [row.asDict() for row in trend_data],
            'correlation_analysis': correlation_matrix.to_dict(),
            'stability_score': stability_score,
            'global_ranking': competitiveness_ranking[0]['gdp_ranking'] if competitiveness_ranking else 0
        }
    finally:
        # Original leaked the session on any exception before spark.stop().
        spark.stop()
    return JsonResponse({'status': 'success', 'data': portrait_result})

def global_economic_pattern_analysis(request):
    """Django view summarising the global economic landscape for 2023:
    regional aggregates, the top-20 economies, income-group classification,
    trade-balance concentration, and multi-year volatility leaders.

    Returns:
        JsonResponse {'status': 'success', 'data': {...}}
    """
    spark = create_spark_session()
    frame = spark.read.format("csv").option("header", "true").option("inferSchema", "true").load("hdfs://localhost:9000/economic_data/global_data.csv")
    frame.createOrReplaceTempView("global_economy")

    def run(statement):
        # Execute one report query and collect its rows.
        return spark.sql(statement).collect()

    # Per-region aggregates for the latest full year.
    regional_rows = run("""
        SELECT region, 
               COUNT(DISTINCT country_code) as country_count,
               SUM(gdp) as total_gdp,
               AVG(gdp) as avg_gdp,
               AVG(inflation_rate) as avg_inflation,
               AVG(unemployment_rate) as avg_unemployment,
               SUM(population) as total_population
        FROM global_economy 
        WHERE year = 2023
        GROUP BY region
        ORDER BY total_gdp DESC
    """)
    # Twenty largest economies by absolute GDP.
    powerhouse_rows = run("""
        SELECT country_code, country_name, gdp, 
               RANK() OVER (ORDER BY gdp DESC) as gdp_rank,
               trade_balance, government_debt,
               (gdp / population) as gdp_per_capita
        FROM global_economy 
        WHERE year = 2023 AND gdp IS NOT NULL
        ORDER BY gdp DESC
        LIMIT 20
    """)
    # World-Bank-style income buckets from per-capita GDP thresholds.
    classification_rows = run("""
        SELECT 
            CASE 
                WHEN gdp/population >= 40000 THEN 'Developed'
                WHEN gdp/population >= 12000 THEN 'Upper Middle Income'
                WHEN gdp/population >= 4000 THEN 'Lower Middle Income'
                ELSE 'Low Income'
            END as income_group,
            COUNT(*) as country_count,
            AVG(gdp/population) as avg_gdp_per_capita,
            AVG(inflation_rate) as avg_inflation
        FROM global_economy 
        WHERE year = 2023 AND population > 0
        GROUP BY 
            CASE 
                WHEN gdp/population >= 40000 THEN 'Developed'
                WHEN gdp/population >= 12000 THEN 'Upper Middle Income'
                WHEN gdp/population >= 4000 THEN 'Lower Middle Income'
                ELSE 'Low Income'
            END
        ORDER BY avg_gdp_per_capita DESC
    """)
    # Share of global trade imbalance held by the 15 largest (by magnitude).
    trade_rows = run("""
        SELECT country_code, country_name, trade_balance,
               SUM(trade_balance) OVER () as global_trade_sum,
               trade_balance / SUM(ABS(trade_balance)) OVER () * 100 as trade_share
        FROM global_economy 
        WHERE year = 2023 AND trade_balance IS NOT NULL
        ORDER BY ABS(trade_balance) DESC
        LIMIT 15
    """)
    # Most volatile economies over the last few years (needs >= 3 samples).
    volatility_rows = run("""
        SELECT country_code, country_name,
               STDDEV(gdp) as gdp_volatility,
               STDDEV(inflation_rate) as inflation_volatility,
               AVG(inflation_rate) as avg_inflation
        FROM global_economy 
        WHERE year >= 2019
        GROUP BY country_code, country_name
        HAVING COUNT(*) >= 3
        ORDER BY gdp_volatility DESC
        LIMIT 10
    """)

    def as_dicts(rows):
        return [record.asDict() for record in rows]

    pattern_result = {
        'regional_distribution': as_dicts(regional_rows),
        'economic_powerhouse': as_dicts(powerhouse_rows),
        'development_classification': as_dicts(classification_rows),
        'trade_network_analysis': as_dicts(trade_rows),
        'volatility_ranking': as_dicts(volatility_rows)
    }
    spark.stop()
    return JsonResponse({'status': 'success', 'data': pattern_result})

def calculate_economic_stability(df):
    """Score economic stability on a 0-100 scale from indicator volatility.

    Volatility is the population standard deviation (np.std, ddof=0) of the
    'gdp_growth_rate', 'inflation_rate' and 'unemployment_rate' columns of
    *df* (a pandas DataFrame), weighted 2/3/2 -- inflation swings count most.
    Higher combined volatility lowers the score; it is floored at 0.

    Returns 0 when fewer than 3 rows are available, or when any series is
    entirely NaN (the original np.std over an empty array returned NaN with
    a RuntimeWarning and silently poisoned the score).
    """
    if len(df) < 3:
        return 0

    def _volatility(series):
        # Drop NaNs before measuring spread; an all-NaN column must not
        # propagate NaN into the final score.
        values = series.dropna()
        if values.empty:
            return None
        return float(np.std(values))

    # Weighted penalty: inflation volatility is weighted heaviest.
    weights = {'gdp_growth_rate': 2, 'inflation_rate': 3, 'unemployment_rate': 2}
    penalty = 0.0
    for column, weight in weights.items():
        vol = _volatility(df[column])
        if vol is None:
            return 0
        penalty += vol * weight
    return round(max(0, 100 - penalty), 2)

基于大数据的全球经济指标数据分析系统文档展示

在这里插入图片描述

💖💖作者:计算机毕业设计江挽 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目