Python Big Data Graduation Project: A Spark-Based Financial Data Analysis System (Django Version) | System Design



1. Development Tools

  • Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
  • Development language: Python + Java (both versions are supported)
  • Backend framework: Django + Spring Boot (Spring + Spring MVC + MyBatis) (both versions are supported)
  • Frontend: Vue + ElementUI + ECharts + HTML + CSS + JavaScript + jQuery
  • Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
  • Database: MySQL

2. System Overview

The Big-Data-Based Financial Data Analysis and Visualization System is an integrated platform for collecting, storing, analyzing, and visualizing financial data. Python is the primary development language, and Django provides the web application framework. The Hadoop Distributed File System (HDFS) stores large volumes of financial data reliably, while the Spark engine performs distributed computation and analysis. The frontend is built with Vue.js and the ElementUI component library and uses ECharts for rich data visualization. Core modules cover user management, financial data management, customer behavior analysis, customer profiling, macroeconomic analysis, and marketing effectiveness analysis, together with an intuitive visualization dashboard. Spark SQL handles complex queries and statistical analysis, while Pandas and NumPy support data preprocessing and scientific computing, giving financial institutions an end-to-end data analysis solution.
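As a minimal sketch of that Spark SQL plus Pandas workflow, consider the snippet below; the in-memory sample data and column names are illustrative assumptions rather than the project's real schema:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("SparkSqlPandasSketch").getOrCreate()

# Hypothetical miniature of the customer_transactions table.
df = spark.createDataFrame(
    [(1, "deposit", 1200.0), (1, "withdrawal", 300.0), (2, "deposit", 800.0)],
    ["customer_id", "transaction_type", "transaction_amount"],
)
df.createOrReplaceTempView("customer_transactions")

# Heavy aggregation runs distributed via Spark SQL...
summary = spark.sql("""
    SELECT customer_id,
           COUNT(*)                AS transaction_count,
           SUM(transaction_amount) AS total_amount
    FROM customer_transactions
    GROUP BY customer_id
""")

# ...and only the small aggregated result is pulled into Pandas for local post-processing.
pdf = summary.toPandas()
pdf["share_of_total"] = pdf["total_amount"] / pdf["total_amount"].sum()
print(pdf)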

3. System Demo

[Demo video: Spark-based financial data analysis system, Django version]

4. System Interface

[System interface screenshots]

5. Source Code
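The core analysis logic is implemented as Django class-based views that delegate the heavy computation to Spark; three representative views (customer behavior, customer profiling, and macroeconomic analysis) are shown below.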



from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, avg, count, when, desc, lag
from pyspark.sql.window import Window
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.clustering import KMeans
from django.http import JsonResponse
from django.views import View

# One SparkSession shared by all views, created once at module import.
spark = (
    SparkSession.builder
    .appName("FinancialDataAnalysis")
    .config("spark.executor.memory", "2g")
    .config("spark.driver.memory", "1g")
    .getOrCreate()
)

def read_mysql_table(table_name):
    # Shared JDBC loader; the MySQL driver jar must be on the Spark classpath.
    return (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/financial_db")
        .option("dbtable", table_name)
        .option("user", "root")
        .option("password", "123456")
        .load()
    )

class CustomerBehaviorAnalysis(View):
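    # POST endpoint: per-customer transaction statistics, behavior segments, and population averages.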
    def post(self, request):
        financial_data = read_mysql_table("customer_transactions")
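        # Per-customer aggregates: volume, value, and counts by transaction type.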
        customer_behavior_df = financial_data.groupBy("customer_id").agg(
            count("transaction_id").alias("transaction_count"),
            sum("transaction_amount").alias("total_amount"),
            avg("transaction_amount").alias("avg_amount"),
            count(when(col("transaction_type") == "deposit", 1)).alias("deposit_count"),
            count(when(col("transaction_type") == "withdrawal", 1)).alias("withdrawal_count"),
            count(when(col("transaction_type") == "transfer", 1)).alias("transfer_count")
        )
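        # Population-wide averages, used as thresholds for the segments below.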
        behavior_stats = customer_behavior_df.select(
            avg("transaction_count").alias("avg_transaction_count"),
            avg("total_amount").alias("avg_total_amount"),
            avg("avg_amount").alias("overall_avg_amount")
        ).collect()[0]
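        # Customers well above the average total value or transaction frequency.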
        high_value_customers = customer_behavior_df.filter(col("total_amount") > behavior_stats["avg_total_amount"] * 2)
        active_customers = customer_behavior_df.filter(col("transaction_count") > behavior_stats["avg_transaction_count"] * 1.5)
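        # Label each customer's dominant behavior from relative transaction-type counts.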
        behavior_patterns = customer_behavior_df.withColumn("behavior_type", 
            when(col("deposit_count") > col("withdrawal_count"), "savings_oriented")
            .when(col("transfer_count") > col("deposit_count"), "transfer_heavy")
            .otherwise("balanced")
        )
        pattern_distribution = behavior_patterns.groupBy("behavior_type").count().orderBy(desc("count"))
        result_data = {
            "high_value_count": high_value_customers.count(),
            "active_count": active_customers.count(),
            "behavior_patterns": [row.asDict() for row in pattern_distribution.collect()],
            "average_stats": behavior_stats.asDict()
        }
        return JsonResponse(result_data)

class CustomerProfileAnalysis(View):
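    # POST endpoint: K-Means customer segmentation on demographic and behavioral features.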
    def post(self, request):
        customer_data = read_mysql_table("customer_profiles")
        transaction_data = read_mysql_table("customer_transactions")
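        # Behavioral metrics per customer derived from the transaction history.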
        customer_metrics = transaction_data.groupBy("customer_id").agg(
            sum("transaction_amount").alias("total_spending"),
            count("transaction_id").alias("transaction_frequency"),
            avg("transaction_amount").alias("avg_transaction"),
            count(when(col("transaction_type") == "investment", 1)).alias("investment_count")
        )
        profile_data = customer_data.join(customer_metrics, "customer_id", "left")
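        # Assemble a numeric feature vector (nulls filled with 0) and standardize it.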
        feature_cols = ["age", "income", "total_spending", "transaction_frequency", "avg_transaction", "investment_count"]
        assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
        feature_vector = assembler.transform(profile_data.na.fill(0))
        scaler = StandardScaler(inputCol="features", outputCol="scaled_features")
        scaler_model = scaler.fit(feature_vector)
        scaled_data = scaler_model.transform(feature_vector)
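        # Cluster the standardized features into 4 customer segments.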
        kmeans = KMeans(k=4, seed=42, featuresCol="scaled_features")
        kmeans_model = kmeans.fit(scaled_data)
        clustered_data = kmeans_model.transform(scaled_data)
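        # Size and average attributes of each cluster.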
        cluster_summary = clustered_data.groupBy("prediction").agg(
            count("customer_id").alias("customer_count"),
            avg("age").alias("avg_age"),
            avg("income").alias("avg_income"),
            avg("total_spending").alias("avg_spending"),
            avg("transaction_frequency").alias("avg_frequency")
        ).orderBy("prediction")
        # Attach a human-readable segment name to each cluster based on its averages.
        cluster_profiles = []
        for row in cluster_summary.collect():
            profile = row.asDict()
            if profile["avg_spending"] > 50000 and profile["avg_income"] > 80000:
                profile["segment_name"] = "high-value customers"
            elif profile["avg_frequency"] > 20:
                profile["segment_name"] = "active customers"
            elif profile["avg_age"] < 35:
                profile["segment_name"] = "young customers"
            else:
                profile["segment_name"] = "regular customers"
            cluster_profiles.append(profile)
        profile_result = {
            "cluster_count": 4,
            "total_customers": clustered_data.count(),
            "customer_segments": cluster_profiles
        }
        return JsonResponse(profile_result)

class MacroEconomicAnalysis(View):
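    # POST endpoint: macroeconomic indicator trends and market performance summary.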
    def post(self, request):
        economic_data = read_mysql_table("economic_indicators")
        market_data = read_mysql_table("market_data")
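        # Chronological series of the core indicators (not returned directly; useful for trend charts).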
        economic_trends = economic_data.select("date", "gdp_growth", "inflation_rate", "unemployment_rate", "interest_rate").orderBy("date")
        # Overall GDP picture: average growth and number of periods with positive growth.
        gdp_trend = economic_data.agg(
            avg("gdp_growth").alias("overall_gdp_avg"),
            sum(when(col("gdp_growth") > 0, 1).otherwise(0)).alias("positive_months")
        ).collect()[0]
        # Overall inflation picture: average rate and number of periods above 3%.
        inflation_analysis = economic_data.agg(
            avg("inflation_rate").alias("overall_inflation_avg"),
            sum(when(col("inflation_rate") > 3, 1).otherwise(0)).alias("high_inflation_months")
        ).collect()[0]
        # Join market and economic data by date and flag periods where the stock
        # index rose versus the previous observation (global ordering over a
        # single partition, acceptable for a small monthly series).
        market_correlation = market_data.join(economic_data, "date", "inner")
        correlation_analysis = market_correlation.select(
            col("stock_index"),
            col("gdp_growth"),
            col("inflation_rate"),
            when(col("stock_index") > lag("stock_index").over(Window.orderBy("date")), 1)
            .otherwise(0).alias("market_up")
        )
        market_up_ratio = correlation_analysis.agg(avg("market_up").alias("up_ratio")).collect()[0]["up_ratio"]
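        # Average market performance over the full period.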
        market_performance = market_data.select(
            avg("stock_index").alias("avg_stock_index"),
            avg("bond_yield").alias("avg_bond_yield"),
            avg("currency_rate").alias("avg_currency_rate")
        ).collect()[0]
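        # The 12 most recent observations of the core indicators.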
        economic_indicators = economic_data.select(
            "date",
            "gdp_growth",
            "inflation_rate", 
            "unemployment_rate",
            "interest_rate"
        ).orderBy(desc("date")).limit(12)
        macro_result = {
            "gdp_analysis": gdp_trend.asDict(),
            "inflation_analysis": inflation_analysis.asDict(),
            "market_performance": market_performance.asDict(),
            "market_up_ratio": market_up_ratio,
            "recent_indicators": [row.asDict() for row in economic_indicators.collect()],
            "trend_summary": "Macroeconomic trend assessment derived from the analysis results"
        }
        return JsonResponse(macro_result)
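To illustrate how these views could be exposed to the Vue frontend, here is a sketch of the Django URL configuration; the module path analysis.views and the route paths are assumptions for illustration, not the project's actual layout:

# urls.py (hypothetical wiring; adjust module paths to the real project layout)
from django.urls import path
from analysis.views import (
    CustomerBehaviorAnalysis,
    CustomerProfileAnalysis,
    MacroEconomicAnalysis,
)

urlpatterns = [
    path("api/analysis/behavior/", CustomerBehaviorAnalysis.as_view()),
    path("api/analysis/profile/", CustomerProfileAnalysis.as_view()),
    path("api/analysis/macro/", MacroEconomicAnalysis.as_view()),
]

The Vue + ECharts frontend would then POST to these routes and render the returned JSON in the dashboard; depending on how the frontend authenticates, CSRF handling may also be needed for the POST requests.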

6. System Documentation

[System documentation screenshot]

End