Preface
- 💖💖 Author: 计算机程序员小杨
- 💙💙 About me: I work in the computer field and am comfortable across several IT stacks, including Java, WeChat Mini Programs, Python, Golang, and Android. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing text-similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code. Feel free to ask me about anything code-related!
- 💛💛 A quick note: thank you all for your attention and support!
- 💕💕 To get the source code, contact 计算机程序员小杨 at the end of this article
- 💜💜
- Web application projects
- Android / Mini Program projects
- Big data projects
- Deep learning projects
- Graduation project topic selection
- 💜💜
I. Development Tools
- Big data stack: Hadoop + Spark (Hive is not used in this build; customization is supported)
- Languages: Python + Java (both versions are available)
- Back end: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are available)
- Front end: Vue + ElementUI + ECharts + HTML + CSS + JavaScript + jQuery
- Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy (see the sketch after this list)
- Database: MySQL
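
Taken together, these components form a simple data path: Spark pulls raw data from HDFS, Spark SQL aggregates it on the cluster, and only the small aggregated result moves into Pandas/NumPy for local computation. A minimal sketch of that path, in which the HDFS URI, file name, and column names are hypothetical placeholders:

```python
# Sketch of the stack's data path: Spark reads raw transaction files from
# HDFS, Spark SQL aggregates them, and the small result is pulled into
# Pandas/NumPy locally. Path and columns are hypothetical placeholders.
from pyspark.sql import SparkSession
import numpy as np

spark = SparkSession.builder.appName("StackSmokeTest").getOrCreate()

# Hypothetical path; in a real deployment this points at the cluster's NameNode.
df = spark.read.csv("hdfs://localhost:9000/finance/transactions.csv",
                    header=True, inferSchema=True)
df.createOrReplaceTempView("transactions")

# Spark SQL does the heavy aggregation on the cluster...
daily = spark.sql("""
    SELECT transaction_date, SUM(transaction_amount) AS daily_total
    FROM transactions
    GROUP BY transaction_date
""")

# ...and only the aggregated result is converted to Pandas for local analysis.
pdf = daily.toPandas()
print("std of daily totals:", np.std(pdf["daily_total"].to_numpy()))
```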
II. System Overview
The "Financial Data Analysis and Visualization System Based on Big Data" (《基于大数据的金融数据分析与可视化系统》) is an integrated platform for collecting, storing, analyzing, and visualizing financial data. Python is the primary development language and Django provides the web application framework. The Hadoop distributed file system (HDFS) stores large volumes of financial data reliably, while the Spark engine performs efficient distributed computation and analysis. The front end is built with Vue.js and the ElementUI component library, and the ECharts charting library provides rich data visualizations. The core modules cover user management, financial data management, customer behavior analysis, customer profiling, macroeconomic analysis, and marketing effectiveness analysis, together with an at-a-glance visualization dashboard. Complex queries and statistics run on Spark SQL, and Pandas and NumPy handle data preprocessing and scientific computing, giving financial institutions an end-to-end data analysis solution.
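
The analysis views shown in section V are plain Django class-based views, so wiring them into the application is only a matter of URL routing. A minimal sketch, assuming the three views live in a local views.py; the route paths are placeholders, not the project's actual URLs:

```python
# urls.py -- a minimal routing sketch. The import path and the URL patterns
# are assumptions for illustration; only the view classes come from section V.
from django.urls import path
from .views import (CustomerBehaviorAnalysis, CustomerProfileAnalysis,
                    MacroEconomicAnalysis)

urlpatterns = [
    path("api/behavior/", CustomerBehaviorAnalysis.as_view()),
    path("api/profile/", CustomerProfileAnalysis.as_view()),
    path("api/macro/", MacroEconomicAnalysis.as_view()),
]
```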
III. Feature Demonstration
Demo video: Python Big-Data Graduation Project: A Spark-Based Financial Data Analysis System, Django Edition Walkthrough | System Design
IV. UI Screenshots
V. Source Code
The core analysis views are shown below, starting with the shared imports and the SparkSession that all three views reuse:
```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, avg, count, when, desc, lag
from pyspark.sql.window import Window
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.clustering import KMeans
import pandas as pd  # used for local post-processing elsewhere in the project
import numpy as np
from django.http import JsonResponse
from django.views import View

# Shared SparkSession, sized for a small demo cluster.
spark = (SparkSession.builder.appName("FinancialDataAnalysis")
         .config("spark.executor.memory", "2g")
         .config("spark.driver.memory", "1g")
         .getOrCreate())
```
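
The first view computes per-customer transaction aggregates on Spark, derives high-value and active-customer thresholds from the global averages, and classifies each customer's dominant behavior pattern before returning the summary as JSON: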
```python
class CustomerBehaviorAnalysis(View):
    def post(self, request):
        # Load the transaction table from MySQL over JDBC.
        financial_data = (spark.read.format("jdbc")
                          .option("url", "jdbc:mysql://localhost:3306/financial_db")
                          .option("dbtable", "customer_transactions")
                          .option("user", "root")
                          .option("password", "123456")
                          .load())
        # Per-customer aggregates. count(when(cond, 1)) counts only matching
        # rows: when() without otherwise() yields NULL, which count() skips.
        customer_behavior_df = financial_data.groupBy("customer_id").agg(
            count("transaction_id").alias("transaction_count"),
            sum("transaction_amount").alias("total_amount"),
            avg("transaction_amount").alias("avg_amount"),
            count(when(col("transaction_type") == "deposit", 1)).alias("deposit_count"),
            count(when(col("transaction_type") == "withdrawal", 1)).alias("withdrawal_count"),
            count(when(col("transaction_type") == "transfer", 1)).alias("transfer_count")
        )
        # Global averages, used as thresholds below.
        behavior_stats = customer_behavior_df.select(
            avg("transaction_count").alias("avg_transaction_count"),
            avg("total_amount").alias("avg_total_amount"),
            avg("avg_amount").alias("overall_avg_amount")
        ).collect()[0]
        # High-value: more than twice the average total amount.
        high_value_customers = customer_behavior_df.filter(
            col("total_amount") > behavior_stats["avg_total_amount"] * 2)
        # Active: at least 1.5x the average transaction count.
        active_customers = customer_behavior_df.filter(
            col("transaction_count") > behavior_stats["avg_transaction_count"] * 1.5)
        # Classify each customer's dominant behavior pattern.
        behavior_patterns = customer_behavior_df.withColumn(
            "behavior_type",
            when(col("deposit_count") > col("withdrawal_count"), "savings_oriented")
            .when(col("transfer_count") > col("deposit_count"), "transfer_heavy")
            .otherwise("balanced")
        )
        pattern_distribution = behavior_patterns.groupBy("behavior_type").count().orderBy(desc("count"))
        result_data = {
            "high_value_count": high_value_customers.count(),
            "active_count": active_customers.count(),
            "behavior_patterns": [row.asDict() for row in pattern_distribution.collect()],
            "average_stats": behavior_stats.asDict()
        }
        return JsonResponse(result_data)
```
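
The profiling view joins demographic attributes with behavioral metrics, standardizes the combined feature vector, and segments customers into four groups with Spark MLlib's K-means; each cluster then receives a readable segment name based on its averages: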
```python
class CustomerProfileAnalysis(View):
    def post(self, request):
        # Load the profile and transaction tables from MySQL.
        customer_data = (spark.read.format("jdbc")
                         .option("url", "jdbc:mysql://localhost:3306/financial_db")
                         .option("dbtable", "customer_profiles")
                         .option("user", "root")
                         .option("password", "123456")
                         .load())
        transaction_data = (spark.read.format("jdbc")
                            .option("url", "jdbc:mysql://localhost:3306/financial_db")
                            .option("dbtable", "customer_transactions")
                            .option("user", "root")
                            .option("password", "123456")
                            .load())
        # Behavioral metrics per customer.
        customer_metrics = transaction_data.groupBy("customer_id").agg(
            sum("transaction_amount").alias("total_spending"),
            count("transaction_id").alias("transaction_frequency"),
            avg("transaction_amount").alias("avg_transaction"),
            count(when(col("transaction_type") == "investment", 1)).alias("investment_count")
        )
        # Left join keeps customers without transactions; their NULL metrics
        # are filled with 0 before feature assembly.
        profile_data = customer_data.join(customer_metrics, "customer_id", "left")
        feature_cols = ["age", "income", "total_spending", "transaction_frequency",
                        "avg_transaction", "investment_count"]
        assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
        feature_vector = assembler.transform(profile_data.na.fill(0))
        # Standardize features so K-means distances are not dominated by scale.
        scaler = StandardScaler(inputCol="features", outputCol="scaled_features")
        scaler_model = scaler.fit(feature_vector)
        scaled_data = scaler_model.transform(feature_vector)
        # K-means with four clusters; the fixed seed makes runs reproducible.
        kmeans = KMeans(k=4, seed=42, featuresCol="scaled_features")
        kmeans_model = kmeans.fit(scaled_data)
        clustered_data = kmeans_model.transform(scaled_data)
        # Summarize each cluster for labeling.
        cluster_summary = clustered_data.groupBy("prediction").agg(
            count("customer_id").alias("customer_count"),
            avg("age").alias("avg_age"),
            avg("income").alias("avg_income"),
            avg("total_spending").alias("avg_spending"),
            avg("transaction_frequency").alias("avg_frequency")
        ).orderBy("prediction")
        cluster_profiles = []
        for row in cluster_summary.collect():
            profile = row.asDict()
            # Attach a human-readable segment name based on cluster averages.
            if profile["avg_spending"] > 50000 and profile["avg_income"] > 80000:
                profile["segment_name"] = "high-value customers"
            elif profile["avg_frequency"] > 20:
                profile["segment_name"] = "active customers"
            elif profile["avg_age"] < 35:
                profile["segment_name"] = "young customers"
            else:
                profile["segment_name"] = "regular customers"
            cluster_profiles.append(profile)
        profile_result = {
            "cluster_count": 4,
            "total_customers": clustered_data.count(),
            "customer_segments": cluster_profiles
        }
        return JsonResponse(profile_result)
```
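
The macroeconomic view aggregates the indicator and market tables: average GDP growth alongside the number of positive-growth months, average inflation alongside the months above a 3% threshold, and a market-direction flag computed with a lag window over the joined series: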
```python
class MacroEconomicAnalysis(View):
    def post(self, request):
        # Load the indicator and market tables from MySQL.
        economic_data = (spark.read.format("jdbc")
                         .option("url", "jdbc:mysql://localhost:3306/financial_db")
                         .option("dbtable", "economic_indicators")
                         .option("user", "root")
                         .option("password", "123456")
                         .load())
        market_data = (spark.read.format("jdbc")
                       .option("url", "jdbc:mysql://localhost:3306/financial_db")
                       .option("dbtable", "market_data")
                       .option("user", "root")
                       .option("password", "123456")
                       .load())
        # Full indicator time series (computed here but not returned in this excerpt).
        economic_trends = economic_data.select(
            "date", "gdp_growth", "inflation_rate",
            "unemployment_rate", "interest_rate").orderBy("date")
        # Row-level when() flags cannot be mixed with avg() in a plain select,
        # so both statistics are computed as single-pass global aggregates.
        gdp_trend = economic_data.agg(
            avg("gdp_growth").alias("overall_gdp_avg"),
            sum(when(col("gdp_growth") > 0, 1).otherwise(0)).alias("positive_months")
        ).collect()[0]
        inflation_analysis = economic_data.agg(
            avg("inflation_rate").alias("overall_inflation_avg"),
            sum(when(col("inflation_rate") > 3, 1).otherwise(0)).alias("high_inflation_months")
        ).collect()[0]
        # Join market and indicator series by date and flag rows where the stock
        # index rose against the previous date (not returned in this excerpt).
        market_correlation = market_data.join(economic_data, "date", "inner")
        correlation_analysis = market_correlation.select(
            col("stock_index"),
            col("gdp_growth"),
            col("inflation_rate"),
            when(col("stock_index") > lag("stock_index").over(Window.orderBy("date")), 1)
            .otherwise(0).alias("market_up")
        )
        # Global market averages; a select of pure aggregates is a valid global aggregation.
        market_performance = market_data.select(
            avg("stock_index").alias("avg_stock_index"),
            avg("bond_yield").alias("avg_bond_yield"),
            avg("currency_rate").alias("avg_currency_rate")
        ).collect()[0]
        # Most recent twelve periods of headline indicators for the dashboard.
        economic_indicators = economic_data.select(
            "date", "gdp_growth", "inflation_rate", "unemployment_rate", "interest_rate"
        ).orderBy(desc("date")).limit(12)
        macro_result = {
            "gdp_analysis": gdp_trend.asDict(),
            "inflation_analysis": inflation_analysis.asDict(),
            "market_performance": market_performance.asDict(),
            "recent_indicators": [row.asDict() for row in economic_indicators.collect()],
            "trend_summary": "Macroeconomic trend assessment derived from the analysis results"
        }
        return JsonResponse(macro_result)
```
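
As a quick sanity check, the endpoints can be exercised with Django's test client. A minimal sketch, assuming the hypothetical routes from the urls.py sketch in section II and a configured Django settings module:

```python
# Smoke test for the behavior endpoint. The route "/api/behavior/" is the
# placeholder from the routing sketch above, not the project's actual URL.
import json
from django.test import Client

client = Client()
response = client.post("/api/behavior/")
data = json.loads(response.content)
print(data["behavior_patterns"])  # e.g. [{"behavior_type": "balanced", "count": ...}, ...]
```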