Preface
- 💖💖 Author: 计算机程序员小杨
- 💙💙 About me: I work in the computer field and am experienced in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT areas. I take on customized project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for lowering plagiarism-check similarity. I love technology, enjoy exploring new tools and frameworks, and like solving real problems with code. Feel free to ask me anything about code or technology!
- 💛💛 A word of thanks: thank you all for your attention and support!
- 💕💕 To get the source code, contact 计算机程序员小杨 at the end of this post.
- 💜💜
- Web application projects
- Android / Mini Program projects
- Big data projects
- Deep learning projects
- CS graduation project topic ideas
- 💜💜
1. Development Tools Overview
- Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
- Development languages: Python + Java (both versions are supported)
- Back-end frameworks: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are supported)
- Front end: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
- Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
- Database: MySQL (a minimal sketch of how these pieces fit together follows this list)
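As a minimal sketch of how the Python side of this stack can be wired together: a SparkSession loads the transaction data from HDFS, and analysis results are written back to MySQL over JDBC for the web application to serve. The hostnames, paths, database names, and credentials below are placeholders, not the project's actual configuration, and the MySQL JDBC driver is assumed to be on the Spark classpath.

# Minimal wiring sketch (all hostnames, paths, table names, and credentials are placeholders)
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("CreditCardFraudAnalysis") \
    .getOrCreate()

# Load the raw transaction data from HDFS (hypothetical path)
transactions = spark.read.csv(
    "hdfs://namenode:9000/fraud/transactions.csv",
    header=True,
    inferSchema=True
)

def save_to_mysql(df, table):
    """Write an analysis result back to MySQL over JDBC (requires the MySQL JDBC driver)."""
    df.write.format("jdbc") \
        .option("url", "jdbc:mysql://localhost:3306/fraud_db?useSSL=false") \
        .option("dbtable", table) \
        .option("user", "root") \
        .option("password", "change_me") \
        .mode("overwrite") \
        .save()

The later code sections assume a Spark session and a transaction DataFrame along these lines.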
2. System Overview
This system is a credit card fraud transaction analysis and visualization platform built on a big data stack, with the Hadoop + Spark distributed computing framework as its core architecture, allowing it to process and analyze large volumes of credit card transaction data efficiently. Two complete implementations are provided, Python + Django and Java + Spring Boot; the front end is built with Vue + ElementUI + Echarts for a modern interactive interface, and the back end stores and manages data in MySQL. On the big data side, the system integrates the HDFS distributed file system, the Spark SQL query engine, and data science libraries such as Pandas and NumPy to analyze credit card transactions from every angle. The functionality covers eight core modules: home page display, transaction data management, overall situation analysis, attribute correlation analysis, clustering behavior analysis, spatiotemporal feature analysis, composite amount analysis, and a large-screen visualization dashboard. Multi-dimensional data mining algorithms identify anomalous transaction patterns and present the results as intuitive charts. By mining potential fraud patterns across time distribution, geographic location, transaction amount, and user behavior, the system provides financial institutions with effective risk alerts and decision support, and demonstrates the practical value of big data technology in financial anti-fraud.
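To make the data flow described above concrete, here is a minimal, hypothetical sketch of one back-end endpoint in the Django version: it reads an aggregated result table from MySQL and returns JSON in a shape the Vue + Echarts front end can chart directly. The table and column names (fraud_risk_result, risk_level) are illustrative only, not taken from the project.

# Hypothetical Django view feeding an Echarts chart; table and column names are illustrative
from django.db import connection
from django.http import JsonResponse

def risk_level_distribution(request):
    """Return the number of transactions per risk level as Echarts-ready JSON."""
    with connection.cursor() as cursor:
        cursor.execute(
            "SELECT risk_level, COUNT(*) FROM fraud_risk_result GROUP BY risk_level"
        )
        rows = cursor.fetchall()
    return JsonResponse({
        "categories": [row[0] for row in rows],  # x-axis labels
        "values": [row[1] for row in rows],      # series data
    })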
3. System Feature Demo
If you would like to see other types of CS graduation projects, just let me know; I have them all. Thank you, everyone! For technical questions, feel free to discuss in the comments or message me directly.
4. System UI Showcase
5. System Source Code
# Core feature 1: clustering behavior analysis - anomalous-transaction clustering with Spark ML
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.clustering import KMeans
from pyspark.sql.functions import count, avg, stddev
import numpy as np

def clustering_behavior_analysis(spark_session, transaction_data):
    # Feature engineering: columns used for clustering (assumed numeric, i.e. categorical
    # fields such as merchant_category are already encoded upstream)
    feature_cols = ['amount', 'merchant_category', 'transaction_hour', 'day_of_week',
                    'user_age', 'account_balance', 'transaction_frequency']
    # Assemble the feature vector
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    feature_data = assembler.transform(transaction_data)
    # Standardize the features
    scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
                            withStd=True, withMean=True)
    scaler_model = scaler.fit(feature_data)
    scaled_data = scaler_model.transform(feature_data)
    # K-means clustering
    kmeans = KMeans(k=5, seed=42, featuresCol="scaledFeatures", predictionCol="cluster")
    kmeans_model = kmeans.fit(scaled_data)
    clustered_data = kmeans_model.transform(scaled_data)
    # Per-cluster statistics used to characterize suspicious clusters
    cluster_centers = kmeans_model.clusterCenters()
    cluster_stats = clustered_data.groupBy("cluster").agg(
        count("*").alias("cluster_size"),
        avg("amount").alias("avg_amount"),
        stddev("amount").alias("std_amount"),
        avg("transaction_frequency").alias("avg_frequency")
    ).collect()
    # Anomaly scoring by distance to the cluster center
    # (note: collect() pulls every row to the driver, so this only suits modest data sizes)
    anomaly_threshold = 2.5
    flagged_transactions = []
    for row in clustered_data.collect():
        cluster_id = row['cluster']
        features = row['scaledFeatures'].toArray()
        center = cluster_centers[cluster_id]
        distance = np.linalg.norm(features - center)
        if distance > anomaly_threshold:
            # Flag as a potentially fraudulent transaction
            risk_score = min(100, (distance / anomaly_threshold) * 50)
            flagged_transactions.append({'transaction_id': row['transaction_id'],
                                         'risk_score': float(risk_score)})
    return clustered_data, cluster_stats, flagged_transactions
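A hypothetical call site for this function, reusing the spark session and the save_to_mysql helper sketched in section 1, might look like the following. It assumes the loaded transactions DataFrame actually contains the feature columns listed above.

# Hypothetical usage of the clustering analysis
clustered_df, cluster_stats, flagged = clustering_behavior_analysis(spark, transactions)

# Persist the per-cluster statistics and the flagged transactions for the dashboard
save_to_mysql(spark.createDataFrame(cluster_stats), "cluster_stats")
if flagged:
    save_to_mysql(spark.createDataFrame(flagged), "clustering_flagged_transactions")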
# Core feature 2: spatiotemporal feature analysis - multi-dimensional pattern mining with Spark SQL
def spatiotemporal_feature_analysis(spark_session, transaction_data):
    # Register a temporary view so the data can be queried with SQL
    transaction_data.createOrReplaceTempView("transactions")
    # Temporal anomaly detection: flag users who transact repeatedly at unusual hours
    time_pattern_sql = """
        SELECT
            user_id,
            transaction_hour,
            COUNT(*) as transaction_count,
            AVG(amount) as avg_amount,
            CASE
                WHEN transaction_hour BETWEEN 2 AND 5 THEN 'high_risk_time'
                WHEN transaction_hour BETWEEN 22 AND 23 THEN 'medium_risk_time'
                ELSE 'normal_time'
            END as time_risk_level
        FROM transactions
        GROUP BY user_id, transaction_hour
        HAVING COUNT(*) > 3
    """
    time_analysis = spark_session.sql(time_pattern_sql)
    # Geographic anomaly detection: flag cross-city transactions made within one hour of each other
    location_pattern_sql = """
        WITH user_location_stats AS (
            SELECT
                user_id,
                merchant_city,
                COUNT(*) OVER (PARTITION BY user_id, merchant_city) as city_transaction_count,
                LAG(merchant_city) OVER (PARTITION BY user_id ORDER BY transaction_time) as prev_city,
                transaction_time,
                LAG(transaction_time) OVER (PARTITION BY user_id ORDER BY transaction_time) as prev_time
            FROM transactions
        )
        SELECT
            user_id,
            merchant_city,
            prev_city,
            city_transaction_count,
            CASE
                WHEN merchant_city != prev_city AND
                     (unix_timestamp(transaction_time) - unix_timestamp(prev_time)) < 3600
                THEN 'location_anomaly'
                ELSE 'normal_location'
            END as location_risk_flag,
            (unix_timestamp(transaction_time) - unix_timestamp(prev_time)) / 3600 as time_diff_hours
        FROM user_location_stats
        WHERE prev_city IS NOT NULL
    """
    location_analysis = spark_session.sql(location_pattern_sql)
    # Register the intermediate results so the combined query below can join against them
    time_analysis.createOrReplaceTempView("time_analysis")
    location_analysis.createOrReplaceTempView("location_analysis")
    # Combined spatiotemporal risk score
    combined_risk_sql = """
        SELECT
            t.user_id,
            t.transaction_id,
            ta.time_risk_level,
            la.location_risk_flag,
            t.amount,
            CASE
                WHEN ta.time_risk_level = 'high_risk_time' AND la.location_risk_flag = 'location_anomaly' THEN 90
                WHEN ta.time_risk_level = 'high_risk_time' OR la.location_risk_flag = 'location_anomaly' THEN 65
                WHEN ta.time_risk_level = 'medium_risk_time' THEN 40
                ELSE 15
            END as spatiotemporal_risk_score
        FROM transactions t
        LEFT JOIN time_analysis ta ON t.user_id = ta.user_id AND t.transaction_hour = ta.transaction_hour
        LEFT JOIN location_analysis la ON t.user_id = la.user_id
    """
    return spark_session.sql(combined_risk_sql)
# Core feature 3: composite amount analysis - amount anomaly detection with statistics and machine learning
from pyspark.sql.functions import avg, stddev, count, col, when, udf
from pyspark.sql.functions import min as spark_min, max as spark_max, sum as spark_sum
from pyspark.sql.types import FloatType
from pyspark.sql.window import Window

def amount_composite_analysis(spark_session, transaction_data):
    # Per-user historical amount statistics
    user_amount_stats = transaction_data.groupBy("user_id").agg(
        avg("amount").alias("avg_amount"),
        stddev("amount").alias("std_amount"),
        spark_min("amount").alias("min_amount"),
        spark_max("amount").alias("max_amount"),
        count("amount").alias("transaction_count")
    )
    # Join each transaction with its user's statistics for Z-score based detection
    transaction_with_stats = transaction_data.join(user_amount_stats, "user_id")

    # Per-transaction anomaly score (returns a float because the UDF is declared as FloatType)
    def calculate_amount_anomaly_score(amount, avg_amount, std_amount, max_amount):
        if std_amount is None or std_amount == 0:
            return 0.0
        z_score = abs((amount - avg_amount) / std_amount)
        anomaly_score = 0
        # Z-score based detection
        if z_score > 3:
            anomaly_score += 40
        elif z_score > 2:
            anomaly_score += 25
        # Threshold based detection against the user's own history
        if amount > max_amount * 2:
            anomaly_score += 35
        elif amount > avg_amount * 5:
            anomaly_score += 20
        # Small-amount, high-frequency detection
        if amount < avg_amount * 0.1 and amount > 0:
            anomaly_score += 15
        return float(min(anomaly_score, 100))

    # Register the UDF
    amount_anomaly_udf = udf(calculate_amount_anomaly_score, FloatType())
    # Apply the anomaly scoring
    result_data = transaction_with_stats.withColumn(
        "amount_anomaly_score",
        amount_anomaly_udf(
            col("amount"),
            col("avg_amount"),
            col("std_amount"),
            col("max_amount")
        )
    )
    # Amount pattern analysis: flag round and exact-dollar amounts
    # (the regex check assumes amounts render with two decimal places when cast to string)
    pattern_analysis = result_data.withColumn(
        "amount_pattern_flag",
        when(col("amount") % 100 == 0, "round_hundred")
        .when(col("amount") % 50 == 0, "round_fifty")
        .when(col("amount").cast("string").rlike(".*\\.00$"), "exact_dollar")
        .otherwise("normal_pattern")
    )
    # Rolling-window accumulation over the last six transactions per user
    window_spec = Window.partitionBy("user_id").orderBy("transaction_time").rowsBetween(-5, 0)
    final_result = pattern_analysis.withColumn(
        "rolling_amount_sum",
        spark_sum("amount").over(window_spec)
    ).withColumn(
        "rolling_transaction_count",
        count("amount").over(window_spec)
    ).withColumn(
        "rolling_avg_amount",
        avg("amount").over(window_spec)
    )
    # Final composite risk score
    composite_score = final_result.withColumn(
        "final_amount_risk_score",
        col("amount_anomaly_score") +
        when(col("amount_pattern_flag") != "normal_pattern", 10).otherwise(0) +
        when(col("rolling_amount_sum") > col("avg_amount") * col("transaction_count") * 0.3, 15).otherwise(0)
    )
    return composite_score.select("user_id", "transaction_id", "amount", "amount_anomaly_score",
                                  "amount_pattern_flag", "final_amount_risk_score")