💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐
基于大数据的肾脏疾病风险数据分析系统介绍
肾脏疾病风险数据分析系统是一套基于Hadoop分布式计算框架和Spark大数据处理引擎构建的医疗数据分析平台。该系统采用Python语言结合Django后端框架进行核心业务逻辑开发,前端使用Vue.js配合ElementUI组件库实现用户交互界面,通过Echarts图表库完成数据可视化展示。系统核心功能涵盖肾脏疾病风险数据的全生命周期管理,包括患者基础信息维护、肾功能生化指标的深度分析、疾病合并症对肾脏功能影响的评估、患者人口特征的统计分析、肾脏疾病风险等级的智能预测、多因子相关性的数学建模分析以及综合性数据可视化大屏展示。系统利用Spark SQL进行海量医疗数据的快速查询处理,结合Pandas和NumPy科学计算库实现复杂的统计分析算法,通过HDFS分布式文件系统保障数据存储的可靠性和可扩展性,为医疗机构提供高效准确的肾脏疾病风险评估工具,助力临床决策的科学化和精准化。
基于大数据的肾脏疾病风险数据分析系统演示视频
基于大数据的肾脏疾病风险数据分析系统演示图片
基于大数据的肾脏疾病风险数据分析系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, count, when, sum as spark_sum, corr, desc, asc
from pyspark.ml.stat import Correlation
from pyspark.ml.feature import VectorAssembler
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
# Shared Spark session used by the analysis views in this module; adaptive
# query execution and partition coalescing are switched on explicitly.
spark = (
    SparkSession.builder
    .appName("KidneyDiseaseRiskAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
def kidney_function_biochemical_analysis(request):
    """Return summary statistics of the renal biochemical markers as JSON.

    Reads ``kidney_patients`` through the module-level ``spark`` session and
    reports the mean creatinine, the percentage of rows whose urea exceeds
    7.5, row counts per GFR stage and per protein risk band, the pairwise
    correlation matrix of the five markers, the creatinine quartiles and the
    albumin/GFR correlation.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse: A JSON object with the keys ``creatinine_average``,
        ``urea_abnormal_rate``, ``gfr_stage_distribution``,
        ``protein_risk_distribution``, ``correlation_matrix``,
        ``creatinine_percentiles`` and ``albumin_gfr_correlation``. A
        statistic that cannot be computed (no rows, all values missing) is
        reported as ``null``.
    """

    def _clean(value, ndigits=None):
        # Spark returns None for aggregates over no values and pandas/NumPy
        # return NaN; float(None) raises and a bare NaN token is not valid
        # JSON, so both are mapped to None.
        if pd.isna(value):
            return None
        number = float(value)
        return number if ndigits is None else round(number, ndigits)

    patient_data = spark.sql(
        "SELECT patient_id, creatinine, urea, gfr, protein, albumin, age, gender "
        "FROM kidney_patients"
    )

    # One aggregation pass instead of three separate count/avg jobs.
    totals = patient_data.agg(
        avg("creatinine").alias("avg_creatinine"),
        count("*").alias("total_count"),
        spark_sum(when(col("urea") > 7.5, 1).otherwise(0)).alias("urea_abnormal_count"),
    ).collect()[0]
    total_count = totals["total_count"]
    urea_abnormal_count = totals["urea_abnormal_count"] or 0
    # Guard the division: an empty table used to raise ZeroDivisionError.
    urea_abnormal_rate = (urea_abnormal_count / total_count) * 100 if total_count else 0.0

    # Descending lower bounds leave no gaps: the former between(60, 89) style
    # bands dropped fractional values such as 89.5 into the "肾衰竭" bucket.
    # Rows without a GFR value are excluded rather than counted as failure.
    gfr = col("gfr")
    gfr_stage_analysis = (
        patient_data.filter(gfr.isNotNull())
        .withColumn(
            "gfr_stage",
            when(gfr >= 90, "正常")
            .when(gfr >= 60, "轻度下降")
            .when(gfr >= 30, "中度下降")
            .when(gfr >= 15, "重度下降")
            .otherwise("肾衰竭"),
        )
        .groupBy("gfr_stage")
        .count()
        .orderBy(desc("count"))
    )

    protein_risk_analysis = (
        patient_data.withColumn(
            "protein_risk",
            when(col("protein") > 0.3, "高风险")
            .when(col("protein").between(0.15, 0.3), "中风险")
            .otherwise("低风险"),
        )
        .groupBy("protein_risk")
        .count()
    )

    albumin_correlation = patient_data.stat.corr("albumin", "gfr")

    # NOTE: toPandas() pulls every selected row onto the driver.
    biochemical_trends = patient_data.select(
        "creatinine", "urea", "gfr", "protein", "albumin"
    ).toPandas()
    correlation_matrix = biochemical_trends.corr()
    # Same {column: {row: value}} layout as DataFrame.to_dict(), NaN -> None.
    correlation_payload = {
        column: {row: _clean(value) for row, value in series.items()}
        for column, series in correlation_matrix.items()
    }

    # np.percentile returns NaN as soon as one input is NaN, so missing
    # creatinine values are dropped before the quartiles are taken.
    creatinine_values = biochemical_trends["creatinine"].dropna()
    if creatinine_values.empty:
        creatinine_percentiles = [None, None, None]
    else:
        creatinine_percentiles = np.percentile(
            creatinine_values.astype(float), [25, 50, 75]
        ).tolist()

    result_data = {
        'creatinine_average': _clean(totals['avg_creatinine']),
        'urea_abnormal_rate': round(urea_abnormal_rate, 2),
        'gfr_stage_distribution': [
            {'stage': row['gfr_stage'], 'count': row['count']}
            for row in gfr_stage_analysis.collect()
        ],
        'protein_risk_distribution': [
            {'risk_level': row['protein_risk'], 'count': row['count']}
            for row in protein_risk_analysis.collect()
        ],
        'correlation_matrix': correlation_payload,
        'creatinine_percentiles': creatinine_percentiles,
        'albumin_gfr_correlation': _clean(albumin_correlation, 4),
    }
    return JsonResponse(result_data)
def disease_complication_impact_analysis(request):
    """Return how recorded complications relate to kidney function, as JSON.

    Joins ``kidney_patients`` with ``patient_complications`` on
    ``patient_id`` through the module-level ``spark`` session and reports
    GFR averages per diabetes flag, the mean GFR of rows with
    ``hypertension == 1``, a combined cardiovascular/GFR risk distribution,
    GFR averages per number of complications, the percentage of rows whose
    severity score is at least 5, and a progression-risk distribution.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse: A JSON object with the keys ``diabetes_kidney_impact``,
        ``hypertension_avg_gfr``, ``cardiovascular_risk_distribution``,
        ``multiple_complications_impact``, ``high_risk_patient_percentage``
        and ``progression_risk_distribution``. Averages over no values are
        reported as ``null``.
    """

    def _clean(value):
        # avg() over no non-null values is None; float(None) would raise.
        return None if pd.isna(value) else float(value)

    # patient_id exists on both sides of the join, so it has to be qualified;
    # the unqualified reference is rejected by Spark as ambiguous.
    complications_data = spark.sql("""
        SELECT p.patient_id, diabetes, hypertension, cardiovascular_disease,
               chronic_kidney_disease_stage, gfr, creatinine, proteinuria_level
        FROM kidney_patients p
        JOIN patient_complications c ON p.patient_id = c.patient_id
    """)

    diabetes_impact = complications_data.groupBy("diabetes").agg(
        avg("gfr").alias("avg_gfr"),
        count("patient_id").alias("patient_count"),
    )

    hypertension_kidney_correlation = (
        complications_data.filter(col("hypertension") == 1)
        .agg(avg("gfr").alias("hypertension_avg_gfr"))
        .collect()[0]
    )

    has_cardiovascular = col("cardiovascular_disease") == 1
    no_cardiovascular = col("cardiovascular_disease") == 0
    cardiovascular_kidney_impact = (
        complications_data.withColumn(
            "cardiovascular_kidney_risk",
            when(has_cardiovascular & (col("gfr") < 60), "高风险")
            .when(has_cardiovascular & (col("gfr") >= 60), "中风险")
            .when(no_cardiovascular & (col("gfr") < 60), "肾脏风险")
            .otherwise("低风险"),
        )
        .groupBy("cardiovascular_kidney_risk")
        .count()
    )

    multiple_complications_analysis = (
        complications_data.withColumn(
            "complication_count",
            col("diabetes") + col("hypertension") + col("cardiovascular_disease"),
        )
        .groupBy("complication_count")
        .agg(avg("gfr").alias("avg_gfr_by_complications"))
        .orderBy("complication_count")
    )

    # Weighted complication flags plus a GFR penalty (3 below 30, 2 below 60).
    severity_score_calculation = complications_data.withColumn(
        "severity_score",
        (col("diabetes") * 2)
        + (col("hypertension") * 1.5)
        + (col("cardiovascular_disease") * 2.5)
        + when(col("gfr") < 30, 3).when(col("gfr") < 60, 2).otherwise(0),
    )
    # Total and high-risk counts in a single job; rows whose score is null
    # are not counted as high risk, exactly as a filter on ">= 5" would do.
    risk_totals = severity_score_calculation.agg(
        count("*").alias("total_patients"),
        spark_sum(when(col("severity_score") >= 5, 1).otherwise(0)).alias("high_risk_patients"),
    ).collect()[0]
    total_patients = risk_totals["total_patients"]
    high_risk_patients = risk_totals["high_risk_patients"] or 0
    # Guard the division: an empty join used to raise ZeroDivisionError.
    high_risk_percentage = (high_risk_patients / total_patients) * 100 if total_patients else 0.0

    complication_progression_risk = (
        complications_data.withColumn(
            "progression_risk",
            when((col("diabetes") == 1) & (col("hypertension") == 1) & (col("gfr") < 45), "极高风险")
            .when((col("diabetes") == 1) & (col("gfr") < 60), "高风险")
            .when((col("hypertension") == 1) & (col("proteinuria_level") > 1), "中高风险")
            .otherwise("标准风险"),
        )
        .groupBy("progression_risk")
        .count()
        .orderBy(desc("count"))
    )

    result_data = {
        'diabetes_kidney_impact': [
            {
                'diabetes_status': row['diabetes'],
                'avg_gfr': _clean(row['avg_gfr']),
                'patient_count': row['patient_count'],
            }
            for row in diabetes_impact.collect()
        ],
        'hypertension_avg_gfr': _clean(hypertension_kidney_correlation['hypertension_avg_gfr']),
        'cardiovascular_risk_distribution': [
            {'risk_level': row['cardiovascular_kidney_risk'], 'count': row['count']}
            for row in cardiovascular_kidney_impact.collect()
        ],
        'multiple_complications_impact': [
            {
                'complication_count': row['complication_count'],
                'avg_gfr': _clean(row['avg_gfr_by_complications']),
            }
            for row in multiple_complications_analysis.collect()
        ],
        'high_risk_patient_percentage': round(high_risk_percentage, 2),
        'progression_risk_distribution': [
            {'risk_level': row['progression_risk'], 'patient_count': row['count']}
            for row in complication_progression_risk.collect()
        ],
    }
    return JsonResponse(result_data)
def multi_factor_correlation_analysis(request):
    """Return correlations between patient factors and kidney markers as JSON.

    Joins ``kidney_patients`` with ``patient_lifestyle`` and
    ``patient_complications`` on ``patient_id`` through the module-level
    ``spark`` session. It reports every pair of numeric features whose
    Pearson correlation exceeds 0.5 in absolute value, selected pairwise
    correlations with age, haemoglobin and GFR, GFR averages per BMI band,
    per blood-pressure band and per additive risk score, and the averages of
    rows with ``family_history == 1``.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse: A JSON object with the keys ``strong_correlations``,
        ``age_kidney_correlations``, ``bmi_kidney_function``,
        ``blood_pressure_impact``, ``hemoglobin_kidney_correlation``,
        ``family_history_impact``, ``multi_risk_distribution`` and
        ``predictive_factor_importance``. Values that cannot be computed are
        reported as ``null``.
    """

    def _clean(value, ndigits=None):
        # Aggregates over no usable rows come back as None or NaN; neither
        # survives float()/JSON encoding, so both become None.
        if pd.isna(value):
            return None
        number = float(value)
        return number if ndigits is None else round(number, ndigits)

    def _importance(value):
        # Importance is the magnitude of the correlation with GFR.
        cleaned = _clean(value, 4)
        return None if cleaned is None else abs(cleaned)

    correlation_dataset = spark.sql("""
        SELECT age, gender, bmi, systolic_bp, diastolic_bp,
               creatinine, urea, gfr, protein, albumin, hemoglobin,
               diabetes, hypertension, smoking_status, family_history
        FROM kidney_patients p
        JOIN patient_lifestyle l ON p.patient_id = l.patient_id
        JOIN patient_complications c ON p.patient_id = c.patient_id
    """)

    numeric_features = ['age', 'bmi', 'systolic_bp', 'diastolic_bp', 'creatinine',
                        'urea', 'gfr', 'protein', 'albumin', 'hemoglobin']
    # VectorAssembler raises on null inputs with its default handleInvalid
    # setting, so rows missing any feature are left out of the matrix.
    complete_rows = correlation_dataset.dropna(subset=numeric_features)
    assembler = VectorAssembler(inputCols=numeric_features, outputCol="features")
    feature_vector = assembler.transform(complete_rows)
    correlation_matrix = Correlation.corr(feature_vector, "features").head()[0].toArray()

    # Upper triangle only: each unordered pair is reported once.
    strong_correlations = []
    for i, first_factor in enumerate(numeric_features):
        for j in range(i + 1, len(numeric_features)):
            corr_value = correlation_matrix[i][j]
            if abs(corr_value) > 0.5:  # NaN compares False and is skipped
                strong_correlations.append({
                    'factor1': first_factor,
                    'factor2': numeric_features[j],
                    'correlation': round(float(corr_value), 4),
                })

    # All pairwise correlations in one job instead of five separate ones.
    pairwise = correlation_dataset.select(
        corr("age", "gfr").alias("age_gfr"),
        corr("age", "creatinine").alias("age_creatinine"),
        corr("age", "systolic_bp").alias("age_bp"),
        corr("hemoglobin", "gfr").alias("hemoglobin_gfr"),
        corr("creatinine", "gfr").alias("creatinine_gfr_corr"),
        corr("protein", "gfr").alias("protein_gfr_corr"),
        corr("systolic_bp", "gfr").alias("bp_gfr_corr"),
        corr("bmi", "gfr").alias("bmi_gfr_corr"),
    ).collect()[0]

    bmi_kidney_function = correlation_dataset.groupBy(
        when(col("bmi") < 18.5, "偏瘦")
        .when(col("bmi").between(18.5, 24), "正常")
        .when(col("bmi").between(24, 28), "超重")
        .otherwise("肥胖")
        .alias("bmi_category")
    ).agg(
        avg("gfr").alias("avg_gfr"),
        count("*").alias("category_count"),
    )

    # The first branch already captures >= 140 / >= 90, so plain lower bounds
    # are enough here; the former between(120, 139) / between(80, 89) bands
    # filed fractional readings such as 139.5 under "正常血压".
    blood_pressure_kidney_correlation = (
        correlation_dataset.withColumn(
            "bp_category",
            when((col("systolic_bp") >= 140) | (col("diastolic_bp") >= 90), "高血压")
            .when((col("systolic_bp") >= 120) | (col("diastolic_bp") >= 80), "高血压前期")
            .otherwise("正常血压"),
        )
        .groupBy("bp_category")
        .agg(avg("gfr").alias("avg_gfr_by_bp"))
    )

    family_history_correlation = (
        correlation_dataset.filter(col("family_history") == 1)
        .agg(
            avg("gfr").alias("family_history_avg_gfr"),
            avg("creatinine").alias("family_history_avg_creatinine"),
        )
        .collect()[0]
    )

    # Additive score: age band (0-2) plus one or two points per risk flag.
    multi_risk_factor_score = (
        correlation_dataset.withColumn(
            "risk_score",
            when(col("age") > 65, 2).when(col("age") > 45, 1).otherwise(0)
            + when(col("diabetes") == 1, 2).otherwise(0)
            + when(col("hypertension") == 1, 1).otherwise(0)
            + when(col("smoking_status") == 1, 1).otherwise(0)
            + when(col("bmi") > 28, 1).otherwise(0)
            + when(col("family_history") == 1, 1).otherwise(0),
        )
        .groupBy("risk_score")
        .agg(
            avg("gfr").alias("avg_gfr_by_risk_score"),
            count("*").alias("patients_per_risk_level"),
        )
        .orderBy("risk_score")
    )

    result_data = {
        'strong_correlations': strong_correlations,
        'age_kidney_correlations': {
            'age_gfr': _clean(pairwise['age_gfr'], 4),
            'age_creatinine': _clean(pairwise['age_creatinine'], 4),
            'age_bp': _clean(pairwise['age_bp'], 4),
        },
        'bmi_kidney_function': [
            {
                'bmi_category': row['bmi_category'],
                'avg_gfr': _clean(row['avg_gfr']),
                'count': row['category_count'],
            }
            for row in bmi_kidney_function.collect()
        ],
        'blood_pressure_impact': [
            {'bp_category': row['bp_category'], 'avg_gfr': _clean(row['avg_gfr_by_bp'])}
            for row in blood_pressure_kidney_correlation.collect()
        ],
        'hemoglobin_kidney_correlation': _clean(pairwise['hemoglobin_gfr'], 4),
        'family_history_impact': {
            'avg_gfr': _clean(family_history_correlation['family_history_avg_gfr']),
            'avg_creatinine': _clean(family_history_correlation['family_history_avg_creatinine']),
        },
        'multi_risk_distribution': [
            {
                'risk_score': row['risk_score'],
                'avg_gfr': _clean(row['avg_gfr_by_risk_score']),
                'patient_count': row['patients_per_risk_level'],
            }
            for row in multi_risk_factor_score.collect()
        ],
        'predictive_factor_importance': {
            'creatinine_importance': _importance(pairwise['creatinine_gfr_corr']),
            'protein_importance': _importance(pairwise['protein_gfr_corr']),
            'bp_importance': _importance(pairwise['bp_gfr_corr']),
            # Was computed alongside the others but never returned.
            'bmi_importance': _importance(pairwise['bmi_gfr_corr']),
        },
    }
    return JsonResponse(result_data)
基于大数据的肾脏疾病风险数据分析系统文档展示
💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐