Preface
💖💖 Author: 计算机程序员小杨 💙💙 About me: I work in the computer field and specialize in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT directions. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing text duplication. I love technology, enjoy exploring new tools and frameworks, and like solving real problems with code, so feel free to ask me anything about code and technology! 💛💛 A word of thanks: thank you all for your attention and support! 💕💕 Contact 计算机程序员小杨 at the end of this article to get the source code 💜💜 Web projects · Android/Mini Program projects · Big data projects · Deep learning projects · Computer science capstone topic selection 💜💜
1. Development Tools
Big data framework: Hadoop + Spark (Hive is not used this time; customization is supported)
Languages: Python + Java (both versions supported)
Backend frameworks: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions supported)
Frontend: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
2. System Overview
The Health Risk Prediction and Data Visualization Analysis System is a medical health analytics platform built on big data technology. It uses the Hadoop + Spark distributed computing framework to process large volumes of health data, with a Python backend built on Django providing a stable service architecture. The frontend uses the Vue + ElementUI + Echarts stack to give users an intuitive, friendly interface and rich data visualizations. By integrating Spark SQL, Pandas, NumPy, and other data processing tools, the system can efficiently process patients' vital-sign data and deliver its core features: intelligent health risk prediction, patient risk profiling, and oxygen-usage pattern analysis. It supports multi-dimensional patient clustering, uses machine learning algorithms to uncover hidden health-risk patterns, and also provides comprehensive user and data management to keep medical data secure and intact. MySQL serves as the storage foundation and works with the HDFS distributed file system to form a complete health data analysis pipeline that gives medical institutions scientific decision support and risk early-warning services.
3. System Feature Demo
4. System Interface Showcase
5. Source Code
from pyspark.sql import SparkSession
# Import Spark functions under an alias: `from pyspark.sql.functions import *`
# would shadow Python builtins such as round(), which this module relies on
from pyspark.sql import functions as F
from pyspark.ml.clustering import KMeans
from pyspark.ml.evaluation import ClusteringEvaluator
from pyspark.ml.feature import VectorAssembler
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import numpy as np
import json

spark = (SparkSession.builder
         .appName("HealthRiskPrediction")
         .config("spark.sql.adaptive.enabled", "true")
         .getOrCreate())
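
# The queries below assume that `vital_signs` and `patients` tables are visible
# to Spark SQL. A minimal sketch of one way to provide them (the JDBC URL,
# credentials and database name are assumptions, not from the original project;
# the MySQL JDBC driver must be on the Spark classpath):
for table_name in ("patients", "vital_signs"):
    spark.read.format("jdbc") \
        .option("url", "jdbc:mysql://localhost:3306/health_db") \
        .option("dbtable", table_name) \
        .option("user", "root") \
        .option("password", "change_me") \
        .load() \
        .createOrReplaceTempView(table_name)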
@csrf_exempt
def health_risk_prediction(request):
    """Score a single patient's health risk from averaged vital signs."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    patient_data = json.loads(request.body)
    # Cast to int: this also keeps the f-string query below safe from SQL injection
    patient_id = int(patient_data['patient_id'])
    vital_signs_df = spark.sql(f"SELECT * FROM vital_signs WHERE patient_id = {patient_id}")
    # One aggregation job instead of five separate collect() calls
    row = vital_signs_df.agg(
        F.avg("heart_rate").alias("heart_rate_avg"),
        F.avg("blood_pressure_systolic").alias("systolic_avg"),
        F.avg("blood_pressure_diastolic").alias("diastolic_avg"),
        F.avg("temperature").alias("temperature_avg"),
        F.avg("oxygen_saturation").alias("oxygen_avg")
    ).collect()[0]
    heart_rate_avg = row["heart_rate_avg"]
    blood_pressure_systolic_avg = row["systolic_avg"]
    blood_pressure_diastolic_avg = row["diastolic_avg"]
    temperature_avg = row["temperature_avg"]
    oxygen_saturation_avg = row["oxygen_avg"]
    # avg() over zero rows yields NULL; bail out before comparing against None
    if heart_rate_avg is None:
        return JsonResponse({'error': 'no vital sign records for this patient'}, status=404)
    # Additive rule-based risk score following common clinical thresholds
    risk_score = 0
    if heart_rate_avg > 100 or heart_rate_avg < 60:
        risk_score += 25
    if blood_pressure_systolic_avg > 140 or blood_pressure_diastolic_avg > 90:
        risk_score += 30
    if temperature_avg > 38.5 or temperature_avg < 36.0:
        risk_score += 20
    if oxygen_saturation_avg < 95:
        risk_score += 35
    risk_factors = []
    if heart_rate_avg > 100:
        risk_factors.append("心率过速")  # tachycardia
    elif heart_rate_avg < 60:
        risk_factors.append("心率过缓")  # bradycardia
    if blood_pressure_systolic_avg > 140:
        risk_factors.append("高血压")  # hypertension
    if temperature_avg > 38.5:
        risk_factors.append("发热")  # fever
    if oxygen_saturation_avg < 95:
        risk_factors.append("血氧饱和度低")  # low blood-oxygen saturation
    # Bucket the score: <30 low risk, 30-59 medium risk, >=60 high risk
    risk_level = "低风险" if risk_score < 30 else "中风险" if risk_score < 60 else "高风险"
    prediction_result = {
        'patient_id': patient_id,
        'risk_score': risk_score,
        'risk_level': risk_level,
        'risk_factors': risk_factors,
        'vital_signs_analysis': {
            'heart_rate_avg': round(heart_rate_avg, 2),
            'blood_pressure_avg': f"{round(blood_pressure_systolic_avg, 1)}/{round(blood_pressure_diastolic_avg, 1)}",
            'temperature_avg': round(temperature_avg, 2),
            'oxygen_saturation_avg': round(oxygen_saturation_avg, 2)
        }
    }
    return JsonResponse(prediction_result)
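
# Example call (hypothetical data): POST {"patient_id": 42} to this view; the
# JSON response carries risk_score, risk_level, risk_factors and the averaged vitals.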
@csrf_exempt
def vital_signs_analysis(request):
    """Aggregate vital-sign statistics and trends over a date range."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    analysis_params = json.loads(request.body)
    # NOTE: the dates are interpolated into SQL as-is and should be validated upstream
    start_date = analysis_params['start_date']
    end_date = analysis_params['end_date']
    patient_id = analysis_params.get('patient_id')
    query = f"SELECT * FROM vital_signs WHERE record_date BETWEEN '{start_date}' AND '{end_date}'"
    if patient_id:
        query += f" AND patient_id = {int(patient_id)}"  # int() keeps the f-string SQL safe
    vital_signs_df = spark.sql(query)
    total_records = vital_signs_df.count()
    if total_records == 0:
        return JsonResponse({'error': 'no vital sign records in the selected range'}, status=404)
    # Per-day statistics for the charting layer
    daily_stats = vital_signs_df.groupBy("record_date").agg(
        F.avg("heart_rate").alias("avg_heart_rate"),
        F.max("heart_rate").alias("max_heart_rate"),
        F.min("heart_rate").alias("min_heart_rate"),
        F.avg("blood_pressure_systolic").alias("avg_systolic"),
        F.avg("blood_pressure_diastolic").alias("avg_diastolic"),
        F.avg("temperature").alias("avg_temperature"),
        F.avg("oxygen_saturation").alias("avg_oxygen_saturation")
    ).orderBy("record_date")
    daily_stats_pandas = daily_stats.toPandas()
    # Records with any out-of-range vital sign
    abnormal_records = vital_signs_df.filter(
        (F.col("heart_rate") > 100) | (F.col("heart_rate") < 60) |
        (F.col("blood_pressure_systolic") > 140) | (F.col("blood_pressure_diastolic") > 90) |
        (F.col("temperature") > 38.5) | (F.col("temperature") < 36.0) |
        (F.col("oxygen_saturation") < 95)
    )
    abnormal_count = abnormal_records.count()
    abnormal_rate = abnormal_count / total_records * 100
    # Fit a straight line through each daily series; the slope's sign gives the trend
    trend_analysis = {}
    trend_columns = {
        'heart_rate': 'avg_heart_rate',
        'blood_pressure_systolic': 'avg_systolic',
        'temperature': 'avg_temperature',
        'oxygen_saturation': 'avg_oxygen_saturation',
    }
    for metric, stats_column in trend_columns.items():
        values = daily_stats_pandas[stats_column].values
        if len(values) > 1:
            trend_slope = np.polyfit(range(len(values)), values, 1)[0]
            # 上升 rising / 下降 falling / 稳定 stable
            trend_analysis[metric] = "上升" if trend_slope > 0 else "下降" if trend_slope < 0 else "稳定"
    analysis_result = {
        'daily_statistics': daily_stats_pandas.to_dict('records'),
        'abnormal_rate': round(abnormal_rate, 2),
        'abnormal_count': abnormal_count,
        'total_records': total_records,
        'trend_analysis': trend_analysis,
        'summary': {
            'avg_heart_rate': round(daily_stats_pandas['avg_heart_rate'].mean(), 2),
            'avg_blood_pressure': f"{round(daily_stats_pandas['avg_systolic'].mean(), 1)}/{round(daily_stats_pandas['avg_diastolic'].mean(), 1)}",
            'avg_temperature': round(daily_stats_pandas['avg_temperature'].mean(), 2),
            'avg_oxygen_saturation': round(daily_stats_pandas['avg_oxygen_saturation'].mean(), 2)
        }
    }
    return JsonResponse(analysis_result)
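
# Example request body for the view above (hypothetical values):
# {"start_date": "2024-01-01", "end_date": "2024-01-31", "patient_id": 42}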
@csrf_exempt
def patient_clustering_analysis(request):
    """Group patients into risk profiles with KMeans on averaged vitals."""
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    clustering_params = json.loads(request.body)
    k_clusters = int(clustering_params.get('k_clusters', 3))
    patients_df = spark.sql("""
        SELECT p.patient_id, p.age, p.gender,
               AVG(v.heart_rate) AS avg_heart_rate,
               AVG(v.blood_pressure_systolic) AS avg_systolic,
               AVG(v.blood_pressure_diastolic) AS avg_diastolic,
               AVG(v.temperature) AS avg_temperature,
               AVG(v.oxygen_saturation) AS avg_oxygen_saturation
        FROM patients p
        JOIN vital_signs v ON p.patient_id = v.patient_id
        GROUP BY p.patient_id, p.age, p.gender
    """).na.drop()
    # Encode gender as a numeric feature (男 = male -> 1, otherwise 0)
    gender_encoded = patients_df.withColumn("gender_encoded", F.when(F.col("gender") == "男", 1).otherwise(0))
    feature_columns = ["age", "gender_encoded", "avg_heart_rate", "avg_systolic",
                       "avg_diastolic", "avg_temperature", "avg_oxygen_saturation"]
    assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
    feature_df = assembler.transform(gender_encoded)
    kmeans = KMeans(k=k_clusters, seed=42, featuresCol="features", predictionCol="cluster")
    model = kmeans.fit(feature_df)
    clustered_df = model.transform(feature_df)
    # Per-cluster profile used to label each group below
    cluster_stats = clustered_df.groupBy("cluster").agg(
        F.count("patient_id").alias("patient_count"),
        F.avg("age").alias("avg_age"),
        F.avg("avg_heart_rate").alias("cluster_avg_heart_rate"),
        F.avg("avg_systolic").alias("cluster_avg_systolic"),
        F.avg("avg_temperature").alias("cluster_avg_temperature"),
        F.avg("avg_oxygen_saturation").alias("cluster_avg_oxygen_saturation")
    ).orderBy("cluster")
    cluster_analysis = cluster_stats.toPandas().to_dict('records')
    patient_clusters = clustered_df.select(
        "patient_id", "cluster", "age", "gender", "avg_heart_rate",
        "avg_systolic", "avg_temperature", "avg_oxygen_saturation"
    ).toPandas()
    # Derive human-readable characteristics for each cluster
    cluster_characteristics = []
    for cluster_info in cluster_analysis:
        characteristics = []
        if cluster_info['cluster_avg_heart_rate'] > 80:
            characteristics.append("心率偏高")  # elevated heart rate
        if cluster_info['cluster_avg_systolic'] > 130:
            characteristics.append("血压偏高")  # elevated blood pressure
        if cluster_info['cluster_avg_temperature'] > 37.5:
            characteristics.append("体温偏高")  # elevated temperature
        if cluster_info['cluster_avg_oxygen_saturation'] < 97:
            characteristics.append("血氧偏低")  # low blood oxygen
        if cluster_info['avg_age'] > 60:
            characteristics.append("高龄群体")  # elderly group
        cluster_characteristics.append({
            'cluster_id': cluster_info['cluster'],
            'characteristics': characteristics,
            'risk_level': "高风险" if len(characteristics) >= 3 else "中风险" if len(characteristics) >= 1 else "低风险"
        })
    # model.summary.trainingCost is the within-cluster sum of squares, not a
    # silhouette score; use ClusteringEvaluator to report an actual silhouette value
    silhouette_score = ClusteringEvaluator(
        featuresCol="features", predictionCol="cluster", metricName="silhouette"
    ).evaluate(clustered_df)
    clustering_result = {
        'cluster_analysis': cluster_analysis,
        'cluster_characteristics': cluster_characteristics,
        'patient_distribution': patient_clusters.to_dict('records'),
        'total_patients': len(patient_clusters),
        'clustering_summary': {
            'optimal_clusters': k_clusters,
            'silhouette_score': round(silhouette_score, 4)
        }
    }
    return JsonResponse(clustering_result)
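
To wire these views into Django, each endpoint needs a route. A minimal urls.py sketch (the module path and URL paths here are assumptions, not taken from the original project):

from django.urls import path
from . import views  # assumed module holding the three views above

urlpatterns = [
    path('api/health-risk-prediction/', views.health_risk_prediction),
    path('api/vital-signs-analysis/', views.vital_signs_analysis),
    path('api/patient-clustering-analysis/', views.patient_clustering_analysis),
]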
6. System Documentation
Closing
💕💕 To get the source code, contact 计算机程序员小杨