前言
- 💖💖作者:计算机程序员小杨
- 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓(Android)等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写,也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题。大家有技术或代码方面的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💕💕文末获取源码联系 计算机程序员小杨
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 深度学习实战项目
- 计算机毕业设计选题
- 💜💜
一.开发工具简介
- 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
- 开发语言:Python+Java(两个版本都支持)
- 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持)
- 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery
- 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
- 数据库:MySQL
二.系统内容简介
基于大数据的孕产妇健康风险数据可视化分析系统是一个集成Hadoop分布式存储、Spark大数据处理和Django Web框架的智能医疗分析平台。该系统通过部署Hadoop集群实现海量孕产妇健康数据的分布式存储,利用Spark强大的内存计算能力对HDFS中的医疗数据进行实时分析处理。系统采用Python作为主要开发语言,结合Django框架构建稳定的后端服务,前端使用Vue+ElementUI打造现代化的用户界面,通过Echarts实现丰富的数据可视化效果。核心功能涵盖用户管理、孕妇健康风险数据录入、基础健康数据分析、心血管风险评估、高风险人群筛查、代谢健康监测以及临床预警系统等模块。系统运用Spark SQL进行复杂的医疗数据查询,结合Pandas和NumPy进行科学计算,能够从多维度分析孕产妇的健康状况,识别潜在风险因素,为医护人员提供决策支持,最终形成一个完整的孕产妇健康管理生态系统。
三.系统功能演示
大数据+智慧医疗热潮下:孕产妇健康风险分析系统成毕设新宠|计算机毕业设计|数据可视化
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, count, avg, sum, max, min
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DateType
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views import View
import json
from datetime import datetime, timedelta
# Module-level SparkSession used by the analysis functions in this file.
# Adaptive query execution and adaptive partition coalescing are enabled.
spark = (
    SparkSession.builder
    .appName("PregnancyHealthRiskAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
def analyze_cardiovascular_risk(request):
    """Score cardiovascular risk per patient record and return a JSON summary.

    Every row of the ``pregnancy_health_data`` table receives three sub-scores:

    * ``bp_risk_score``: systolic_bp >= 140 -> 3, >= 130 -> 2, >= 120 -> 1, else 0
    * ``heart_rate_risk``: heart_rate >= 100 -> 2, >= 90 -> 1, else 0
    * ``weight_gain_risk``: weight_gain >= 18 -> 3, >= 15 -> 2, >= 12 -> 1, else 0

    Their sum, ``total_cv_risk`` (0-8), is mapped to ``risk_level``:
    >= 6 "高风险", >= 4 "中风险", >= 2 "低风险", otherwise "正常".
    A NULL measurement matches no threshold and therefore scores 0.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``risk_statistics`` (patient
        count and average measurements per risk level), ``high_risk_patients``
        (up to 20 "高风险" rows, highest score first) and ``total_analyzed``
        (row count of the scored table). An average is ``null`` in the JSON
        when its group has no non-null values.
    """

    def _round_or_none(value, digits):
        # avg() yields None when a group holds no non-null values;
        # round(None, ...) would raise TypeError and fail the whole request.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded; consider
    # moving them to project configuration.
    pregnancy_data = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/pregnancy_health")
        .option("dbtable", "pregnancy_health_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    high_bp_threshold = 140
    risk_score_df = pregnancy_data.withColumn(
        "bp_risk_score",
        when(col("systolic_bp") >= high_bp_threshold, 3)
        .when(col("systolic_bp") >= 130, 2)
        .when(col("systolic_bp") >= 120, 1)
        .otherwise(0),
    )
    risk_score_df = risk_score_df.withColumn(
        "heart_rate_risk",
        when(col("heart_rate") >= 100, 2)
        .when(col("heart_rate") >= 90, 1)
        .otherwise(0),
    )
    risk_score_df = risk_score_df.withColumn(
        "weight_gain_risk",
        when(col("weight_gain") >= 18, 3)
        .when(col("weight_gain") >= 15, 2)
        .when(col("weight_gain") >= 12, 1)
        .otherwise(0),
    )

    cardiovascular_risk_df = risk_score_df.withColumn(
        "total_cv_risk",
        col("bp_risk_score") + col("heart_rate_risk") + col("weight_gain_risk"),
    )
    cardiovascular_risk_df = cardiovascular_risk_df.withColumn(
        "risk_level",
        when(col("total_cv_risk") >= 6, "高风险")
        .when(col("total_cv_risk") >= 4, "中风险")
        .when(col("total_cv_risk") >= 2, "低风险")
        .otherwise("正常"),
    )

    # Three actions run against this DataFrame (two collect() calls and one
    # count()). Caching it lets them share a single JDBC read and see the
    # same snapshot of the table.
    cardiovascular_risk_df = cardiovascular_risk_df.cache()
    try:
        risk_statistics = cardiovascular_risk_df.groupBy("risk_level").agg(
            count("patient_id").alias("patient_count"),
            avg("systolic_bp").alias("avg_systolic_bp"),
            avg("heart_rate").alias("avg_heart_rate"),
            avg("weight_gain").alias("avg_weight_gain"),
        )
        analysis_result = [
            {
                "risk_level": row["risk_level"],
                "patient_count": row["patient_count"],
                "avg_systolic_bp": _round_or_none(row["avg_systolic_bp"], 2),
                "avg_heart_rate": _round_or_none(row["avg_heart_rate"], 2),
                "avg_weight_gain": _round_or_none(row["avg_weight_gain"], 2),
            }
            for row in risk_statistics.collect()
        ]

        high_risk_patients = (
            cardiovascular_risk_df.filter(col("risk_level") == "高风险")
            .select(
                "patient_id",
                "patient_name",
                "systolic_bp",
                "heart_rate",
                "weight_gain",
                "total_cv_risk",
            )
            .orderBy(col("total_cv_risk").desc())
            .limit(20)
        )
        high_risk_list = [
            {
                "patient_id": patient["patient_id"],
                "patient_name": patient["patient_name"],
                "systolic_bp": patient["systolic_bp"],
                "heart_rate": patient["heart_rate"],
                "weight_gain": patient["weight_gain"],
                "risk_score": patient["total_cv_risk"],
            }
            for patient in high_risk_patients.collect()
        ]

        return JsonResponse(
            {
                "status": "success",
                "risk_statistics": analysis_result,
                "high_risk_patients": high_risk_list,
                "total_analyzed": cardiovascular_risk_df.count(),
            }
        )
    finally:
        # Release the cached blocks whether or not the analysis succeeded.
        cardiovascular_risk_df.unpersist()
def detect_high_risk_population(request):
    """Compute a composite risk score per patient and summarise the high-risk group.

    Every row of the ``pregnancy_health_data`` table receives five sub-scores:

    * ``age_risk``: age >= 35 or age <= 18 -> 2, else 0
    * ``bmi_risk``: bmi >= 30 -> 3, >= 25 -> 2, <= 18.5 -> 1, else 0
    * ``diabetes_risk``: blood_glucose >= 7.0 -> 3, >= 6.1 -> 2, >= 5.6 -> 1, else 0
    * ``hypertension_risk``: systolic_bp >= 140 -> 3, >= 130 -> 2, >= 120 -> 1, else 0
    * ``history_risk``: pregnancy_history contains "流产" or "早产" -> 2,
      contains "难产" -> 1, else 0

    Their sum, ``comprehensive_risk_score``, is mapped to ``risk_category``:
    >= 8 "极高风险", >= 6 "高风险", >= 4 "中风险", >= 2 "低风险", otherwise "正常".
    Rows scoring >= 6 form the high-risk population.

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``summary`` (averages and row
        count of the high-risk population), ``risk_distribution`` (count and
        average score per category) and ``age_risk_analysis`` (count per age
        group and category). The summary averages are ``null`` in the JSON
        when the high-risk population is empty.
    """

    def _round_or_none(value, digits):
        # avg() yields None over an empty or all-NULL input (for example when
        # no patient scores >= 6); round(None, ...) would raise TypeError.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded; consider
    # moving them to project configuration.
    health_records = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/pregnancy_health")
        .option("dbtable", "pregnancy_health_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    risk_factors_df = health_records.withColumn(
        "age_risk",
        when(col("age") >= 35, 2).when(col("age") <= 18, 2).otherwise(0),
    )
    risk_factors_df = risk_factors_df.withColumn(
        "bmi_risk",
        when(col("bmi") >= 30, 3)
        .when(col("bmi") >= 25, 2)
        .when(col("bmi") <= 18.5, 1)
        .otherwise(0),
    )
    risk_factors_df = risk_factors_df.withColumn(
        "diabetes_risk",
        when(col("blood_glucose") >= 7.0, 3)
        .when(col("blood_glucose") >= 6.1, 2)
        .when(col("blood_glucose") >= 5.6, 1)
        .otherwise(0),
    )
    risk_factors_df = risk_factors_df.withColumn(
        "hypertension_risk",
        when(col("systolic_bp") >= 140, 3)
        .when(col("systolic_bp") >= 130, 2)
        .when(col("systolic_bp") >= 120, 1)
        .otherwise(0),
    )
    risk_factors_df = risk_factors_df.withColumn(
        "history_risk",
        when(col("pregnancy_history").contains("流产"), 2)
        .when(col("pregnancy_history").contains("早产"), 2)
        .when(col("pregnancy_history").contains("难产"), 1)
        .otherwise(0),
    )

    comprehensive_risk_df = risk_factors_df.withColumn(
        "comprehensive_risk_score",
        col("age_risk")
        + col("bmi_risk")
        + col("diabetes_risk")
        + col("hypertension_risk")
        + col("history_risk"),
    )
    comprehensive_risk_df = comprehensive_risk_df.withColumn(
        "risk_category",
        when(col("comprehensive_risk_score") >= 8, "极高风险")
        .when(col("comprehensive_risk_score") >= 6, "高风险")
        .when(col("comprehensive_risk_score") >= 4, "中风险")
        .when(col("comprehensive_risk_score") >= 2, "低风险")
        .otherwise("正常"),
    )

    # Three separate actions are derived from this DataFrame. Caching it lets
    # them share a single JDBC read and see the same snapshot of the table.
    comprehensive_risk_df = comprehensive_risk_df.cache()
    try:
        high_risk_population = comprehensive_risk_df.filter(
            col("comprehensive_risk_score") >= 6
        )
        risk_factor_analysis = high_risk_population.agg(
            avg("age").alias("avg_age"),
            avg("bmi").alias("avg_bmi"),
            avg("blood_glucose").alias("avg_glucose"),
            avg("systolic_bp").alias("avg_bp"),
            count("patient_id").alias("total_high_risk"),
        )
        risk_distribution = comprehensive_risk_df.groupBy("risk_category").agg(
            count("patient_id").alias("population_count"),
            avg("comprehensive_risk_score").alias("avg_risk_score"),
        )
        # NOTE(review): a NULL age matches none of the when() branches, so
        # such rows are labelled "≥36岁" by otherwise(); confirm this is
        # acceptable for the data.
        age_group_risk = (
            comprehensive_risk_df.withColumn(
                "age_group",
                when(col("age") <= 20, "≤20岁")
                .when(col("age") <= 25, "21-25岁")
                .when(col("age") <= 30, "26-30岁")
                .when(col("age") <= 35, "31-35岁")
                .otherwise("≥36岁"),
            )
            .groupBy("age_group", "risk_category")
            .count()
            .orderBy("age_group", "risk_category")
        )

        # An aggregation without groupBy always yields exactly one row.
        result_summary = risk_factor_analysis.collect()[0]
        distribution_data = [
            {
                "category": row["risk_category"],
                "count": row["population_count"],
                "avg_score": _round_or_none(row["avg_risk_score"], 2),
            }
            for row in risk_distribution.collect()
        ]
        age_risk_data = [
            {
                "age_group": row["age_group"],
                "risk_category": row["risk_category"],
                "count": row["count"],
            }
            for row in age_group_risk.collect()
        ]

        return JsonResponse(
            {
                "status": "success",
                "summary": {
                    "avg_age": _round_or_none(result_summary["avg_age"], 1),
                    "avg_bmi": _round_or_none(result_summary["avg_bmi"], 2),
                    "avg_glucose": _round_or_none(result_summary["avg_glucose"], 2),
                    "avg_bp": _round_or_none(result_summary["avg_bp"], 1),
                    "total_high_risk": result_summary["total_high_risk"],
                },
                "risk_distribution": distribution_data,
                "age_risk_analysis": age_risk_data,
            }
        )
    finally:
        # Release the cached blocks whether or not the analysis succeeded.
        comprehensive_risk_df.unpersist()
def analyze_metabolic_health(request):
    """Score metabolic risk per patient record and return a JSON summary.

    Every row of the ``pregnancy_health_data`` table is labelled and scored:

    * ``glucose_status``: blood_glucose >= 7.0 "糖尿病", >= 6.1 "糖尿病前期",
      >= 5.6 "糖耐量异常", otherwise "正常"
    * ``insulin_resistance_risk``: fasting_insulin >= 15 and
      blood_glucose >= 5.6 -> 3; fasting_insulin >= 12 -> 2; >= 10 -> 1; else 0
    * ``metabolic_syndrome_score``: with bmi >= 25, 3 if systolic_bp >= 130
      and blood_glucose >= 5.6, 2 if only one of those two holds, 1 if
      neither; 0 when bmi < 25
    * ``lipid_risk``: total_cholesterol >= 6.2 and triglycerides >= 2.3 -> 3;
      either one alone -> 2; hdl_cholesterol <= 1.0 -> 1; else 0

    The three scores sum to ``total_metabolic_risk`` (0-9), mapped to
    ``metabolic_risk_level``: >= 7 "严重代谢异常", >= 5 "中度代谢异常",
    >= 3 "轻度代谢异常", otherwise "代谢正常".

    Args:
        request: The incoming Django request; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``glucose_analysis`` (count and
        averages per glucose status), ``metabolic_risk_analysis`` (count and
        averages per risk level), ``critical_patients`` (up to 15 rows with
        total_metabolic_risk >= 6, highest first) and ``total_analyzed``.
        An average is ``null`` in the JSON when its group has no non-null
        values.
    """

    def _round_or_none(value, digits):
        # avg() yields None when a group holds no non-null values;
        # round(None, ...) would raise TypeError and fail the whole request.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded; consider
    # moving them to project configuration.
    metabolic_data = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/pregnancy_health")
        .option("dbtable", "pregnancy_health_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    glucose_analysis_df = metabolic_data.withColumn(
        "glucose_status",
        when(col("blood_glucose") >= 7.0, "糖尿病")
        .when(col("blood_glucose") >= 6.1, "糖尿病前期")
        .when(col("blood_glucose") >= 5.6, "糖耐量异常")
        .otherwise("正常"),
    )
    insulin_resistance_df = glucose_analysis_df.withColumn(
        "insulin_resistance_risk",
        when((col("fasting_insulin") >= 15) & (col("blood_glucose") >= 5.6), 3)
        .when(col("fasting_insulin") >= 12, 2)
        .when(col("fasting_insulin") >= 10, 1)
        .otherwise(0),
    )
    metabolic_syndrome_df = insulin_resistance_df.withColumn(
        "metabolic_syndrome_score",
        when(
            (col("bmi") >= 25)
            & (col("systolic_bp") >= 130)
            & (col("blood_glucose") >= 5.6),
            3,
        )
        .when((col("bmi") >= 25) & (col("systolic_bp") >= 130), 2)
        .when((col("bmi") >= 25) & (col("blood_glucose") >= 5.6), 2)
        .when(col("bmi") >= 25, 1)
        .otherwise(0),
    )
    lipid_profile_df = metabolic_syndrome_df.withColumn(
        "lipid_risk",
        when((col("total_cholesterol") >= 6.2) & (col("triglycerides") >= 2.3), 3)
        .when(col("total_cholesterol") >= 6.2, 2)
        .when(col("triglycerides") >= 2.3, 2)
        .when(col("hdl_cholesterol") <= 1.0, 1)
        .otherwise(0),
    )

    # Four actions build on this DataFrame, directly or through
    # metabolic_risk_categories. Caching it lets them share a single JDBC
    # read and see the same snapshot of the table.
    comprehensive_metabolic_df = lipid_profile_df.withColumn(
        "total_metabolic_risk",
        col("insulin_resistance_risk")
        + col("metabolic_syndrome_score")
        + col("lipid_risk"),
    ).cache()
    try:
        metabolic_risk_categories = comprehensive_metabolic_df.withColumn(
            "metabolic_risk_level",
            when(col("total_metabolic_risk") >= 7, "严重代谢异常")
            .when(col("total_metabolic_risk") >= 5, "中度代谢异常")
            .when(col("total_metabolic_risk") >= 3, "轻度代谢异常")
            .otherwise("代谢正常"),
        )

        glucose_statistics = metabolic_risk_categories.groupBy("glucose_status").agg(
            count("patient_id").alias("patient_count"),
            avg("blood_glucose").alias("avg_glucose"),
            avg("fasting_insulin").alias("avg_insulin"),
        )
        metabolic_risk_stats = metabolic_risk_categories.groupBy(
            "metabolic_risk_level"
        ).agg(
            count("patient_id").alias("risk_count"),
            avg("total_metabolic_risk").alias("avg_risk_score"),
            avg("bmi").alias("avg_bmi"),
            avg("systolic_bp").alias("avg_bp"),
        )
        critical_patients = (
            comprehensive_metabolic_df.filter(col("total_metabolic_risk") >= 6)
            .select(
                "patient_id",
                "patient_name",
                "blood_glucose",
                "bmi",
                "systolic_bp",
                "total_cholesterol",
                "total_metabolic_risk",
            )
            .orderBy(col("total_metabolic_risk").desc())
            .limit(15)
        )

        glucose_data = [
            {
                "status": row["glucose_status"],
                "count": row["patient_count"],
                "avg_glucose": _round_or_none(row["avg_glucose"], 2),
                "avg_insulin": _round_or_none(row["avg_insulin"], 2),
            }
            for row in glucose_statistics.collect()
        ]
        risk_data = [
            {
                "risk_level": row["metabolic_risk_level"],
                "count": row["risk_count"],
                "avg_score": _round_or_none(row["avg_risk_score"], 2),
                "avg_bmi": _round_or_none(row["avg_bmi"], 2),
                "avg_bp": _round_or_none(row["avg_bp"], 1),
            }
            for row in metabolic_risk_stats.collect()
        ]
        critical_list = [
            {
                "patient_id": row["patient_id"],
                "name": row["patient_name"],
                "glucose": row["blood_glucose"],
                "bmi": row["bmi"],
                "bp": row["systolic_bp"],
                "cholesterol": row["total_cholesterol"],
                "risk_score": row["total_metabolic_risk"],
            }
            for row in critical_patients.collect()
        ]

        return JsonResponse(
            {
                "status": "success",
                "glucose_analysis": glucose_data,
                "metabolic_risk_analysis": risk_data,
                "critical_patients": critical_list,
                "total_analyzed": comprehensive_metabolic_df.count(),
            }
        )
    finally:
        # Release the cached blocks whether or not the analysis succeeded.
        comprehensive_metabolic_df.unpersist()