Preface
💖💖Author: 计算机程序员小杨 💙💙About me: I work in a computer-related field and am proficient in Java, WeChat mini-programs, Python, Golang, Android, and several other IT directions. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing text-similarity scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code, so feel free to ask me about anything code-related! 💛💛A note of thanks: thank you all for your attention and support! 💕💕Contact 计算机程序员小杨 at the end of this post to get the source code 💜💜 Web practical projects | Android/mini-program practical projects | Big data practical projects | Deep learning practical projects | Graduation project topic ideas 💜💜
1. Development Tools Overview
Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
Development languages: Python + Java (both versions are supported)
Back end: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions are supported)
Front end: Vue + ElementUI + ECharts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
2. System Overview
This system is a big data analysis platform dedicated to the clinical diagnosis of pediatric appendicitis. By integrating the Hadoop distributed storage architecture with the Spark in-memory computing engine, it performs deep mining and intelligent analysis on large volumes of pediatric appendicitis case data. The system adopts a layered architecture: the bottom layer uses Hadoop HDFS to store structured and unstructured data from multiple medical institutions, including patient records, laboratory indicators, and imaging data; the middle layer uses Spark SQL and the MLlib components for data cleaning, feature extraction, and machine-learning modeling; and the top layer provides an intuitive data-visualization interface for presenting the analysis results. The platform's core functions include real-time data ingestion, batch data processing, multi-dimensional statistical analysis, predictive model training, and interactive chart display. It helps medical staff identify high-risk factors for pediatric appendicitis, analyze incidence patterns, and evaluate treatment outcomes, providing data support for clinical decision-making. Built on a modern big data stack, the system offers good scalability and fault tolerance, can process terabytes of medical data, and meets the analysis needs of large medical institutions.
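As a minimal sketch of how the Spark layer might hand results to the visualization layer described above, the snippet below writes an aggregated DataFrame to MySQL over JDBC, so the Django/Spring Boot back end can serve it to the Vue + ECharts front end. The JDBC URL, table name, helper name, and credentials are illustrative assumptions (and the MySQL Connector/J driver must be on Spark's classpath); this is not code from the project itself.

# Hypothetical bridge between the Spark layer and the MySQL-backed visualization layer.
# The URL, table, user, and password below are placeholder assumptions.
def save_summary_to_mysql(summary_df):
    summary_df.write.format("jdbc") \
        .option("url", "jdbc:mysql://localhost:3306/appendicitis_db?useSSL=false") \
        .option("dbtable", "age_group_summary") \
        .option("user", "analytics_user") \
        .option("password", "<password>") \
        .mode("overwrite") \
        .save()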
3. System Functionality Demo
Graduation project: a big-data visualization and analysis system for pediatric appendicitis data, implemented with Hadoop + Spark
4. System Interface
5. System Source Code
The core PySpark code below covers three parts: batch statistical analysis, diagnostic model training, and real-time alerting.
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.ml.feature import VectorAssembler, StandardScaler
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.evaluation import BinaryClassificationEvaluator
import pandas as pd
import numpy as np
spark = SparkSession.builder \
    .appName("PediatricAppendicitisAnalysis") \
    .config("spark.sql.adaptive.enabled", "true") \
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true") \
    .getOrCreate()
def process_patient_data_analysis():
    # Load raw patient records from HDFS and keep pediatric cases with complete key indicators
    raw_data = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs://cluster/appendicitis/patient_records.csv")
    cleaned_data = raw_data.filter(col("age").between(1, 18)).filter(col("white_blood_cell").isNotNull()).filter(col("neutrophil_percentage").isNotNull()).filter(col("body_temperature").isNotNull())
    # Bucket patients into clinically meaningful age groups
    age_groups = cleaned_data.withColumn("age_group", when(col("age") < 3, "infant").when(col("age") < 7, "preschool").when(col("age") < 12, "school_age").otherwise("adolescent"))
    # Grade fever severity and white-blood-cell levels
    symptom_features = age_groups.withColumn("fever_severity", when(col("body_temperature") > 38.5, "high").when(col("body_temperature") > 37.5, "moderate").otherwise("normal")).withColumn("wbc_level", when(col("white_blood_cell") > 15000, "very_high").when(col("white_blood_cell") > 12000, "high").when(col("white_blood_cell") > 10000, "elevated").otherwise("normal"))
    # Convert pain duration to hours and map the 0-10 pain scale onto a 0-3 severity score
    pain_analysis = symptom_features.withColumn("pain_duration_hours", col("pain_duration_minutes") / 60).withColumn("pain_severity_score", when(col("pain_scale") >= 8, 3).when(col("pain_scale") >= 6, 2).when(col("pain_scale") >= 4, 1).otherwise(0))
    # Composite inflammation and clinical risk scores; when() takes (condition, value), so defaults go in .otherwise()
    diagnosis_features = pain_analysis.withColumn("inflammation_score", (col("white_blood_cell") / 1000) + (col("neutrophil_percentage") / 10) + when(col("body_temperature") > 38, col("body_temperature") - 37).otherwise(0)).withColumn("clinical_risk_score", col("pain_severity_score") + when(col("vomiting") == "yes", 1).otherwise(0) + when(col("rebound_tenderness") == "positive", 2).otherwise(0) + when(col("mcburney_point_tenderness") == "positive", 2).otherwise(0))
    # Aggregate case counts and average scores by age group and gender
    statistical_summary = diagnosis_features.groupBy("age_group", "gender").agg(count("*").alias("case_count"), avg("inflammation_score").alias("avg_inflammation"), avg("clinical_risk_score").alias("avg_risk_score"), avg("hospital_stay_days").alias("avg_stay_duration"))
    # Rank complications within each age group
    complication_analysis = diagnosis_features.filter(col("complications").isNotNull()).groupBy("age_group", "complications").agg(count("*").alias("complication_count")).orderBy("age_group", desc("complication_count"))
    # Derive the season from the admission month and count cases per season and age group
    seasonal_pattern = diagnosis_features.withColumn("admission_month", month(col("admission_date"))).withColumn("season", when(col("admission_month").isin([12, 1, 2]), "winter").when(col("admission_month").isin([3, 4, 5]), "spring").when(col("admission_month").isin([6, 7, 8]), "summer").otherwise("autumn")).groupBy("season", "age_group").agg(count("*").alias("seasonal_cases")).orderBy("season", "age_group")
    return statistical_summary, complication_analysis, seasonal_pattern
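# Hedged usage sketch, not from the original post: the helper name export_batch_results
# and the output paths are assumptions for illustration. It runs the batch analysis and
# writes each result as a single headered CSV that the dashboard layer can pick up.
def export_batch_results():
    summary, complications, seasonal = process_patient_data_analysis()
    summary.coalesce(1).write.mode("overwrite").option("header", "true").csv("hdfs://cluster/appendicitis/output/summary")
    complications.coalesce(1).write.mode("overwrite").option("header", "true").csv("hdfs://cluster/appendicitis/output/complications")
    seasonal.coalesce(1).write.mode("overwrite").option("header", "true").csv("hdfs://cluster/appendicitis/output/seasonal")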
def build_diagnostic_prediction_model():
    # Load labeled historical cases for supervised training
    training_data = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs://cluster/appendicitis/labeled_cases.csv")
    feature_data = training_data.select("age", "gender_numeric", "body_temperature", "white_blood_cell", "neutrophil_percentage", "pain_scale", "vomiting_numeric", "rebound_tenderness_numeric", "mcburney_point_numeric", "ultrasound_score", "ct_score", "appendicitis_confirmed")
    # Binary clinical flags; when() needs .otherwise(0) for the default branch
    processed_features = feature_data.withColumn("fever_flag", when(col("body_temperature") > 37.5, 1).otherwise(0)).withColumn("wbc_elevated", when(col("white_blood_cell") > 10000, 1).otherwise(0)).withColumn("neutrophil_high", when(col("neutrophil_percentage") > 75, 1).otherwise(0)).withColumn("severe_pain", when(col("pain_scale") >= 7, 1).otherwise(0))
    # Interaction terms capturing co-occurring symptoms and imaging agreement
    interaction_features = processed_features.withColumn("fever_wbc_interaction", col("fever_flag") * col("wbc_elevated")).withColumn("pain_tenderness_interaction", col("severe_pain") * col("rebound_tenderness_numeric")).withColumn("imaging_consensus", (col("ultrasound_score") + col("ct_score")) / 2).withColumn("clinical_triad", col("fever_flag") + col("severe_pain") + col("vomiting_numeric"))
    feature_columns = ["age", "gender_numeric", "body_temperature", "white_blood_cell", "neutrophil_percentage", "pain_scale", "fever_flag", "wbc_elevated", "neutrophil_high", "severe_pain", "fever_wbc_interaction", "pain_tenderness_interaction", "imaging_consensus", "clinical_triad"]
    # Assemble and standardize the feature vector
    assembler = VectorAssembler(inputCols=feature_columns, outputCol="raw_features")
    feature_vector = assembler.transform(interaction_features)
    scaler = StandardScaler(inputCol="raw_features", outputCol="scaled_features", withStd=True, withMean=True)
    scaler_model = scaler.fit(feature_vector)
    scaled_data = scaler_model.transform(feature_vector)
    # Train/test split and random forest training
    train_data, test_data = scaled_data.randomSplit([0.8, 0.2], seed=42)
    rf_classifier = RandomForestClassifier(featuresCol="scaled_features", labelCol="appendicitis_confirmed", numTrees=100, maxDepth=10, seed=42)
    rf_model = rf_classifier.fit(train_data)
    predictions = rf_model.transform(test_data)
    # Evaluate with area under the ROC curve
    evaluator = BinaryClassificationEvaluator(labelCol="appendicitis_confirmed", rawPredictionCol="rawPrediction", metricName="areaUnderROC")
    auc_score = evaluator.evaluate(predictions)
    # Rank features by importance for clinical interpretability
    feature_importance = rf_model.featureImportances.toArray()
    importance_df = spark.createDataFrame([(feature_columns[i], float(feature_importance[i])) for i in range(len(feature_columns))], ["feature", "importance"]).orderBy(desc("importance"))
    return rf_model, scaler_model, auc_score, importance_df
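# Hedged sketch with assumed paths, not from the original post: persist the fitted scaler
# and forest so an online scoring service can reload them via StandardScalerModel.load()
# and RandomForestClassificationModel.load() instead of retraining. Both fitted models
# expose the standard MLWriter interface used below.
def persist_diagnostic_model():
    rf_model, scaler_model, auc_score, importance_df = build_diagnostic_prediction_model()
    scaler_model.write().overwrite().save("hdfs://cluster/appendicitis/models/scaler")
    rf_model.write().overwrite().save("hdfs://cluster/appendicitis/models/random_forest")
    print(f"Diagnostic model persisted, held-out AUC = {auc_score:.3f}")
    importance_df.show(truncate=False)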
def generate_real_time_alerts():
    # Consume incoming patient events from Kafka
    streaming_data = spark.readStream.format("kafka").option("kafka.bootstrap.servers", "localhost:9092").option("subscribe", "appendicitis_alerts").option("startingOffsets", "latest").load()
    # Parse the JSON payload into typed columns
    parsed_stream = streaming_data.select(from_json(col("value").cast("string"), StructType([StructField("patient_id", StringType()), StructField("age", IntegerType()), StructField("body_temperature", DoubleType()), StructField("white_blood_cell", IntegerType()), StructField("pain_scale", IntegerType()), StructField("rebound_tenderness", StringType()), StructField("admission_time", TimestampType())])).alias("data")).select("data.*")
    # Flag each critical indicator; when() takes (condition, value) with the default in .otherwise()
    alert_conditions = parsed_stream.withColumn("high_fever_alert", when(col("body_temperature") > 39.0, 1).otherwise(0)).withColumn("wbc_critical_alert", when(col("white_blood_cell") > 20000, 1).otherwise(0)).withColumn("severe_pain_alert", when(col("pain_scale") >= 9, 1).otherwise(0)).withColumn("positive_tenderness_alert", when(col("rebound_tenderness") == "positive", 1).otherwise(0))
    # Weighted emergency risk score mapped onto four alert levels
    risk_scoring = alert_conditions.withColumn("emergency_risk_score", col("high_fever_alert") * 2 + col("wbc_critical_alert") * 3 + col("severe_pain_alert") * 2 + col("positive_tenderness_alert") * 2).withColumn("alert_level", when(col("emergency_risk_score") >= 7, "CRITICAL").when(col("emergency_risk_score") >= 4, "HIGH").when(col("emergency_risk_score") >= 2, "MEDIUM").otherwise("LOW"))
    critical_cases = risk_scoring.filter(col("alert_level").isin(["CRITICAL", "HIGH"])).withColumn("alert_timestamp", current_timestamp()).withColumn("recommended_action", when(col("alert_level") == "CRITICAL", "IMMEDIATE_SURGERY_CONSULTATION").otherwise("URGENT_EVALUATION"))
    # Hourly windowed counts of critical alerts, tolerating 10 minutes of late data
    windowed_analysis = critical_cases.withWatermark("admission_time", "10 minutes").groupBy(window(col("admission_time"), "1 hour"), col("alert_level")).agg(count("*").alias("alert_count"), collect_list("patient_id").alias("patient_list"))
    alert_output = windowed_analysis.writeStream.outputMode("update").format("console").option("truncate", "false").trigger(processingTime="30 seconds").start()
    # The Kafka sink requires a "value" column, so serialize each alert row to JSON first
    hospital_notifications = critical_cases.select(to_json(struct("patient_id", "alert_level", "emergency_risk_score", "recommended_action", "alert_timestamp")).alias("value")).writeStream.format("kafka").option("kafka.bootstrap.servers", "localhost:9092").option("topic", "hospital_emergency_alerts").option("checkpointLocation", "/tmp/kafka_checkpoint").outputMode("append").start()
    return alert_output, hospital_notifications
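# Minimal driver sketch (an assumption, not shown in the original post): wire the three
# stages together and block until the streaming queries terminate.
if __name__ == "__main__":
    summary, complications, seasonal = process_patient_data_analysis()
    summary.show(20, truncate=False)
    rf_model, scaler_model, auc_score, importance_df = build_diagnostic_prediction_model()
    print(f"Diagnostic model AUC: {auc_score:.3f}")
    console_query, kafka_query = generate_real_time_alerts()
    spark.streams.awaitAnyTermination()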
6. System Documentation
The End
💕💕To get the source code, contact 计算机程序员小杨