💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写,也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐
基于大数据的结核病数据分析系统介绍
结核病数据分析系统是一款基于大数据技术栈构建的智能化医疗数据分析平台,采用Hadoop+Spark分布式计算框架作为核心技术支撑,实现对结核病相关医疗数据的高效存储、处理与分析。系统前端采用Vue+ElementUI+Echarts技术栈构建交互界面,后端支持Python Django和Java Spring Boot双版本架构,通过MySQL数据库存储结构化数据,利用HDFS分布式文件系统管理海量非结构化医疗数据。系统集成十大核心功能模块,包括用户中心、结核病信息管理、可视化大屏展示、综合健康特征分析、典型临床症状分析、生活方式风险分析、患者基本特征分析等,通过Spark SQL进行复杂查询分析,结合Pandas、NumPy等数据科学库进行深度数据挖掘,为医疗机构提供结核病防控决策支持。系统充分发挥大数据技术在医疗健康领域的应用优势,通过分布式计算提升数据处理效率,利用可视化技术直观展现分析结果,为结核病防治工作提供科学的数据支撑和决策依据。
基于大数据的结核病数据分析系统演示视频
基于大数据的结核病数据分析系统演示图片
基于大数据的结核病数据分析系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, avg, sum, when, desc, asc
from pyspark.sql.types import IntegerType, FloatType, StringType
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from django.http import JsonResponse
from django.views import View
import json
import mysql.connector
# Module-level Spark session used by the analysis views in this file.
# getOrCreate() returns an already-running session when one exists.
spark = (
    SparkSession.builder
    .appName("TuberculosisDataAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .getOrCreate()
)
def comprehensive_health_analysis(request):
    """Summarise patient health-risk data and return it as JSON.

    Loads ``patient_health_info`` through Spark's JDBC source, derives an
    ``age_group``, an additive ``health_risk_score`` and a ``risk_level`` for
    every patient, and aggregates the result by gender, by age group and by
    smoking/drinking status. Up to 20 high-risk patients are returned with
    intervention suggestions derived from their smoking/drinking status.

    Args:
        request: The Django request object; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``gender_analysis``,
        ``age_analysis``, ``lifestyle_analysis`` and
        ``intervention_recommendations``.
    """

    def _rounded(value, digits):
        # avg() yields NULL when a group has no non-null input; round(None)
        # would raise TypeError, so pass the NULL through as None instead.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded here; they
    # should come from configuration rather than source code.
    query = (
        "SELECT patient_id, age, gender, bmi, smoking_status, drinking_status, "
        "exercise_frequency, nutrition_level, immune_status, comorbidity_count "
        "FROM patient_health_info"
    )
    health_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/tuberculosis_db")
        .option("dbtable", f"({query}) as health_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    age_groups = health_df.withColumn(
        "age_group",
        when(col("age") < 18, "青少年")
        .when(col("age") < 35, "青年")
        .when(col("age") < 60, "中年")
        .otherwise("老年"),
    )

    # A NULL comorbidity_count would turn the whole sum into NULL, and a NULL
    # score falls through every comparison below into the "高风险" branch.
    # Treat a missing count as 0 so such patients are scored on the remaining
    # factors instead.
    comorbidity_points = when(
        col("comorbidity_count").isNotNull(), col("comorbidity_count")
    ).otherwise(0)
    health_risk_scores = age_groups.withColumn(
        "health_risk_score",
        when(col("smoking_status") == "吸烟", 3).otherwise(0)
        + when(col("drinking_status") == "酗酒", 2).otherwise(0)
        + when(col("exercise_frequency") < 2, 2).otherwise(0)
        + when(col("nutrition_level") == "差", 3).otherwise(0)
        + when(col("immune_status") == "低下", 4).otherwise(0)
        + comorbidity_points,
    )
    risk_distribution = health_risk_scores.withColumn(
        "risk_level",
        when(col("health_risk_score") <= 3, "低风险")
        .when(col("health_risk_score") <= 7, "中风险")
        .otherwise("高风险"),
    )

    gender_risk_analysis = risk_distribution.groupBy("gender", "risk_level").agg(
        count("patient_id").alias("patient_count"),
        avg("bmi").alias("avg_bmi"),
        avg("age").alias("avg_age"),
    )
    age_group_analysis = risk_distribution.groupBy("age_group", "risk_level").agg(
        count("patient_id").alias("patient_count"),
        avg("health_risk_score").alias("avg_risk_score"),
    )
    # immune_status is compared against the string "低下" above, i.e. it is
    # categorical, so it is not averaged here.
    lifestyle_correlation = health_df.groupBy("smoking_status", "drinking_status").agg(
        count("patient_id").alias("patient_count"),
        avg("bmi").alias("avg_bmi"),
    )

    gender_results = [
        {
            "gender": row["gender"],
            "risk_level": row["risk_level"],
            "patient_count": row["patient_count"],
            "avg_bmi": _rounded(row["avg_bmi"], 2),
            "avg_age": _rounded(row["avg_age"], 1),
        }
        for row in gender_risk_analysis.collect()
    ]
    age_results = [
        {
            "age_group": row["age_group"],
            "risk_level": row["risk_level"],
            "patient_count": row["patient_count"],
            "avg_risk_score": _rounded(row["avg_risk_score"], 2),
        }
        for row in age_group_analysis.collect()
    ]
    lifestyle_results = [
        {
            "smoking_status": row["smoking_status"],
            "drinking_status": row["drinking_status"],
            "patient_count": row["patient_count"],
            "avg_bmi": _rounded(row["avg_bmi"], 2),
        }
        for row in lifestyle_correlation.collect()
    ]

    # Only 20 entries are returned, so cap the rows in Spark instead of
    # pulling every high-risk patient to the driver and slicing afterwards.
    high_risk_patients = (
        risk_distribution.filter(col("risk_level") == "高风险")
        .select(
            "patient_id",
            "age",
            "gender",
            "health_risk_score",
            "smoking_status",
            "drinking_status",
        )
        .limit(20)
    )
    intervention_recommendations = []
    for patient in high_risk_patients.collect():
        recommendations = []
        if patient["smoking_status"] == "吸烟":
            recommendations.append("戒烟干预")
        if patient["drinking_status"] == "酗酒":
            recommendations.append("酒精依赖治疗")
        intervention_recommendations.append(
            {
                "patient_id": patient["patient_id"],
                "risk_score": patient["health_risk_score"],
                "recommendations": recommendations,
            }
        )

    return JsonResponse(
        {
            "status": "success",
            "gender_analysis": gender_results,
            "age_analysis": age_results,
            "lifestyle_analysis": lifestyle_results,
            "intervention_recommendations": intervention_recommendations,
        }
    )
def clinical_symptom_analysis(request):
    """Score clinical symptoms per patient and return summary statistics as JSON.

    Loads ``clinical_symptoms`` through Spark's JDBC source and adds a
    ``symptom_severity`` score built from fever duration, cough type, sputum
    characteristics, night-sweat frequency and weight-loss percentage. The
    scored rows are then summarised by cough/sputum pattern, by cough type
    among patients with fever, by a derived ``progression_risk`` label, and
    for an early-warning subset (fever > 7, wet or bloody cough, weight loss
    > 5).

    Args:
        request: The Django request object; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``symptom_patterns``,
        ``fever_analysis``, ``progression_analysis`` and
        ``warning_indicators``.
    """

    def _rounded(value, digits):
        # avg() yields NULL when a group has no non-null input; round(None)
        # would raise TypeError, so pass the NULL through as None instead.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded here; they
    # should come from configuration rather than source code.
    symptom_query = (
        "SELECT patient_id, fever_duration, cough_type, sputum_characteristics, "
        "chest_pain_location, night_sweats_frequency, weight_loss_percentage, "
        "fatigue_level, appetite_loss_degree, breathing_difficulty "
        "FROM clinical_symptoms"
    )
    symptom_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/tuberculosis_db")
        .option("dbtable", f"({symptom_query}) as symptom_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    fever_points = (
        when(col("fever_duration") > 14, 3)
        .when(col("fever_duration") > 7, 2)
        .when(col("fever_duration") > 0, 1)
        .otherwise(0)
    )
    cough_points = (
        when(col("cough_type") == "干咳", 1)
        .when(col("cough_type") == "湿咳", 2)
        .when(col("cough_type") == "血痰", 3)
        .otherwise(0)
    )
    sputum_points = (
        when(col("sputum_characteristics").contains("血丝"), 3)
        .when(col("sputum_characteristics").contains("脓性"), 2)
        .otherwise(1)
    )
    night_sweat_points = (
        when(col("night_sweats_frequency") > 5, 3)
        .when(col("night_sweats_frequency") > 2, 2)
        .otherwise(1)
    )
    weight_loss_points = (
        when(col("weight_loss_percentage") > 15, 4)
        .when(col("weight_loss_percentage") > 10, 3)
        .when(col("weight_loss_percentage") > 5, 2)
        .otherwise(1)
    )
    symptom_severity_scores = symptom_df.withColumn(
        "symptom_severity",
        fever_points
        + cough_points
        + sputum_points
        + night_sweat_points
        + weight_loss_points,
    )
    severity_classification = symptom_severity_scores.withColumn(
        "severity_level",
        when(col("symptom_severity") <= 5, "轻度")
        .when(col("symptom_severity") <= 10, "中度")
        .otherwise("重度"),
    )

    symptom_patterns = severity_classification.groupBy(
        "cough_type", "sputum_characteristics"
    ).agg(
        count("patient_id").alias("pattern_count"),
        avg("symptom_severity").alias("avg_severity"),
        avg("weight_loss_percentage").alias("avg_weight_loss"),
    )
    fever_cough_correlation = (
        symptom_df.filter(col("fever_duration") > 0)
        .groupBy("cough_type")
        .agg(
            count("patient_id").alias("fever_cough_count"),
            avg("fever_duration").alias("avg_fever_duration"),
            avg("night_sweats_frequency").alias("avg_night_sweats"),
        )
    )

    # Both summaries below average symptom_severity, which exists only on the
    # scored frame; the raw symptom_df has no such column and Spark would
    # reject the aggregation.
    early_warning_indicators = severity_classification.filter(
        (col("fever_duration") > 7)
        & (col("cough_type").isin(["湿咳", "血痰"]))
        & (col("weight_loss_percentage") > 5)
    )
    warning_stats = early_warning_indicators.agg(
        count("patient_id").alias("high_risk_count"),
        avg("symptom_severity").alias("avg_severity_score"),
    )
    symptom_progression_analysis = severity_classification.withColumn(
        "progression_risk",
        when(
            (col("fever_duration") > 14)
            & (col("weight_loss_percentage") > 10)
            & (col("night_sweats_frequency") > 5),
            "快速进展",
        )
        .when(
            (col("fever_duration") > 7) & (col("weight_loss_percentage") > 5),
            "中等进展",
        )
        .otherwise("缓慢进展"),
    )
    progression_stats = symptom_progression_analysis.groupBy("progression_risk").agg(
        count("patient_id").alias("progression_count"),
        avg("symptom_severity").alias("avg_progression_severity"),
    )

    pattern_results = [
        {
            "cough_type": row["cough_type"],
            "sputum_characteristics": row["sputum_characteristics"],
            "pattern_count": row["pattern_count"],
            "avg_severity": _rounded(row["avg_severity"], 2),
        }
        for row in symptom_patterns.collect()
    ]
    fever_results = [
        {
            "cough_type": row["cough_type"],
            "fever_cough_count": row["fever_cough_count"],
            "avg_fever_duration": _rounded(row["avg_fever_duration"], 1),
        }
        for row in fever_cough_correlation.collect()
    ]
    progression_results = [
        {
            "progression_risk": row["progression_risk"],
            "progression_count": row["progression_count"],
            "avg_severity": _rounded(row["avg_progression_severity"], 2),
        }
        for row in progression_stats.collect()
    ]
    # An aggregation without groupBy always produces exactly one row.
    warning_summary = warning_stats.collect()[0].asDict()

    return JsonResponse(
        {
            "status": "success",
            "symptom_patterns": pattern_results,
            "fever_analysis": fever_results,
            "progression_analysis": progression_results,
            "warning_indicators": warning_summary,
        }
    )
def lifestyle_risk_analysis(request):
    """Score lifestyle and living-environment risk and return summaries as JSON.

    Loads ``lifestyle_factors`` through Spark's JDBC source, computes an
    ``environmental_risk`` and a ``behavioral_risk`` score per patient, sums
    them into ``total_lifestyle_risk`` and labels each patient with a
    ``risk_category``. The response contains per-occupation averages, sleep
    and activity averages per stress category (for patients with stress level
    > 5), up to 15 high-risk patients with suggested interventions, and
    aggregate figures for patients whose diet, sleep and activity values all
    exceed the protective thresholds.

    Args:
        request: The Django request object; it is not inspected.

    Returns:
        JsonResponse with the keys ``status``, ``occupation_analysis``,
        ``stress_analysis``, ``intervention_priorities`` and
        ``protective_factors``.
    """

    def _rounded(value, digits):
        # avg() yields NULL when a group has no non-null input; round(None)
        # would raise TypeError, so pass the NULL through as None instead.
        return None if value is None else round(value, digits)

    # NOTE(review): the JDBC URL and credentials are hard-coded here; they
    # should come from configuration rather than source code.
    lifestyle_query = (
        "SELECT patient_id, occupation, living_environment, household_size, "
        "ventilation_quality, smoking_history_years, alcohol_consumption_frequency, "
        "diet_quality_score, sleep_quality_score, stress_level, "
        "physical_activity_hours FROM lifestyle_factors"
    )
    lifestyle_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/tuberculosis_db")
        .option("dbtable", f"({lifestyle_query}) as lifestyle_data")
        .option("user", "root")
        .option("password", "password")
        .load()
    )

    environmental_risk = (
        when(col("living_environment") == "拥挤", 3)
        .when(col("living_environment") == "潮湿", 2)
        .otherwise(1)
        + when(col("household_size") > 6, 3)
        .when(col("household_size") > 4, 2)
        .otherwise(1)
        + when(col("ventilation_quality") == "差", 3)
        .when(col("ventilation_quality") == "一般", 2)
        .otherwise(1)
    )
    behavioral_risk = (
        when(col("smoking_history_years") > 20, 4)
        .when(col("smoking_history_years") > 10, 3)
        .when(col("smoking_history_years") > 5, 2)
        .when(col("smoking_history_years") > 0, 1)
        .otherwise(0)
        + when(col("alcohol_consumption_frequency") > 4, 3)
        .when(col("alcohol_consumption_frequency") > 2, 2)
        .otherwise(1)
        + when(col("diet_quality_score") < 3, 3)
        .when(col("diet_quality_score") < 5, 2)
        .otherwise(1)
        + when(col("sleep_quality_score") < 3, 2).otherwise(1)
        + when(col("physical_activity_hours") < 2, 2).otherwise(1)
    )
    comprehensive_risk_assessment = (
        lifestyle_df.withColumn("environmental_risk", environmental_risk)
        .withColumn("behavioral_risk", behavioral_risk)
        .withColumn(
            "total_lifestyle_risk",
            col("environmental_risk") + col("behavioral_risk"),
        )
        .withColumn(
            "risk_category",
            when(col("total_lifestyle_risk") <= 6, "低风险")
            .when(col("total_lifestyle_risk") <= 12, "中风险")
            .otherwise("高风险"),
        )
    )

    occupation_risk_analysis = comprehensive_risk_assessment.groupBy("occupation").agg(
        count("patient_id").alias("occupation_count"),
        avg("total_lifestyle_risk").alias("avg_risk_score"),
        avg("environmental_risk").alias("avg_env_risk"),
        avg("behavioral_risk").alias("avg_behavioral_risk"),
    )

    stress_impact_analysis = (
        lifestyle_df.select(
            "stress_level",
            "sleep_quality_score",
            "physical_activity_hours",
            "diet_quality_score",
        )
        .filter(col("stress_level") > 5)
        .withColumn(
            "stress_category",
            when(col("stress_level") > 8, "高压")
            .when(col("stress_level") > 6, "中压")
            .otherwise("低压"),
        )
    )
    stress_stats = stress_impact_analysis.groupBy("stress_category").agg(
        avg("sleep_quality_score").alias("avg_sleep"),
        avg("physical_activity_hours").alias("avg_activity"),
        avg("diet_quality_score").alias("avg_diet"),
        count("*").alias("stress_count"),
    )

    protective_factors_analysis = lifestyle_df.filter(
        (col("diet_quality_score") > 7)
        & (col("sleep_quality_score") > 7)
        & (col("physical_activity_hours") > 3)
    ).select(
        "patient_id",
        "smoking_history_years",
        "alcohol_consumption_frequency",
        "stress_level",
    )
    protective_stats = protective_factors_analysis.agg(
        count("patient_id").alias("protected_count"),
        avg("smoking_history_years").alias("avg_smoking_years"),
        avg("stress_level").alias("avg_stress"),
    )

    # Only 15 entries are returned, so cap the rows in Spark instead of
    # pulling every high-risk patient to the driver and slicing afterwards.
    high_risk_interventions = (
        comprehensive_risk_assessment.filter(col("risk_category") == "高风险")
        .select(
            "patient_id",
            "occupation",
            "environmental_risk",
            "behavioral_risk",
            "smoking_history_years",
            "alcohol_consumption_frequency",
        )
        .limit(15)
    )
    intervention_priorities = []
    for patient in high_risk_interventions.collect():
        priority_interventions = []
        # environmental_risk is a sum of when/otherwise terms and is never
        # NULL; the two raw columns below can be NULL, and comparing None
        # with an int raises TypeError in Python, so guard them explicitly.
        if patient["environmental_risk"] > 5:
            priority_interventions.append("改善居住环境")
        smoking_years = patient["smoking_history_years"]
        if smoking_years is not None and smoking_years > 10:
            priority_interventions.append("戒烟计划")
        alcohol_frequency = patient["alcohol_consumption_frequency"]
        if alcohol_frequency is not None and alcohol_frequency > 3:
            priority_interventions.append("酒精干预")
        intervention_priorities.append(
            {
                "patient_id": patient["patient_id"],
                "occupation": patient["occupation"],
                "interventions": priority_interventions,
            }
        )

    occupation_results = [
        {
            "occupation": row["occupation"],
            "occupation_count": row["occupation_count"],
            "avg_risk_score": _rounded(row["avg_risk_score"], 2),
            "avg_env_risk": _rounded(row["avg_env_risk"], 2),
        }
        for row in occupation_risk_analysis.collect()
    ]
    stress_results = [
        {
            "stress_category": row["stress_category"],
            "stress_count": row["stress_count"],
            "avg_sleep": _rounded(row["avg_sleep"], 2),
            "avg_activity": _rounded(row["avg_activity"], 2),
        }
        for row in stress_stats.collect()
    ]
    # An aggregation without groupBy always produces exactly one row.
    protective_summary = protective_stats.collect()[0].asDict()

    return JsonResponse(
        {
            "status": "success",
            "occupation_analysis": occupation_results,
            "stress_analysis": stress_results,
            "intervention_priorities": intervention_priorities,
            "protective_factors": protective_summary,
        }
    )
基于大数据的结核病数据分析系统文档展示
💖💖作者:计算机毕业设计杰瑞 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写,也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题推荐