看完这个基于Hadoop的甲状腺癌数据系统,你可能会重新规划自己的毕设方向

61 阅读7分钟

前言

一.开发工具简介

  • 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
  • 开发语言:Python+Java(两个版本都支持)
  • 后端框架:Django+Spring Boot(Spring+SpringMVC+MyBatis)(两个版本都支持)
  • 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery
  • 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
  • 数据库:MySQL

二.系统内容简介

基于大数据的分化型甲状腺癌复发数据可视化分析系统是一套运用先进大数据技术构建的医疗数据智能分析平台,该系统采用Hadoop分布式存储框架和Spark大数据处理引擎作为核心技术架构,通过Python语言结合Django后端框架实现数据处理逻辑,前端采用Vue+ElementUI+Echarts技术栈打造直观友好的数据可视化界面。系统围绕分化型甲状腺癌患者的复发风险评估需求,构建了包含系统首页、用户中心、用户管理、甲状腺数据管理在内的完整功能模块,重点实现了患者人口特征分析、多维因素关联分析、临床病理特征分析、甲状腺功能指标分析以及患者治疗效果分析等核心功能。通过运用Spark SQL进行大规模数据查询处理,结合Pandas和NumPy进行数据科学计算,系统能够对海量甲状腺癌患者数据进行深度挖掘和统计分析,为医疗工作者提供患者复发风险预测、治疗效果评估和临床决策支持,同时通过丰富的图表和可视化组件将复杂的医疗数据以直观易懂的方式呈现,实现了大数据技术在精准医疗领域的创新应用。

三.系统功能演示

看完这个基于Hadoop的甲状腺癌数据系统,你可能会重新规划自己的毕设方向

四.系统界面展示

在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述

在这里插入图片描述 在这里插入图片描述

五.系统源码展示


# Core feature 1: patient demographic analysis
def analyze_population_characteristics(request):
    """Summarise the demographic profile of all thyroid cancer patients.

    Patients are bucketed by age bracket, gender and region, and a
    recurrence rate is computed per age bracket. Bracket counts and
    recurrence counts come from the same per-patient classification, so
    the numerator and denominator of every rate describe the same patients.

    Args:
        request: Incoming HTTP request; this view does not read it.

    Returns:
        JsonResponse: ``{'status': 'success', 'data': {...}}`` where
        ``data`` contains:

        * ``age_distribution`` - patient count per age bracket.
        * ``gender_distribution`` - counts for gender codes ``'M'`` and
          ``'F'`` under the keys ``male`` / ``female``.
        * ``region_top10`` - the ten regions with the most patients.
        * ``recurrence_by_age`` - recurrence rate (percent, two decimals)
          per age bracket; 0 for an empty bracket.
        * ``total_patients`` - number of patient rows.
    """
    patients = ThyroidPatient.objects.all()

    # Inclusive upper bound of each closed bracket, in ascending order.
    # Anything above the last bound is '60+'; anything below 20 is '<20'.
    closed_brackets = ((30, '20-30'), (40, '31-40'), (50, '41-50'), (60, '51-60'))
    bracket_labels = ['<20', '20-30', '31-40', '41-50', '51-60', '60+']

    age_groups = dict.fromkeys(bracket_labels, 0)
    recurred_groups = dict.fromkeys(bracket_labels, 0)

    # Single pass over only the two columns needed for the age statistics.
    for age, is_recurrence in patients.values_list('age', 'is_recurrence'):
        if age is None:
            # An unknown age cannot be assigned to any bracket.
            continue
        if age < 20:
            # Under-20 patients get their own bracket; lumping them into
            # '60+' would distort that bracket's count and recurrence rate.
            label = '<20'
        else:
            label = next(
                (name for upper, name in closed_brackets if age <= upper),
                '60+',
            )
        age_groups[label] += 1
        if is_recurrence:
            recurred_groups[label] += 1

    # Gender distribution. Rows are summed rather than indexed so that the
    # result stays correct even if the grouped query yields several rows
    # for the same gender code.
    gender_totals = {'M': 0, 'F': 0}
    for row in patients.values('gender').annotate(count=Count('id')):
        if row['gender'] in gender_totals:
            gender_totals[row['gender']] += row['count']

    # Ten regions with the highest patient counts.
    region_stats = patients.values('region').annotate(count=Count('id')).order_by('-count')[:10]

    # Recurrence rate (percent) per age bracket.
    recurrence_by_age = {}
    for label in bracket_labels:
        total_in_range = age_groups[label]
        recurrence_rate = (recurred_groups[label] / total_in_range * 100) if total_in_range > 0 else 0
        recurrence_by_age[label] = round(recurrence_rate, 2)

    result_data = {
        'age_distribution': age_groups,
        'gender_distribution': {'male': gender_totals['M'], 'female': gender_totals['F']},
        'region_top10': list(region_stats),
        'recurrence_by_age': recurrence_by_age,
        'total_patients': patients.count()
    }

    return JsonResponse({'status': 'success', 'data': result_data})

# Core feature 2: multi-factor association analysis
def multi_dimensional_correlation_analysis(request):
    """Relate several patient attributes to cancer recurrence.

    Four analyses are produced:

    * Pearson correlation between each numeric factor and the 0/1
      recurrence flag.
    * Recurrence rate per tumour-size bracket.
    * Recurrence rate per TSH-level group.
    * Recurrence rate per (age group, tumour-size group) combination.

    Args:
        request: Incoming HTTP request; this view does not read it.

    Returns:
        JsonResponse: ``{'status': 'success', 'data': {...}}`` with the keys
        ``correlation_matrix``, ``tumor_size_analysis``,
        ``tsh_level_analysis`` and ``age_tumor_cross_analysis``. A factor is
        absent from ``correlation_matrix`` when its correlation is undefined
        (fewer than two samples, or no variation in the factor or in the
        recurrence flag).
    """
    patients_data = ThyroidPatient.objects.select_related().all()

    # --- Correlation of each factor with recurrence -----------------------
    correlation_matrix = {}
    factors = ['age', 'tumor_size', 'tsh_level', 't3_level', 't4_level', 'thyroglobulin']

    for factor in factors:
        factor_values = []
        recurrence_values = []

        for patient in patients_data:
            factor_value = getattr(patient, factor, None)
            if factor_value is not None:
                factor_values.append(float(factor_value))
                recurrence_values.append(1 if patient.is_recurrence else 0)

        if len(factor_values) < 2:
            continue
        # Pearson's r is undefined when either series is constant;
        # np.corrcoef would return NaN, which the JSON response would emit
        # as the bare token NaN (not valid JSON).
        if min(factor_values) == max(factor_values):
            continue
        if min(recurrence_values) == max(recurrence_values):
            continue

        correlation_coefficient = float(np.corrcoef(factor_values, recurrence_values)[0, 1])
        if np.isfinite(correlation_coefficient):
            correlation_matrix[factor] = round(correlation_coefficient, 4)

    # --- Recurrence by tumour-size bracket --------------------------------
    # NOTE(review): the labels suggest tumor_size is stored in millimetres
    # (10 == 1 cm) - confirm against the model.
    # Bounds are [min, max); None means no upper bound, so very large
    # tumours are still counted in the top bracket.
    tumor_size_analysis = {}
    size_ranges = {'<1cm': (0, 10), '1-2cm': (10, 20), '2-4cm': (20, 40), '>4cm': (40, None)}

    for size_label, (min_size, max_size) in size_ranges.items():
        patients_in_range = patients_data.filter(tumor_size__gte=min_size)
        if max_size is not None:
            patients_in_range = patients_in_range.filter(tumor_size__lt=max_size)
        total_count = patients_in_range.count()
        recurrence_count = patients_in_range.filter(is_recurrence=True).count()

        recurrence_rate = (recurrence_count / total_count * 100) if total_count > 0 else 0
        tumor_size_analysis[size_label] = {
            'total': total_count,
            'recurred': recurrence_count,
            'rate': round(recurrence_rate, 2)
        }

    # --- Recurrence by TSH-level group ------------------------------------
    # Bounds are [min, max); the elevated group has no upper bound.
    tsh_groups = {'正常': (0.4, 4.0), '偏低': (0, 0.4), '偏高': (4.0, None)}
    tsh_analysis = {}

    for tsh_label, (min_tsh, max_tsh) in tsh_groups.items():
        patients_in_group = patients_data.filter(tsh_level__gte=min_tsh)
        if max_tsh is not None:
            patients_in_group = patients_in_group.filter(tsh_level__lt=max_tsh)
        group_total = patients_in_group.count()
        group_recurred = patients_in_group.filter(is_recurrence=True).count()

        group_rate = (group_recurred / group_total * 100) if group_total > 0 else 0
        tsh_analysis[tsh_label] = {
            'count': group_total,
            'recurrence_rate': round(group_rate, 2)
        }

    # --- Age group x tumour-size group cross analysis ---------------------
    age_tumor_cross = {}
    for patient in patients_data:
        # Skip incomplete rows: the correlation step above already treats
        # these fields as possibly missing, and None cannot be compared.
        if patient.age is None or patient.tumor_size is None:
            continue

        age_group = '青年' if patient.age < 40 else ('中年' if patient.age < 60 else '老年')
        tumor_group = '小' if patient.tumor_size < 20 else ('中' if patient.tumor_size < 40 else '大')

        key = f"{age_group}_{tumor_group}"
        cell = age_tumor_cross.setdefault(key, {'total': 0, 'recurred': 0})
        cell['total'] += 1
        if patient.is_recurrence:
            cell['recurred'] += 1

    # Add the recurrence rate (percent) to every populated cell.
    for cell in age_tumor_cross.values():
        total = cell['total']
        cell['rate'] = round((cell['recurred'] / total * 100), 2) if total > 0 else 0

    analysis_result = {
        'correlation_matrix': correlation_matrix,
        'tumor_size_analysis': tumor_size_analysis,
        'tsh_level_analysis': tsh_analysis,
        'age_tumor_cross_analysis': age_tumor_cross
    }

    return JsonResponse({'status': 'success', 'data': analysis_result})

# Core feature 3: treatment effectiveness analysis
def _percentage(part, whole):
    """Return ``part / whole`` as a percentage rounded to 2 decimals (0 if ``whole`` is 0)."""
    return round(part / whole * 100, 2) if whole > 0 else 0


def _method_effectiveness(treatment_records, treatment_methods):
    """Return outcome-grade counts, effective rate and recurrence rate per treatment method."""
    summary = {}
    for method in treatment_methods:
        method_records = treatment_records.filter(treatment_method=method)
        total_cases = method_records.count()

        excellent = method_records.filter(treatment_effect='优').count()
        good = method_records.filter(treatment_effect='良').count()
        fair = method_records.filter(treatment_effect='可').count()
        poor = method_records.filter(treatment_effect='差').count()

        # Counted in the database, one per record whose patient recurred,
        # instead of loading every record and its patient into memory.
        recurrence_count = method_records.filter(patient__is_recurrence=True).count()

        summary[method] = {
            'total_cases': total_cases,
            'excellent': excellent,
            'good': good,
            'fair': fair,
            'poor': poor,
            # "Effective" means graded excellent or good.
            'effective_rate': _percentage(excellent + good, total_cases),
            'recurrence_rate': _percentage(recurrence_count, total_cases)
        }
    return summary


def _cycle_effectiveness(treatment_records):
    """Return case counts and effective rate per treatment-duration bracket."""
    # NOTE(review): the thresholds (90/180/365) suggest treatment_duration
    # is stored in days - confirm against the model.
    cycle_filters = (
        ('1-3个月', {'treatment_duration__lte': 90}),
        ('4-6个月', {'treatment_duration__gt': 90, 'treatment_duration__lte': 180}),
        ('7-12个月', {'treatment_duration__gt': 180, 'treatment_duration__lte': 365}),
        ('1年以上', {'treatment_duration__gt': 365}),
    )

    analysis = {}
    for cycle_range, conditions in cycle_filters:
        records = treatment_records.filter(**conditions)
        cycle_total = records.count()
        cycle_effective = records.filter(treatment_effect__in=['优', '良']).count()

        analysis[cycle_range] = {
            'cases': cycle_total,
            'effective_cases': cycle_effective,
            'effectiveness': _percentage(cycle_effective, cycle_total)
        }
    return analysis


def _complication_rates(treatment_records):
    """Return how many records mention each complication type, with the share of all records."""
    complication_types = ['声带麻痹', '低钙血症', '出血', '感染', '无']
    # Materialise once; the list is scanned once per complication type.
    all_complications = list(treatment_records.values_list('complications', flat=True))
    record_total = len(all_complications)

    stats = {}
    for comp_type in complication_types:
        # Substring match: one record may mention several complications.
        comp_count = sum(1 for comp in all_complications if comp_type in str(comp))
        stats[comp_type] = {
            'count': comp_count,
            'rate': _percentage(comp_count, record_total)
        }
    return stats


def _follow_up_by_effect(treatment_records):
    """Return average/max/min follow-up time per outcome grade, skipping grades with no data."""
    analysis = {}
    for level in ['优', '良', '可', '差']:
        level_records = treatment_records.filter(treatment_effect=level)
        # `is not None` rather than truthiness, so that a recorded
        # follow-up time of 0 is kept instead of being treated as missing.
        survival_periods = [
            record.follow_up_time
            for record in level_records
            if record.follow_up_time is not None
        ]
        if not survival_periods:
            continue

        analysis[level] = {
            'case_count': len(survival_periods),
            'average_survival': round(sum(survival_periods) / len(survival_periods), 1),
            'max_survival': max(survival_periods),
            'min_survival': min(survival_periods)
        }
    return analysis


def _cost_benefit(treatment_records, treatment_methods):
    """Return average cost and cost per effective case for each method that has records."""
    analysis = {}
    for method in treatment_methods:
        method_records = treatment_records.filter(treatment_method=method)
        if not method_records.exists():
            continue

        avg_cost = method_records.aggregate(Avg('treatment_cost'))['treatment_cost__avg']
        effective_cases = method_records.filter(treatment_effect__in=['优', '良']).count()
        total_cases = method_records.count()

        # avg_cost is None when no record of this method has a cost value;
        # report 0 in that case rather than multiplying None.
        if avg_cost is None or effective_cases == 0:
            cost_per_effective_case = 0
        else:
            cost_per_effective_case = avg_cost * total_cases / effective_cases

        analysis[method] = {
            'average_cost': round(avg_cost, 2) if avg_cost else 0,
            'cost_effectiveness_ratio': round(cost_per_effective_case, 2)
        }
    return analysis


def treatment_effectiveness_analysis(request):
    """Produce a combined report on treatment outcomes.

    Args:
        request: Incoming HTTP request; this view does not read it.

    Returns:
        JsonResponse: ``{'status': 'success', 'data': {...}}`` where
        ``data`` contains:

        * ``treatment_effectiveness`` - per treatment method: outcome-grade
          counts, effective rate and recurrence rate (both in percent).
        * ``cycle_analysis`` - effective rate per treatment-duration bracket.
        * ``complication_statistics`` - count and share of records that
          mention each complication type.
        * ``survival_analysis`` - average / max / min follow-up time per
          outcome grade (grades without follow-up data are omitted).
        * ``cost_benefit_analysis`` - average cost and cost per effective
          case for each method that has at least one record.
    """
    treatment_records = TreatmentRecord.objects.select_related('patient').all()
    treatment_methods = ['手术', '碘131治疗', 'TSH抑制治疗', '放疗', '化疗']

    treatment_analysis_result = {
        'treatment_effectiveness': _method_effectiveness(treatment_records, treatment_methods),
        'cycle_analysis': _cycle_effectiveness(treatment_records),
        'complication_statistics': _complication_rates(treatment_records),
        'survival_analysis': _follow_up_by_effect(treatment_records),
        'cost_benefit_analysis': _cost_benefit(treatment_records, treatment_methods)
    }

    return JsonResponse({'status': 'success', 'data': treatment_analysis_result})

六.系统文档展示

在这里插入图片描述

结束