一、个人简介
- 💖💖作者:计算机编程果茶熊
- 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 计算机毕业设计选题
- 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
- 大数据框架:Hadoop+Spark(Hive需要定制修改)
- 开发语言:Java+Python(两个版本都支持)
- 数据库:MySQL
- 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持)
- 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《基于大数据的北京市医保药品数据分析系统》是一个融合现代大数据技术与医保领域实际需求的综合性分析平台。该系统采用Hadoop分布式存储架构作为底层数据管理基础,结合Spark大数据处理框架实现海量医保药品数据的高效计算和分析。系统前端采用Vue框架配合ElementUI组件库构建用户交互界面,通过Echarts图表库实现数据的可视化展示,后端基于Spring Boot框架提供稳定的API服务支撑。核心功能涵盖药品核心属性分析、数据挖掘分析、生产厂家统计分析、医保报销策略研究以及中药及颗粒专项分析等多个维度。系统通过Spark SQL进行复杂查询处理,利用Pandas和NumPy进行数据科学计算,实现对北京市医保药品数据的深度挖掘和智能分析。整个系统不仅具备传统的用户管理和系统管理功能,更突出大数据技术在医保数据处理中的应用价值,为相关决策提供数据支撑和分析依据。
三、基于大数据的北京市医保药品数据分析系统-视频解说
计算机毕设大数据方向:基于北京医保药品数据分析系统技术详解|数据可视化
四、基于大数据的北京市医保药品数据分析系统-功能展示
五、基于大数据的北京市医保药品数据分析系统-代码展示
# Module-level setup: Spark entry point, Django response helpers, and the
# shared SparkSession used by every analysis view below.
from pyspark.sql import SparkSession
# WARNING: this wildcard import shadows the builtins ``sum``, ``min``, ``max``,
# ``round`` and ``abs`` with Spark Column functions for the rest of the module.
from pyspark.sql.functions import *
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
# Shared SparkSession with adaptive query execution enabled.  Created at import
# time, so merely importing this module requires a reachable Spark environment.
spark = SparkSession.builder.appName("MedicalInsuranceAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
def drug_core_analysis(request):
if request.method == 'POST':
data = json.loads(request.body)
analysis_type = data.get('analysis_type')
time_range = data.get('time_range', '2024')
drug_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "drug_info").option("user", "root").option("password", "password").load()
usage_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "drug_usage").option("user", "root").option("password", "password").load()
combined_df = drug_df.join(usage_df, "drug_id")
if analysis_type == 'frequency':
result_df = combined_df.groupBy("drug_name", "drug_category").agg(sum("usage_count").alias("total_usage"), avg("unit_price").alias("avg_price"), count("patient_id").alias("patient_count")).orderBy(desc("total_usage"))
top_drugs = result_df.limit(20).collect()
analysis_result = []
for row in top_drugs:
cost_effectiveness = float(row.patient_count) / float(row.avg_price) if row.avg_price > 0 else 0
usage_trend = self.calculate_usage_trend(row.drug_name, time_range)
analysis_result.append({
'drug_name': row.drug_name,
'category': row.drug_category,
'total_usage': int(row.total_usage),
'avg_price': round(float(row.avg_price), 2),
'patient_count': int(row.patient_count),
'cost_effectiveness': round(cost_effectiveness, 4),
'usage_trend': usage_trend
})
elif analysis_type == 'price_distribution':
price_ranges = combined_df.withColumn("price_range", when(col("unit_price") < 10, "低价药品").when(col("unit_price") < 50, "中价药品").when(col("unit_price") < 200, "高价药品").otherwise("超高价药品"))
distribution_result = price_ranges.groupBy("price_range", "drug_category").agg(count("drug_id").alias("drug_count"), avg("unit_price").alias("avg_price"), sum("usage_count").alias("total_usage")).collect()
analysis_result = [{'price_range': row.price_range, 'category': row.drug_category, 'drug_count': int(row.drug_count), 'avg_price': round(float(row.avg_price), 2), 'total_usage': int(row.total_usage)} for row in distribution_result]
return JsonResponse({'status': 'success', 'data': analysis_result, 'total_count': len(analysis_result)})
return JsonResponse({'status': 'error', 'message': '请求方法错误'})
def drug_data_mining(request):
if request.method == 'POST':
data = json.loads(request.body)
mining_algorithm = data.get('algorithm', 'association')
confidence_threshold = float(data.get('confidence', 0.6))
support_threshold = float(data.get('support', 0.1))
prescription_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "prescription_details").option("user", "root").option("password", "password").load()
patient_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "patient_info").option("user", "root").option("password", "password").load()
combined_data = prescription_df.join(patient_df, "patient_id")
if mining_algorithm == 'association':
drug_combinations = combined_data.groupBy("prescription_id").agg(collect_list("drug_name").alias("drug_list"))
frequent_patterns = self.find_frequent_itemsets(drug_combinations, support_threshold)
association_rules = []
for pattern in frequent_patterns:
if len(pattern['items']) >= 2:
for i in range(len(pattern['items'])):
antecedent = pattern['items'][:i] + pattern['items'][i+1:]
consequent = [pattern['items'][i]]
confidence = self.calculate_confidence(antecedent, consequent, drug_combinations)
lift = self.calculate_lift(antecedent, consequent, drug_combinations)
if confidence >= confidence_threshold:
association_rules.append({
'antecedent': antecedent,
'consequent': consequent,
'support': pattern['support'],
'confidence': round(confidence, 4),
'lift': round(lift, 4)
})
mining_result = sorted(association_rules, key=lambda x: x['confidence'], reverse=True)[:50]
elif mining_algorithm == 'clustering':
feature_df = combined_data.groupBy("drug_name").agg(avg("patient_age").alias("avg_age"), avg("usage_duration").alias("avg_duration"), sum("dosage_amount").alias("total_dosage"), count("patient_id").alias("patient_count"))
features_pandas = feature_df.toPandas()
from sklearn.cluster import KMeans
feature_matrix = features_pandas[['avg_age', 'avg_duration', 'total_dosage', 'patient_count']].values
scaler = StandardScaler()
normalized_features = scaler.fit_transform(feature_matrix)
kmeans = KMeans(n_clusters=5, random_state=42)
cluster_labels = kmeans.fit_predict(normalized_features)
features_pandas['cluster'] = cluster_labels
mining_result = []
for cluster_id in range(5):
cluster_drugs = features_pandas[features_pandas['cluster'] == cluster_id]
cluster_center = kmeans.cluster_centers_[cluster_id]
mining_result.append({
'cluster_id': int(cluster_id),
'drug_count': len(cluster_drugs),
'representative_drugs': cluster_drugs['drug_name'].head(10).tolist(),
'avg_patient_age': round(float(cluster_drugs['avg_age'].mean()), 2),
'avg_usage_duration': round(float(cluster_drugs['avg_duration'].mean()), 2),
'cluster_characteristics': self.analyze_cluster_characteristics(cluster_center, scaler)
})
return JsonResponse({'status': 'success', 'data': mining_result, 'algorithm': mining_algorithm})
return JsonResponse({'status': 'error', 'message': '请求方法错误'})
def medical_insurance_strategy_analysis(request):
if request.method == 'POST':
data = json.loads(request.body)
analysis_dimension = data.get('dimension', 'reimbursement_rate')
time_period = data.get('time_period', '2024')
category_filter = data.get('category', 'all')
reimbursement_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "reimbursement_records").option("user", "root").option("password", "password").load()
drug_info_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "drug_info").option("user", "root").option("password", "password").load()
policy_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/medical_db").option("dbtable", "insurance_policy").option("user", "root").option("password", "password").load()
comprehensive_data = reimbursement_df.join(drug_info_df, "drug_id").join(policy_df, "policy_id")
if category_filter != 'all':
comprehensive_data = comprehensive_data.filter(col("drug_category") == category_filter)
if analysis_dimension == 'reimbursement_rate':
rate_analysis = comprehensive_data.groupBy("drug_category", "reimbursement_type").agg(avg("reimbursement_ratio").alias("avg_ratio"), sum("reimbursed_amount").alias("total_reimbursed"), sum("patient_payment").alias("total_patient_cost"), count("record_id").alias("claim_count"))
strategy_recommendations = []
for row in rate_analysis.collect():
cost_burden_ratio = float(row.total_patient_cost) / (float(row.total_reimbursed) + float(row.total_patient_cost))
utilization_efficiency = float(row.claim_count) / float(row.total_reimbursed) * 1000
risk_assessment = self.assess_reimbursement_risk(row.drug_category, row.avg_ratio)
optimal_ratio = self.calculate_optimal_ratio(cost_burden_ratio, utilization_efficiency, risk_assessment)
strategy_recommendations.append({
'category': row.drug_category,
'reimbursement_type': row.reimbursement_type,
'current_avg_ratio': round(float(row.avg_ratio), 4),
'total_reimbursed': round(float(row.total_reimbursed), 2),
'patient_cost_burden': round(cost_burden_ratio, 4),
'utilization_efficiency': round(utilization_efficiency, 4),
'recommended_ratio': round(optimal_ratio, 4),
'potential_savings': round((float(row.avg_ratio) - optimal_ratio) * float(row.total_reimbursed), 2),
'risk_level': risk_assessment
})
elif analysis_dimension == 'cost_effectiveness':
cost_effectiveness_analysis = comprehensive_data.withColumn("cost_per_patient", col("total_cost") / col("patient_count")).withColumn("effectiveness_score", col("treatment_success_rate") * col("patient_satisfaction") / 100)
effectiveness_result = cost_effectiveness_analysis.groupBy("drug_name", "drug_category").agg(avg("cost_per_patient").alias("avg_cost_per_patient"), avg("effectiveness_score").alias("avg_effectiveness"), sum("reimbursed_amount").alias("total_insurance_cost"), count("patient_id").alias("treated_patients")).orderBy(desc("avg_effectiveness"))
strategy_recommendations = []
for row in effectiveness_result.collect():
cost_effectiveness_ratio = float(row.avg_effectiveness) / float(row.avg_cost_per_patient) if row.avg_cost_per_patient > 0 else 0
insurance_value = float(row.total_insurance_cost) / float(row.treated_patients) if row.treated_patients > 0 else 0
priority_score = cost_effectiveness_ratio * 0.6 + (1 / insurance_value * 1000) * 0.4 if insurance_value > 0 else cost_effectiveness_ratio
strategy_recommendations.append({
'drug_name': row.drug_name,
'category': row.drug_category,
'avg_cost_per_patient': round(float(row.avg_cost_per_patient), 2),
'effectiveness_score': round(float(row.avg_effectiveness), 4),
'cost_effectiveness_ratio': round(cost_effectiveness_ratio, 6),
'insurance_cost_per_patient': round(insurance_value, 2),
'priority_score': round(priority_score, 6),
'treatment_volume': int(row.treated_patients),
'strategy_recommendation': self.generate_strategy_recommendation(cost_effectiveness_ratio, insurance_value, priority_score)
})
strategy_recommendations = sorted(strategy_recommendations, key=lambda x: x['priority_score'], reverse=True)[:30]
return JsonResponse({'status': 'success', 'data': strategy_recommendations, 'analysis_dimension': analysis_dimension, 'total_strategies': len(strategy_recommendations)})
return JsonResponse({'status': 'error', 'message': '请求方法错误'})
六、基于大数据的北京市医保药品数据分析系统-文档展示
七、END
- 💛💛想说的话:感谢大家的关注与支持!
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 计算机毕业设计选题
- 💕💕文末获取源码联系计算机编程果茶熊