前言
- 💖💖作者:计算机程序员小杨
- 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💕💕文末获取源码,请联系:计算机程序员小杨
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 深度学习实战项目
- 计算机毕业设计选题
- 💜💜
一.开发工具简介
- 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
- 开发语言:Python+Java(两个版本都支持)
- 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持)
- 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery
- 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
- 数据库:MySQL
二.系统内容简介
基于大数据的校园霸凌数据可视化分析系统是一套采用先进大数据技术栈构建的综合性数据分析平台,该系统充分运用Hadoop分布式存储框架和Spark大数据计算引擎作为核心技术架构,通过HDFS实现海量校园霸凌相关数据的分布式存储,利用Spark SQL进行高效的数据查询和分析处理,结合Pandas和NumPy等Python数据科学库进行深度数据挖掘。系统支持Python+Django和Java+Spring Boot双技术栈开发模式,前端采用Vue+ElementUI构建现代化用户界面,通过Echarts实现丰富的数据可视化展示效果。系统功能涵盖完整的校园霸凌数据分析链条,包含系统首页总览、个人中心管理、用户权限管理、霸凌状态深度分析、霸凌影响因素评估、霸凌人群统计分布分析、体重与霸凌关联性分析等核心模块,同时配备系统管理功能确保平台稳定运行,特别设计的可视化大屏展示模块能够直观呈现多维度分析结果。整套系统基于MySQL数据库进行数据持久化存储,通过大数据技术的深度应用,能够处理大规模校园霸凌相关数据集,为教育管理者和研究人员提供科学的决策支持和数据洞察。
三.系统功能演示
选题纠结?毕设没方向?这套基于大数据的校园霸凌数据可视化分析系统说不定能帮你摆脱困境!
四.系统界面展示
五.系统源码展示
# 霸凌状态分析核心功能
def analyze_bullying_status(request):
    """Build the bullying-status analysis payload and return it as JSON.

    Loads every row of ``bullying_records`` with a non-null ``status`` through
    the module-level ``spark`` session and computes:

    * ``status_distribution``: record count for each of the four tracked
      statuses (physical, verbal, social, cyber).
    * ``severity_scores``: mean numeric severity per status using the
      mild=1 .. extreme=4 mapping; ``None`` when a status has no record with
      a mappable severity.
    * ``monthly_trends``: for each calendar month, record count per status.
    * ``transition_probabilities``: 4x4 row-normalised matrix counting how
      often one status is directly followed by another in a student's
      date-ordered records.
    * ``future_prediction``: current status shares multiplied by the
      transition matrix (a one-step projection).

    Args:
        request: Incoming request object; not read by this function.

    Returns:
        ``JsonResponse`` wrapping the dictionary described above.
    """
    status_types = ('physical', 'verbal', 'social', 'cyber')

    # Pull the bullying records out of Spark into a pandas frame.
    spark_data = spark.sql("SELECT * FROM bullying_records WHERE status IS NOT NULL")
    bullying_df = spark_data.toPandas()

    # Count records per status type. int() is required because value_counts
    # yields numpy integers, which the JSON encoder cannot serialise.
    status_counts = bullying_df['status'].value_counts()
    status_distribution = {
        name: int(status_counts.get(name, 0)) for name in status_types
    }

    # Average severity per status; unmapped severities become NaN and are
    # ignored by mean(). An all-NaN group is reported as None (NaN is not JSON).
    severity_mapping = {'mild': 1, 'moderate': 2, 'severe': 3, 'extreme': 4}
    bullying_df['severity_score'] = bullying_df['severity'].map(severity_mapping)
    mean_severity = bullying_df.groupby('status')['severity_score'].mean()
    avg_severity_by_status = {
        status: (None if pd.isna(score) else float(score))
        for status, score in mean_severity.items()
    }

    # Monthly trend: rows are months, columns are statuses.
    bullying_df['report_date'] = pd.to_datetime(bullying_df['report_date'])
    monthly_trends = (
        bullying_df
        .groupby([bullying_df['report_date'].dt.to_period('M'), 'status'])
        .size()
        .unstack(fill_value=0)
    )
    monthly_trend_data = {
        str(period): {status: int(count) for status, count in row.items()}
        for period, row in monthly_trends.to_dict('index').items()
    }

    # Status transition counts between consecutive records of each student.
    # One sort + groupby replaces re-filtering the whole frame per student.
    status_index = {name: position for position, name in enumerate(status_types)}
    transition_matrix = np.zeros((len(status_types), len(status_types)))
    ordered_records = bullying_df.sort_values('report_date')
    for _, statuses in ordered_records.groupby('student_id', sort=False)['status']:
        codes = [status_index.get(status) for status in statuses]
        for current_status, next_status in zip(codes, codes[1:]):
            # A status outside the four tracked types has no matrix slot;
            # skip the pair instead of raising KeyError.
            if current_status is None or next_status is None:
                continue
            transition_matrix[current_status, next_status] += 1

    # Row-normalise; rows with no observed transition stay all-zero rather
    # than going through a 0/0 division.
    row_sums = transition_matrix.sum(axis=1, keepdims=True)
    transition_matrix = np.divide(
        transition_matrix,
        row_sums,
        out=np.zeros_like(transition_matrix),
        where=row_sums > 0,
    )

    # One-step projection of the current status shares.
    current_state_vector = np.array(
        [status_distribution[name] for name in status_types], dtype=float
    )
    total_tracked = current_state_vector.sum()
    if total_tracked > 0:
        current_state_vector = current_state_vector / total_tracked
    future_prediction = np.dot(current_state_vector, transition_matrix)

    analysis_result = {
        'status_distribution': status_distribution,
        'severity_scores': avg_severity_by_status,
        'monthly_trends': monthly_trend_data,
        'transition_probabilities': transition_matrix.tolist(),
        'future_prediction': future_prediction.tolist(),
    }
    return JsonResponse(analysis_result)
# 霸凌人群统计分析核心功能
def analyze_bullying_demographics(request):
    """Build the demographic breakdown of bullying records and return it as JSON.

    Reads the last 365 days of ``bullying_demographics`` through the
    module-level ``spark`` session and computes:

    * ``age_analysis``: share of each bullying type within each age band.
    * ``gender_differences``: share of each gender within each bullying type.
    * ``performance_impact``: mean academic-performance score (poor=1 ..
      excellent=4) per bullying type; ``None`` when not computable.
    * ``income_correlation``: share of each bullying type within each
      family-income band.
    * ``frequency_duration_stats``: nested ``{metric: {stat: {type: value}}}``
      with mean/std/median of ``frequency`` and ``duration`` per bullying type.
    * ``risk_index``: percentage of all records falling in each observed
      (age band, gender, academic performance) combination, keyed by the
      three values joined with ``|``.
    * ``high_risk_profiles``: age counts, gender ratio and top three bullying
      types among records whose frequency is above the 75th percentile.

    Args:
        request: Incoming request object; not read by this function.

    Returns:
        ``JsonResponse`` wrapping the dictionary described above.
    """

    def _clean(value):
        # NaN is not valid JSON; emit null instead.
        return None if pd.isna(value) else float(value)

    # Same query text as before, assembled line by line.
    demographics_query = (
        "\n"
        "SELECT age, gender, grade, family_income, academic_performance,\n"
        "bullying_type, frequency, duration\n"
        "FROM bullying_demographics\n"
        "WHERE record_date >= date_sub(current_date(), 365)\n"
    )
    demo_df = spark.sql(demographics_query).toPandas()

    # Age bands. observed=False keeps empty bands in the output, pinning the
    # behaviour regardless of the installed pandas default.
    age_groups = pd.cut(
        demo_df['age'],
        bins=[0, 12, 15, 18, 21],
        labels=['12以下', '12-15', '15-18', '18以上'],
    )
    age_bullying_stats = (
        demo_df.groupby([age_groups, 'bullying_type'], observed=False)
        .size()
        .unstack(fill_value=0)
    )
    # Empty bands divide 0/0 -> NaN -> 0.
    age_analysis = age_bullying_stats.div(
        age_bullying_stats.sum(axis=1), axis=0
    ).fillna(0)

    # Gender split, normalised per bullying type (column-wise).
    gender_analysis = (
        demo_df.groupby(['gender', 'bullying_type']).size().unstack(fill_value=0)
    )
    gender_ratios = gender_analysis.div(
        gender_analysis.sum(axis=0), axis=1
    ).fillna(0)

    # Academic performance per bullying type.
    performance_mapping = {'excellent': 4, 'good': 3, 'average': 2, 'poor': 1}
    demo_df['performance_score'] = demo_df['academic_performance'].map(performance_mapping)
    performance_correlation = demo_df.groupby('bullying_type')['performance_score'].mean()
    performance_impact = {
        bullying_type: _clean(score)
        for bullying_type, score in performance_correlation.items()
    }

    # Family-income bands.
    income_groups = pd.cut(
        demo_df['family_income'],
        bins=[0, 50000, 100000, 200000, float('inf')],
        labels=['低收入', '中低收入', '中高收入', '高收入'],
    )
    income_bullying_correlation = (
        demo_df.groupby([income_groups, 'bullying_type'], observed=False)
        .size()
        .unstack(fill_value=0)
    )
    income_normalized = income_bullying_correlation.div(
        income_bullying_correlation.sum(axis=1), axis=0
    ).fillna(0)

    # Frequency / duration statistics. The aggregated frame has two-level
    # column labels (metric, stat); tuple keys cannot be JSON object keys, so
    # the result is re-nested as metric -> stat -> bullying_type.
    frequency_duration_analysis = demo_df.groupby('bullying_type').agg({
        'frequency': ['mean', 'std', 'median'],
        'duration': ['mean', 'std', 'median'],
    }).round(2)
    frequency_duration_stats = {}
    for (metric, stat), column in frequency_duration_analysis.items():
        frequency_duration_stats.setdefault(metric, {})[stat] = {
            bullying_type: _clean(value)
            for bullying_type, value in column.items()
        }

    # Risk index. The age bands exist only as the local Series `age_groups`
    # (there is no 'age_groups' column), so the Series itself is the grouper.
    # observed=True limits the result to combinations that actually occur.
    risk_factors = demo_df.groupby(
        [age_groups, 'gender', 'academic_performance'], observed=True
    ).size()
    total_students = len(demo_df)
    risk_percentages = (risk_factors / total_students * 100).round(2)
    risk_index = {
        '|'.join(str(part) for part in combination): float(percentage)
        for combination, percentage in risk_percentages.items()
    }

    # Profile of the records with above-75th-percentile frequency.
    frequency_threshold = demo_df['frequency'].quantile(0.75)
    high_risk_profiles = demo_df[demo_df['frequency'] > frequency_threshold]
    profile_characteristics = {
        'age_distribution': {
            age: int(count)
            for age, count in high_risk_profiles['age'].value_counts().items()
        },
        'gender_ratio': high_risk_profiles['gender'].value_counts(normalize=True).to_dict(),
        'common_types': {
            bullying_type: int(count)
            for bullying_type, count
            in high_risk_profiles['bullying_type'].value_counts().head(3).items()
        },
    }

    demographics_result = {
        'age_analysis': age_analysis.to_dict(),
        'gender_differences': gender_ratios.to_dict(),
        'performance_impact': performance_impact,
        'income_correlation': income_normalized.to_dict(),
        'frequency_duration_stats': frequency_duration_stats,
        'risk_index': risk_index,
        'high_risk_profiles': profile_characteristics,
    }
    return JsonResponse(demographics_result)
# 可视化大屏展示核心功能
def generate_dashboard_data(request):
    """Assemble the data for the visualisation dashboard and return it as JSON.

    Reads rows of ``bullying_comprehensive_view`` whose ``update_time`` falls
    in the last 30 days (relative to ``datetime.now()``) through the
    module-level ``spark`` session and derives:

    * ``key_metrics``: total incidents, resolution rate (%), pending count
      and an alert level (``'high'`` or ``'normal'``).
    * ``geographic_heatmap``: per (district, school) incident count and mean
      severity, as a list of records.
    * ``time_trends``: incident counts per day (ISO date keys) and per ISO
      week number.
    * ``type_distribution``: percentage share per bullying type.
    * ``severity_radar``: mean severity per bullying type.
    * ``demographics``: incident counts by gender and age group.
    * ``intervention_effectiveness``: % of intervened incidents resolved.
    * ``school_rankings``: the ten schools with the lowest composite score.
    * ``last_updated``: ISO timestamp of this computation.

    Missing numeric values are emitted as ``null``.

    Args:
        request: Incoming request object; not read by this function.

    Returns:
        ``JsonResponse`` wrapping the dictionary described above.
    """

    def _clean(value):
        # NaN/NaT are not valid JSON; emit null instead.
        return None if pd.isna(value) else value

    # Load the last 30 days of data. Same query text as before.
    current_time = datetime.now()
    window_start = current_time - timedelta(days=30)
    time_range_query = (
        "\n"
        "SELECT * FROM bullying_comprehensive_view\n"
        f"WHERE update_time >= '{window_start}'\n"
    )
    dashboard_df = spark.sql(time_range_query).toPandas()

    # Headline metrics.
    total_incidents = len(dashboard_df)
    resolved_incidents = len(dashboard_df[dashboard_df['status'] == 'resolved'])
    pending_incidents = len(dashboard_df[dashboard_df['status'] == 'pending'])
    resolution_rate = (
        resolved_incidents / total_incidents * 100 if total_incidents > 0 else 0
    )

    # Heatmap: incident count and mean severity per school.
    geographic_distribution = dashboard_df.groupby(['school_district', 'school_name']).agg({
        'incident_id': 'count',
        'severity_score': 'mean',
    }).reset_index()
    geographic_data = [
        {field: _clean(value) for field, value in record.items()}
        for record in geographic_distribution.to_dict('records')
    ]

    # Time series. Grouping by .dt.date produces datetime.date keys, which
    # cannot be JSON object keys, so they are rendered as ISO strings.
    dashboard_df['incident_date'] = pd.to_datetime(dashboard_df['incident_date'])
    incident_dates = dashboard_df['incident_date']
    daily_incidents = dashboard_df.groupby(incident_dates.dt.date).size()
    weekly_trend = dashboard_df.groupby(incident_dates.dt.isocalendar().week).size()
    trend_data = {
        'daily': {
            day.isoformat(): int(count)
            for day, count in daily_incidents.tail(30).items()
        },
        # Keys are ISO week numbers, converted to plain ints.
        'weekly': {
            int(week): int(count)
            for week, count in weekly_trend.tail(12).items()
        },
    }

    # Pie chart: percentage share per bullying type.
    type_distribution = dashboard_df['bullying_type'].value_counts()
    type_percentages = (type_distribution / type_distribution.sum() * 100).round(1)
    pie_chart_data = [
        {'name': bullying_type, 'value': float(share)}
        for bullying_type, share in type_percentages.items()
    ]

    # Radar chart: mean severity per bullying type.
    severity_by_type = dashboard_df.groupby('bullying_type')['severity_score'].mean()
    radar_data = [
        {
            'type': bullying_type,
            'severity': None if pd.isna(severity) else round(float(severity), 2),
        }
        for bullying_type, severity in severity_by_type.items()
    ]

    # Affected population: counts by age group (rows) and gender (columns).
    age_gender_analysis = (
        dashboard_df.groupby(['age_group', 'gender']).size().unstack(fill_value=0)
    )
    affected_demographics = age_gender_analysis.to_dict()

    # Alerting: compare the last 7 days with a quarter of the whole window.
    recent_incidents = dashboard_df[incident_dates >= current_time - timedelta(days=7)]
    weekly_increase_rate = (
        len(recent_incidents) / (total_incidents / 4) if total_incidents > 0 else 0
    )
    severity_trend = (
        recent_incidents['severity_score'].mean() if len(recent_incidents) > 0 else 0
    )
    # A NaN severity_trend compares False, leaving the level at 'normal'.
    if weekly_increase_rate > 1.2 or severity_trend > 2.5:
        alert_level = 'high'
    else:
        alert_level = 'normal'

    # Intervention effectiveness.
    intervention_data = dashboard_df[dashboard_df['intervention_applied'].eq(True)]
    if len(intervention_data) > 0:
        resolved_after_intervention = len(
            intervention_data[intervention_data['status'] == 'resolved']
        )
        intervention_success_rate = resolved_after_intervention / len(intervention_data) * 100
    else:
        intervention_success_rate = 0

    # School ranking: lower composite score ranks first.
    school_rankings = dashboard_df.groupby('school_name').agg({
        'incident_id': 'count',
        'severity_score': 'mean',
        'resolution_time': 'mean',
    }).round(2)
    school_rankings['composite_score'] = (
        school_rankings['severity_score'] * 0.4
        + school_rankings['resolution_time'] * 0.3
        + school_rankings['incident_id'] * 0.3
    )
    top_schools = {
        school: {field: _clean(value) for field, value in metrics.items()}
        for school, metrics
        in school_rankings.nsmallest(10, 'composite_score').to_dict('index').items()
    }

    dashboard_result = {
        'key_metrics': {
            'total_incidents': total_incidents,
            'resolution_rate': round(resolution_rate, 1),
            'pending_cases': pending_incidents,
            'alert_level': alert_level,
        },
        'geographic_heatmap': geographic_data,
        'time_trends': trend_data,
        'type_distribution': pie_chart_data,
        'severity_radar': radar_data,
        'demographics': affected_demographics,
        'intervention_effectiveness': round(intervention_success_rate, 1),
        'school_rankings': top_schools,
        'last_updated': current_time.isoformat(),
    }
    return JsonResponse(dashboard_result)