前言
- 💖💖作者:计算机程序员小杨
- 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💕💕文末获取源码联系 计算机程序员小杨
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 深度学习实战项目
- 计算机毕业设计选题
- 💜💜
一.开发工具简介
- 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
- 开发语言:Python+Java(两个版本都支持)
- 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持)
- 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery
- 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
- 数据库:MySQL
二.系统内容简介
《基于大数据的学生考试表现影响因素数据可视化分析系统》是一套综合运用Hadoop分布式存储、Spark大数据处理和现代Web技术的教育数据分析平台。系统采用Python作为主要开发语言,结合Django后端框架构建稳定的服务架构,前端运用Vue+ElementUI+Echarts技术栈实现直观的数据可视化展示。系统核心功能涵盖学生考试表现影响因素管理、综合影响因素分析、教育资源环境分析、家庭背景因素分析、个人学习行为分析和社会健康因素分析等六大模块,通过HDFS分布式文件系统存储海量教育数据,利用Spark SQL和Pandas、NumPy等数据处理工具进行深度挖掘分析。系统能够从多维度解析影响学生考试成绩的关键因素,生成详细的数据报告和可视化图表,为教育管理者、教师和研究人员提供科学的决策支持,同时支持用户个人中心管理和系统权限控制,确保数据安全性和用户体验的完整性。
三.系统功能演示
大数据毕设展示现场:学生考试表现影响因素数据可视化分析系统惊艳全场|毕设开发
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, sum, count, when, desc
from django.http import JsonResponse
from django.views import View
import pandas as pd
import numpy as np
from django.db import connection
import json
# Module-level singleton SparkSession shared by all views in this module.
# Adaptive query execution and partition coalescing are enabled so Spark can
# tune shuffle partitions at runtime; getOrCreate() reuses an already-running
# session in this process instead of starting a second one.
spark = SparkSession.builder.appName("StudentPerformanceAnalysis").config("spark.sql.adaptive.enabled", "true").config("spark.sql.adaptive.coalescePartitions.enabled", "true").getOrCreate()
class ComprehensiveAnalysisView(View):
    """Cross-factor analysis of exam performance for a set of students.

    POST JSON body:
        student_ids   -- non-empty list of student ids to analyse
        analysis_type -- analysis mode label (read but not used further here)
        time_range    -- {"start": ..., "end": ...} inclusive exam_date bounds

    Joins exam results with family background, learning behaviour and school
    resource data, then aggregates with Spark/pandas into one JSON report:
    grouped score averages, a correlation matrix, per-subject high/low
    performer counts, at-risk students and textual suggestions.
    """

    def post(self, request):
        data = json.loads(request.body)
        student_ids = data.get('student_ids', [])
        analysis_type = data.get('analysis_type', 'overall')
        time_range = data.get('time_range', {})
        # Guard: an empty id list would expand to the invalid SQL "IN ()".
        if not student_ids:
            return JsonResponse({"error": "student_ids must not be empty"}, status=400)
        query = """
            SELECT s.student_id, s.student_name, s.grade_level, s.class_name,
                   e.subject, e.exam_score, e.exam_date, e.exam_type,
                   f.family_income, f.parent_education, f.family_structure,
                   l.study_hours, l.homework_completion, l.attendance_rate,
                   r.school_type, r.teacher_quality, r.facility_score
            FROM students s
            JOIN exam_results e ON s.student_id = e.student_id
            JOIN family_background f ON s.student_id = f.student_id
            JOIN learning_behavior l ON s.student_id = l.student_id
            JOIN resource_environment r ON s.school_id = r.school_id
            WHERE s.student_id IN %s AND e.exam_date BETWEEN %s AND %s
        """
        with connection.cursor() as cursor:
            # Parameterised query: the MySQL driver expands the tuple for IN (...).
            cursor.execute(query, [tuple(student_ids), time_range.get('start'), time_range.get('end')])
            raw_data = cursor.fetchall()
        columns = ['student_id', 'student_name', 'grade_level', 'class_name', 'subject',
                   'exam_score', 'exam_date', 'exam_type', 'family_income', 'parent_education',
                   'family_structure', 'study_hours', 'homework_completion', 'attendance_rate',
                   'school_type', 'teacher_quality', 'facility_score']
        df_pandas = pd.DataFrame(raw_data, columns=columns)
        # Guard: Spark cannot infer a schema from an empty pandas frame.
        if df_pandas.empty:
            return JsonResponse({"error": "no exam records matched the given filters"}, status=404)
        spark_df = spark.createDataFrame(df_pandas)
        # Average score and sample size per grouping dimension.
        performance_by_income = spark_df.groupBy("family_income").agg(
            avg("exam_score").alias("avg_score"), count("*").alias("count")).orderBy(desc("avg_score"))
        performance_by_education = spark_df.groupBy("parent_education").agg(
            avg("exam_score").alias("avg_score"), count("*").alias("count")).orderBy(desc("avg_score"))
        performance_by_study_hours = spark_df.groupBy("study_hours").agg(
            avg("exam_score").alias("avg_score"), count("*").alias("count")).orderBy("study_hours")
        # Pairwise Pearson correlations between score and the numeric factors.
        correlation_matrix = df_pandas[['exam_score', 'family_income', 'study_hours',
                                        'homework_completion', 'attendance_rate',
                                        'teacher_quality', 'facility_score']].corr()
        high_performers = spark_df.filter(col("exam_score") >= 85).groupBy("subject").agg(count("*").alias("high_count"))
        low_performers = spark_df.filter(col("exam_score") < 60).groupBy("subject").agg(count("*").alias("low_count"))
        subject_analysis = high_performers.join(low_performers, "subject", "outer").fillna(0)
        # At-risk: score below 70 AND attendance under 80%.
        risk_students = spark_df.filter(
            (col("exam_score") < 70) & (col("attendance_rate") < 0.8)
        ).select("student_id", "student_name", "exam_score", "attendance_rate").distinct()
        # Collect once and reuse — the previous version called .collect() twice
        # on risk_students, launching two identical Spark jobs.
        risk_rows = [row.asDict() for row in risk_students.collect()]
        improvement_suggestions = [
            {"student_id": r['student_id'], "suggestion": f"学生{r['student_name']}需要提高出勤率和学习成绩"}
            for r in risk_rows
        ]
        result = {
            "income_analysis": [row.asDict() for row in performance_by_income.collect()],
            "education_analysis": [row.asDict() for row in performance_by_education.collect()],
            "study_hours_analysis": [row.asDict() for row in performance_by_study_hours.collect()],
            "correlation_data": correlation_matrix.to_dict(),
            "subject_performance": [row.asDict() for row in subject_analysis.collect()],
            "risk_students": risk_rows,
            "improvement_suggestions": improvement_suggestions,
            "total_analyzed": spark_df.count(),
        }
        # NOTE(review): exam_date probably arrives as a date object, which
        # JsonResponse cannot serialise by default — confirm against the driver.
        return JsonResponse(result)
class LearningBehaviorAnalysisView(View):
    """Learning-behaviour profile for a single student.

    POST JSON body:
        student_id -- id of the student to analyse
        period     -- analysis period label (accepted but not yet applied
                      to the query below — TODO wire into a date filter)

    Aggregates study habits, correlates them with exam scores and returns
    behaviour metrics, per-subject performance, efficiency, improvement
    areas, recommendations and a weighted overall behaviour score (0-100).
    """

    def post(self, request):
        data = json.loads(request.body)
        student_id = data.get('student_id')
        analysis_period = data.get('period', 'semester')
        query = """
            SELECT lb.student_id, lb.study_hours, lb.homework_completion, lb.attendance_rate,
                   lb.participation_score, lb.self_study_time, lb.group_study_frequency,
                   e.exam_score, e.subject, e.exam_date, e.exam_type,
                   s.student_name, s.grade_level
            FROM learning_behavior lb
            JOIN exam_results e ON lb.student_id = e.student_id
            JOIN students s ON lb.student_id = s.student_id
            WHERE lb.student_id = %s
            ORDER BY e.exam_date DESC
        """
        with connection.cursor() as cursor:
            cursor.execute(query, [student_id])
            behavior_data = cursor.fetchall()
        columns = ['student_id', 'study_hours', 'homework_completion', 'attendance_rate',
                   'participation_score', 'self_study_time', 'group_study_frequency',
                   'exam_score', 'subject', 'exam_date', 'exam_type', 'student_name', 'grade_level']
        df_pandas = pd.DataFrame(behavior_data, columns=columns)
        # Guard: an empty frame breaks createDataFrame and the collect()[0] calls below.
        if df_pandas.empty:
            return JsonResponse({"error": "no behavior records found for this student"}, status=404)
        spark_df = spark.createDataFrame(df_pandas)
        behavior_metrics = spark_df.agg(
            avg("study_hours").alias("avg_study_hours"),
            avg("homework_completion").alias("avg_homework_rate"),
            avg("attendance_rate").alias("avg_attendance"),
            avg("participation_score").alias("avg_participation")).collect()[0]
        performance_trend = spark_df.groupBy("subject").agg(
            avg("exam_score").alias("avg_score"), count("*").alias("exam_count")).orderBy(desc("avg_score"))
        # Score per study hour; rows with zero hours are excluded to avoid division by zero.
        study_efficiency = spark_df.withColumn(
            "efficiency_score", col("exam_score") / col("study_hours")
        ).filter(col("study_hours") > 0).agg(
            avg("efficiency_score").alias("avg_efficiency")).collect()[0]['avg_efficiency']
        behavior_impact = df_pandas[['study_hours', 'homework_completion', 'attendance_rate',
                                     'participation_score', 'exam_score']].corr()['exam_score'].to_dict()
        weekly_pattern = spark_df.groupBy("exam_type").agg(
            avg("study_hours").alias("avg_hours"), avg("exam_score").alias("avg_score"))
        # avg() returns None when a column is entirely NULL; default to 0 so the
        # threshold checks and the weighted score arithmetic stay well-defined
        # (the previous version raised TypeError in that case).
        avg_attendance = behavior_metrics['avg_attendance'] or 0
        avg_homework = behavior_metrics['avg_homework_rate'] or 0
        avg_hours = behavior_metrics['avg_study_hours'] or 0
        avg_participation = behavior_metrics['avg_participation'] or 0
        improvement_areas = []
        if avg_attendance < 0.85:
            improvement_areas.append("出勤率需要提高")
        if avg_homework < 0.80:
            improvement_areas.append("作业完成率有待改善")
        if avg_hours < 4:
            improvement_areas.append("学习时间需要增加")
        study_recommendations = []
        if study_efficiency and study_efficiency < 15:
            study_recommendations.append("建议优化学习方法,提高学习效率")
        if avg_participation < 70:
            study_recommendations.append("建议增加课堂参与度")
        # Weighted composite: attendance 30%, homework 30%, study hours 20%
        # (capped at 8h/day), participation 20%; scaled to 0-100.
        behavior_score = (avg_attendance * 0.3 + avg_homework * 0.3 +
                          min(avg_hours / 8, 1) * 0.2 + avg_participation / 100 * 0.2) * 100
        result = {
            "student_id": student_id,
            "behavior_metrics": behavior_metrics.asDict(),
            "performance_by_subject": [row.asDict() for row in performance_trend.collect()],
            "study_efficiency": study_efficiency,
            "behavior_impact": behavior_impact,
            "weekly_study_pattern": [row.asDict() for row in weekly_pattern.collect()],
            "improvement_areas": improvement_areas,
            "study_recommendations": study_recommendations,
            "overall_behavior_score": round(behavior_score, 2),
        }
        return JsonResponse(result)
class EducationResourceAnalysisView(View):
    """Per-region analysis of school resources vs. average exam performance.

    POST JSON body:
        region_id   -- region whose schools are analysed
        school_type -- optional filter; 'all' (default) disables it

    Groups exam scores per school/grade/class, relates them to teacher
    quality, facilities and funding, and returns rankings, correlations,
    resource gaps and per-school improvement priorities.
    """

    def post(self, request):
        data = json.loads(request.body)
        region_id = data.get('region_id')
        school_type = data.get('school_type', 'all')
        query = """
            SELECT r.school_id, r.school_name, r.school_type, r.teacher_quality,
                   r.facility_score, r.library_resources, r.lab_equipment, r.funding_level,
                   AVG(e.exam_score) as avg_school_score, COUNT(e.student_id) as student_count,
                   s.grade_level, s.class_name
            FROM resource_environment r
            JOIN students s ON r.school_id = s.school_id
            JOIN exam_results e ON s.student_id = e.student_id
            WHERE r.region_id = %s
        """
        params = [region_id]
        # Bug fix: the optional type filter used to be appended as
        # "HAVING r.school_type = %s" AFTER the GROUP BY. Filtering a plain
        # (non-aggregated) column belongs in WHERE — it is portable SQL and
        # lets the database discard rows before grouping.
        if school_type != 'all':
            query += " AND r.school_type = %s"
            params.append(school_type)
        query += """
            GROUP BY r.school_id, r.school_name, r.school_type, r.teacher_quality,
                     r.facility_score, r.library_resources, r.lab_equipment,
                     r.funding_level, s.grade_level, s.class_name
        """
        with connection.cursor() as cursor:
            cursor.execute(query, params)
            resource_data = cursor.fetchall()
        columns = ['school_id', 'school_name', 'school_type', 'teacher_quality', 'facility_score',
                   'library_resources', 'lab_equipment', 'funding_level', 'avg_school_score',
                   'student_count', 'grade_level', 'class_name']
        df_pandas = pd.DataFrame(resource_data, columns=columns)
        # Guard: Spark cannot infer a schema from an empty pandas frame.
        if df_pandas.empty:
            return JsonResponse({"error": "no schools matched the given filters"}, status=404)
        spark_df = spark.createDataFrame(df_pandas)
        resource_performance = spark_df.groupBy("school_type").agg(
            avg("avg_school_score").alias("type_avg_score"),
            avg("teacher_quality").alias("avg_teacher_quality"),
            avg("facility_score").alias("avg_facility_score"),
            sum("student_count").alias("total_students")).orderBy(desc("type_avg_score"))
        teacher_impact = spark_df.groupBy("teacher_quality").agg(
            avg("avg_school_score").alias("score_by_teacher"),
            count("*").alias("school_count")).orderBy("teacher_quality")
        facility_impact = spark_df.groupBy("facility_score").agg(
            avg("avg_school_score").alias("score_by_facility"),
            count("*").alias("school_count")).orderBy("facility_score")
        resource_correlation = df_pandas[['teacher_quality', 'facility_score', 'library_resources',
                                          'lab_equipment', 'funding_level',
                                          'avg_school_score']].corr()['avg_school_score'].to_dict()
        top_schools = spark_df.orderBy(desc("avg_school_score")).limit(10).select(
            "school_name", "avg_school_score", "teacher_quality", "facility_score")
        resource_gaps = spark_df.filter(col("avg_school_score") < 70).select(
            "school_name", "avg_school_score", "teacher_quality", "facility_score", "funding_level")
        # Score obtained per unit of funding; zero-funded schools are excluded
        # to avoid division by zero.
        funding_efficiency = spark_df.withColumn(
            "efficiency_ratio", col("avg_school_score") / col("funding_level")
        ).filter(col("funding_level") > 0).orderBy(desc("efficiency_ratio"))
        # For each under-performing school, list which resource areas fall
        # below their respective thresholds.
        improvement_priorities = []
        low_performance_schools = spark_df.filter(col("avg_school_score") < 75).collect()
        for school in low_performance_schools:
            priorities = []
            if school['teacher_quality'] < 70:
                priorities.append("师资力量建设")
            if school['facility_score'] < 60:
                priorities.append("基础设施改善")
            if school['library_resources'] < 50:
                priorities.append("图书资源扩充")
            improvement_priorities.append({"school_name": school['school_name'], "priorities": priorities})
        result = {
            "resource_performance_by_type": [row.asDict() for row in resource_performance.collect()],
            "teacher_quality_impact": [row.asDict() for row in teacher_impact.collect()],
            "facility_impact_analysis": [row.asDict() for row in facility_impact.collect()],
            "resource_correlation": resource_correlation,
            "top_performing_schools": [row.asDict() for row in top_schools.collect()],
            "resource_gap_schools": [row.asDict() for row in resource_gaps.collect()],
            "funding_efficiency_ranking": [row.asDict() for row in funding_efficiency.collect()],
            "improvement_priorities": improvement_priorities,
            "total_schools_analyzed": spark_df.select("school_id").distinct().count(),
        }
        return JsonResponse(result)