💖💖作者:计算机编程小咖 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
@TOC
大学生就业因素数据分析系统介绍
基于大数据的大学生就业因素数据分析系统是一个运用现代大数据技术深度挖掘和分析大学生就业相关因素的综合性数据分析平台。该系统采用Hadoop分布式存储框架结合Spark大数据处理引擎作为核心技术架构,能够高效处理海量的学生就业相关数据,通过Spark SQL进行复杂的数据查询和统计分析,利用Pandas和NumPy进行精确的数据处理和科学计算。系统提供Python+Django和Java+SpringBoot两套完整的后端解决方案,前端采用Vue框架配合ElementUI组件库和Echarts可视化图表库,构建了直观美观的用户交互界面。系统功能涵盖用户管理、个人信息维护、就业因素信息管理等基础模块,更重要的是提供了大屏可视化展示、学生学业成就分析、就业多维因素分析、学生实践技能分析和学生综合画像分析等核心分析功能,能够从多个维度全面剖析影响大学生就业的各种因素,为教育管理者和学生个人提供数据驱动的决策支持。整个系统基于MySQL数据库进行数据存储,通过HDFS分布式文件系统实现大数据的可靠存储,充分发挥了大数据技术在教育数据分析领域的强大优势,为高校就业指导和学生职业规划提供了科学、准确、全面的数据分析工具。
大学生就业因素数据分析系统演示视频
大学生就业因素数据分析系统演示图片
大学生就业因素数据分析系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, avg, sum, when, desc, asc
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.stat import Correlation
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
# Shared SparkSession used by every analysis view below (2 GB / 2 cores per executor).
# NOTE(review): created at module import time as a process-wide singleton —
# confirm this is intended for the Django deployment model (one session per worker).
spark = SparkSession.builder.appName("EmploymentFactorAnalysis").config("spark.executor.memory", "2g").config("spark.executor.cores", "2").getOrCreate()
def employment_multidimensional_analysis(request):
    """Compute multi-dimensional employment statistics over all students.

    Joins `student_info` with `employment_factor` and aggregates employment
    rate / salary by GPA level, major, skill level, internship experience and
    certificate count, ranks students by a weighted comprehensive score, and
    computes a Pearson correlation matrix over four numeric features.

    Returns:
        JsonResponse: {"status": "success", "data": {...}} on success, or
        {"status": "error", "message": str} if anything raises.
    """
    try:
        # Shared JDBC connection options for both table reads.
        # BUG FIX: the original employment_factor read used the URL
        # "jdbc://localhost:3306/..." (missing the "mysql" sub-protocol),
        # which makes the JDBC driver resolution fail at load time.
        jdbc_opts = {
            "url": "jdbc:mysql://localhost:3306/employment_db",
            "driver": "com.mysql.cj.jdbc.Driver",
            "user": "root",
            "password": "123456",
        }
        student_df = spark.read.format("jdbc").options(dbtable="student_info", **jdbc_opts).load()
        employment_df = spark.read.format("jdbc").options(dbtable="employment_factor", **jdbc_opts).load()
        # BUG FIX: join on the column *name* rather than an explicit equality
        # condition. The condition form keeps two identically named
        # "student_id" columns, which makes the select("student_id", ...)
        # below fail with an ambiguous-column error.
        joined_df = student_df.join(employment_df, "student_id", "inner")
        # Reusable column expression: number of employed students in a group.
        employed = count(when(col("employment_status") == "已就业", 1))
        gpa_employment_rate = joined_df.groupBy("gpa_level").agg(
            (employed / count("*") * 100).alias("employment_rate")
        )
        major_employment_stats = joined_df.groupBy("major").agg(
            count("*").alias("total_count"),
            employed.alias("employed_count"),
            avg("salary").alias("avg_salary"),
        )
        skill_impact_analysis = joined_df.groupBy("technical_skills_level").agg(
            avg("salary").alias("avg_salary"),
            (employed / count("*") * 100).alias("employment_rate"),
        )
        internship_correlation = joined_df.groupBy("internship_experience").agg(
            avg("interview_count").alias("avg_interviews"),
            avg("salary").alias("avg_salary"),
            (employed / count("*") * 100).alias("employment_rate"),
        )
        certificate_impact = joined_df.groupBy("certificate_count_level").agg(
            count("*").alias("student_count"),
            avg("salary").alias("avg_salary"),
            (employed / count("*") * 100).alias("employment_rate"),
        )
        # Weighted score: GPA (x0.3) + skill tier (10/20/30) + internship
        # bonus (25) + 5 points per certificate. Weights preserved from the
        # original implementation.
        comprehensive_ranking = joined_df.select(
            "student_id", "student_name", "gpa", "technical_skills_level",
            "internship_experience", "certificate_count", "employment_status", "salary",
        ).withColumn(
            "comprehensive_score",
            col("gpa") * 0.3
            + when(col("technical_skills_level") == "高", 30)
              .when(col("technical_skills_level") == "中", 20)
              .otherwise(10)
            + when(col("internship_experience") == "有", 25).otherwise(0)
            + col("certificate_count") * 5,
        )
        top_students = comprehensive_ranking.orderBy(desc("comprehensive_score")).limit(100)
        # Correlation over the first four features (salary is selected but
        # deliberately excluded from the vector, as in the original).
        correlation_features = ["gpa", "certificate_count", "internship_months", "project_count", "salary"]
        assembler = VectorAssembler(inputCols=correlation_features[:-1], outputCol="features")
        correlation_df = assembler.transform(joined_df.select(*correlation_features))
        correlation_matrix = Correlation.corr(correlation_df, "features").head()[0].toArray()

        def _rows_to_dicts(df):
            # BUG FIX: Row objects are not JSON-serializable; JsonResponse
            # would raise TypeError on the raw collect() output.
            return [row.asDict() for row in df.collect()]

        result_data = {
            "gpa_employment": _rows_to_dicts(gpa_employment_rate),
            "major_stats": _rows_to_dicts(major_employment_stats),
            "skill_impact": _rows_to_dicts(skill_impact_analysis),
            "internship_effect": _rows_to_dicts(internship_correlation),
            "certificate_impact": _rows_to_dicts(certificate_impact),
            "top_students": _rows_to_dicts(top_students),
            "correlation_matrix": correlation_matrix.tolist(),
        }
        return JsonResponse({"status": "success", "data": result_data})
    except Exception as e:
        return JsonResponse({"status": "error", "message": str(e)})
def student_comprehensive_portrait_analysis(request):
    """Build a data-driven portrait for one student vs. same-major peers.

    Reads the student's base record and practical-skill scores, compares them
    against same-major averages, derives strengths/weaknesses per skill,
    computes a GPA percentile and a weighted comprehensive score, and emits
    improvement suggestions for the largest gaps.

    Query params:
        student_id: identifier of the student to analyse (required).

    Returns:
        JsonResponse: {"status": "success", "data": {...}} or an error payload.
    """
    try:
        student_id = request.GET.get('student_id')
        # Guard: without an id every filter below matches nothing and the
        # original code crashed with IndexError on collect()[0].
        if not student_id:
            return JsonResponse({"status": "error", "message": "student_id is required"})
        jdbc_opts = {
            "url": "jdbc:mysql://localhost:3306/employment_db",
            "driver": "com.mysql.cj.jdbc.Driver",
            "user": "root",
            "password": "123456",
        }
        base_df = spark.read.format("jdbc").options(dbtable="student_info", **jdbc_opts).load()
        skill_df = spark.read.format("jdbc").options(dbtable="practical_skills", **jdbc_opts).load()
        # NOTE: the original also read student_achievement into a local that
        # was never used afterwards; the dead read has been removed.
        # NOTE(review): student_id arrives as a string from the query string —
        # the comparison relies on Spark's implicit cast; confirm the column type.
        target_rows = base_df.filter(col("student_id") == student_id).collect()
        if not target_rows:
            return JsonResponse({"status": "error", "message": "student not found"})
        target_student = target_rows[0]
        skill_rows = skill_df.filter(col("student_id") == student_id).collect()
        if not skill_rows:
            return JsonResponse({"status": "error", "message": "no skill record for student"})
        current_skills = skill_rows[0]
        same_major_students = base_df.filter(col("major") == target_student["major"])
        major_avg_gpa = same_major_students.agg(avg("gpa").alias("avg_gpa")).collect()[0][0]
        major_avg_skills = same_major_students.join(skill_df, "student_id").agg(
            avg("programming_score").alias("avg_programming"),
            avg("project_score").alias("avg_project"),
            avg("teamwork_score").alias("avg_teamwork"),
        ).collect()[0]
        # Rank = number of same-major students with GPA >= this student's.
        student_gpa_rank = same_major_students.filter(col("gpa") >= target_student["gpa"]).count()
        total_major_students = same_major_students.count()
        gpa_percentile = (total_major_students - student_gpa_rank + 1) / total_major_students * 100
        # One pass over the three skill dimensions.
        # BUG FIX: the original never checked teamwork_score in the weakness
        # pass, so the "团队协作" suggestion branch was unreachable.
        skill_dimensions = [
            ("programming_score", "avg_programming", "编程能力"),
            ("project_score", "avg_project", "项目经验"),
            ("teamwork_score", "avg_teamwork", "团队协作"),
        ]
        strength_analysis = []
        weakness_analysis = []
        for score_key, avg_key, label in skill_dimensions:
            score = current_skills[score_key]
            major_avg = major_avg_skills[avg_key]
            if score > major_avg:
                strength_analysis.append({"skill": label, "score": score, "advantage": score - major_avg})
            elif score < major_avg:
                weakness_analysis.append({"skill": label, "score": score, "gap": major_avg - score})
        # Weights preserved from the original.
        # NOTE(review): GPA weight here is 30 while the multidimensional
        # analysis uses 0.3 — looks inconsistent; confirm intended scale.
        comprehensive_score = (
            target_student["gpa"] * 30
            + current_skills["programming_score"] * 0.25
            + current_skills["project_score"] * 0.25
            + current_skills["teamwork_score"] * 0.2
        )
        suggestion_map = {
            "编程能力": "建议加强算法训练和代码实践,参与开源项目",
            "项目经验": "建议主动参与实际项目开发,积累项目管理经验",
            "团队协作": "建议参与团队项目,提升沟通协调能力",
        }
        improvement_suggestions = []
        if weakness_analysis:
            # Largest gaps first; suggest at most three improvements.
            weakness_analysis.sort(key=lambda x: x["gap"], reverse=True)
            for weakness in weakness_analysis[:3]:
                suggestion = suggestion_map.get(weakness["skill"])
                if suggestion:
                    improvement_suggestions.append(suggestion)
        portrait_result = {
            "student_info": target_student.asDict(),
            "gpa_ranking": {
                "rank": student_gpa_rank,
                "total": total_major_students,
                "percentile": gpa_percentile,
            },
            "major_comparison": {
                "major_avg_gpa": major_avg_gpa,
                # BUG FIX: Row is not JSON-serializable; convert to a dict.
                "major_avg_skills": major_avg_skills.asDict(),
            },
            "strengths": strength_analysis,
            "weaknesses": weakness_analysis,
            "comprehensive_score": comprehensive_score,
            "suggestions": improvement_suggestions,
        }
        return JsonResponse({"status": "success", "data": portrait_result})
    except Exception as e:
        return JsonResponse({"status": "error", "message": str(e)})
def dashboard_visualization_data(request):
try:
employment_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/employment_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "employment_factor").option("user", "root").option("password", "123456").load()
student_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/employment_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "student_info").option("user", "root").option("password", "123456").load()
skill_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/employment_db").option("driver", "com.mysql.cj.jdbc.Driver").option("dbtable", "practical_skills").option("user", "root").option("password", "123456").load()
total_students = student_df.count()
employed_students = employment_df.filter(col("employment_status") == "已就业").count()
employment_rate = round(employed_students / total_students * 100, 2)
avg_salary = employment_df.filter(col("employment_status") == "已就业").agg(avg("salary").alias("avg_salary")).collect()[0][0]
major_distribution = student_df.groupBy("major").agg(count("*").alias("student_count")).orderBy(desc("student_count"))
salary_distribution = employment_df.filter(col("employment_status") == "已就业").groupBy(when(col("salary") < 5000, "5000以下").when((col("salary") >= 5000) & (col("salary") < 8000), "5000-8000").when((col("salary") >= 8000) & (col("salary") < 12000), "8000-12000").when((col("salary") >= 12000) & (col("salary") < 20000), "12000-20000").otherwise("20000以上").alias("salary_range")).agg(count("*").alias("count")).orderBy("salary_range")
monthly_employment_trend = employment_df.filter(col("employment_status") == "已就业").groupBy("employment_month").agg(count("*").alias("employment_count")).orderBy("employment_month")
skill_level_stats = skill_df.groupBy(when(col("programming_score") >= 90, "优秀").when((col("programming_score") >= 80) & (col("programming_score") < 90), "良好").when((col("programming_score") >= 70) & (col("programming_score") < 80), "中等").otherwise("待提升").alias("skill_level")).agg(count("*").alias("student_count"))
top_employers = employment_df.filter(col("employment_status") == "已就业").groupBy("company_name").agg(count("*").alias("hire_count")).orderBy(desc("hire_count")).limit(10)
gpa_employment_correlation = student_df.join(employment_df, "student_id").groupBy(when(col("gpa") >= 3.5, "优秀").when((col("gpa") >= 3.0) & (col("gpa") < 3.5), "良好").when((col("gpa") >= 2.5) & (col("gpa") < 3.0), "中等").otherwise("待提升").alias("gpa_level")).agg((count(when(col("employment_status") == "已就业", 1)) / count("*") * 100).alias("employment_rate")).orderBy("gpa_level")
internship_impact = employment_df.groupBy("internship_experience").agg(count("*").alias("total_count"), (count(when(col("employment_status") == "已就业", 1)) / count("*") * 100).alias("employment_ra_*_
大学生就业因素数据分析系统文档展示
💖💖作者:计算机编程小咖 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目