Preface
💖💖Author: 计算机程序员小杨 💙💙About me: I work in the computer field and am proficient in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT directions. I take on customized project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing duplication rates. I love technology, enjoy exploring new tools and frameworks, and like solving real problems with code. Feel free to ask me anything about code and technology! 💛💛A word of thanks: thank you all for your attention and support! 💕💕Contact 计算机程序员小杨 at the end of this article to get the source code 💜💜 Web practical projects · Android/Mini Program practical projects · Big data practical projects · Deep learning practical projects · Computer science graduation project topics 💜💜
1. Development Tools
Development languages: Java + Python (both versions are supported)
Backend frameworks: Spring Boot (Spring + SpringMVC + MyBatis) and Django (both versions are supported)
Frontend: Vue + ElementUI + HTML
Database: MySQL
Architecture: B/S (browser/server)
IDE: IntelliJ IDEA (for Java) or PyCharm (for Python)
2. System Overview
This system is an employment-website visualization and analysis platform built on a Python stack. The backend is implemented with the Django framework, and MySQL handles data storage. A web crawler automatically collects job postings from major recruitment platforms, capturing key fields such as job title, salary, work location, and required skills. For data processing, the system integrates the Apache Spark framework, which efficiently cleans, transforms, and analyzes large volumes of recruitment data. The frontend uses Vue.js together with the ECharts charting library to present intuitive, multi-dimensional visualizations of the job market. Core features include real-time collection and updating of recruitment data, multi-level statistical analysis, interactive charts, and personalized employment-trend reports. Users can explore the distribution of jobs by industry, region, and skill requirements, and track salary trends to support job-search decisions. Flexible filtering and comparison functions help users understand how the job market is changing and improve their competitiveness and chances of landing a job.
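The overview mentions Django models backed by MySQL for storing the crawled postings, but the source listing below never defines the model itself. The following is a minimal sketch of what such a model could look like, mirroring the fields the crawler produces; the JobRecord name and the db_table value are assumptions for illustration, not the project's actual schema.

from django.db import models

class JobRecord(models.Model):
    """One crawled job posting (hypothetical schema for illustration)."""
    title = models.CharField(max_length=200)                 # job title
    salary = models.FloatField(default=0)                    # average monthly salary (CNY)
    location = models.CharField(max_length=100)              # work location
    company = models.CharField(max_length=200, blank=True)   # company name
    skills = models.CharField(max_length=500, blank=True)    # comma-separated skill tags
    crawl_time = models.DateTimeField()                      # when the posting was crawled
    source_url = models.URLField()                           # which site it came from

    class Meta:
        db_table = 'job_record'                              # assumed table name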
3. System Feature Demo
4. System Interface
5. Source Code
from datetime import datetime
import re

import requests
from bs4 import BeautifulSoup
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, col, count, desc, explode, regexp_replace, split

# Shared Spark session; adaptive query execution helps with skewed job data.
spark = SparkSession.builder \
    .appName("JobDataAnalysis") \
    .config("spark.sql.adaptive.enabled", "true") \
    .getOrCreate()

def crawl_job_data():
    """Crawl job postings from the listing pages and return them as dicts."""
    job_data_list = []
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    base_urls = ['https://jobs.51job.com/search', 'https://www.zhaopin.com/search']
    for base_url in base_urls:
        for page in range(1, 11):  # first 10 result pages per site
            try:
                response = requests.get(f"{base_url}?page={page}", headers=headers, timeout=10)
                soup = BeautifulSoup(response.content, 'html.parser')
                job_items = soup.find_all('div', class_='job-item')
                for item in job_items:
                    job_title = item.find('a', class_='job-title')
                    salary_elem = item.find('span', class_='salary')
                    location_elem = item.find('span', class_='location')
                    company_elem = item.find('a', class_='company-name')
                    skills_elem = item.find('div', class_='skills')
                    if not (job_title and salary_elem and location_elem):
                        continue
                    # Parse a "10-20" style salary range (thousands of CNY per
                    # month) into an average monthly salary in CNY.
                    salary_text = salary_elem.text.strip()
                    salary_match = re.search(r'(\d+)-(\d+)', salary_text)
                    avg_salary = 0.0
                    if salary_match:
                        min_sal = int(salary_match.group(1)) * 1000
                        max_sal = int(salary_match.group(2)) * 1000
                        avg_salary = (min_sal + max_sal) / 2
                    skills_list = []
                    if skills_elem:
                        skill_tags = skills_elem.find_all('span', class_='skill-tag')
                        skills_list = [tag.text.strip() for tag in skill_tags]
                    job_data_list.append({
                        'title': job_title.text.strip(),
                        'salary': avg_salary,
                        'location': location_elem.text.strip(),
                        'company': company_elem.text.strip() if company_elem else '',
                        'skills': ','.join(skills_list),
                        'crawl_time': datetime.now(),
                        'source_url': base_url,
                    })
            except Exception as e:
                print(f"Crawl error: {e}")
                continue
    return job_data_list

def process_job_analytics():
    """Run the Spark aggregations over freshly crawled job data."""
    raw_data = crawl_job_data()
    df = spark.createDataFrame(raw_data)
    df = df.filter(col("salary") > 0)  # drop rows whose salary could not be parsed
    # Keep only the Chinese characters of the location (e.g. "北京-朝阳" -> "北京朝阳").
    df = df.withColumn("location_clean", regexp_replace(col("location"), r"[^\u4e00-\u9fa5]", ""))
    # Job count and average salary per location.
    salary_stats = df.groupBy("location_clean").agg(
        count("*").alias("job_count"),
        avg("salary").alias("avg_salary")
    ).orderBy(desc("job_count"))
    # Positions that appear often enough to be statistically meaningful.
    industry_analysis = df.groupBy("title").agg(
        count("*").alias("position_count"),
        avg("salary").alias("avg_salary")
    ).filter(col("position_count") > 5).orderBy(desc("avg_salary"))
    # Explode the comma-separated skills column so each individual skill is
    # counted, rather than grouping on the whole joined string.
    skill_df = df.select("skills", "salary").filter(col("skills") != "")
    skill_expanded = skill_df.select(
        "salary",
        explode(split(regexp_replace(col("skills"), r"\s+", ""), ",")).alias("skills_clean")
    )
    skill_stats = skill_expanded.groupBy("skills_clean").agg(
        count("*").alias("demand_count"),
        avg("salary").alias("avg_salary")
    ).filter(col("demand_count") > 10).orderBy(desc("avg_salary"))
    top_locations = salary_stats.limit(20).collect()
    hot_positions = industry_analysis.limit(30).collect()
    valuable_skills = skill_stats.limit(25).collect()
    return {
        'location_analysis': [row.asDict() for row in top_locations],
        'position_analysis': [row.asDict() for row in hot_positions],
        'skill_analysis': [row.asDict() for row in valuable_skills],
        'total_jobs': df.count(),
        'avg_market_salary': df.agg(avg("salary")).collect()[0][0],
    }

def generate_visualization_data(user_filters=None):
    """Reshape the analytics results into the category/series lists ECharts expects."""
    analysis_data = process_job_analytics()
    chart_data = {}
    location_data = analysis_data['location_analysis']
    chart_data['location_chart'] = {
        'categories': [item['location_clean'] for item in location_data],
        'job_counts': [item['job_count'] for item in location_data],
        # Salaries are shown in thousands of CNY for readable axis labels.
        'salary_data': [round(item['avg_salary'] / 1000, 1) for item in location_data],
    }
    position_data = analysis_data['position_analysis'][:15]
    chart_data['position_chart'] = {
        # Truncate long titles so they fit on the chart axis.
        'positions': [item['title'][:10] + '...' if len(item['title']) > 10
                      else item['title'] for item in position_data],
        'salaries': [round(item['avg_salary'] / 1000, 1) for item in position_data],
        'counts': [item['position_count'] for item in position_data],
    }
    skill_data = analysis_data['skill_analysis']
    chart_data['skill_chart'] = {
        'skills': [item['skills_clean'] for item in skill_data],
        'demand_levels': [item['demand_count'] for item in skill_data],
        'value_scores': [round(item['avg_salary'] / 1000, 1) for item in skill_data],
    }
    trend_analysis = []
    for i, location in enumerate(location_data[:10]):
        trend_analysis.append({
            'location': location['location_clean'],
            # Placeholder growth rate: the crawler keeps no history yet, so
            # this is a synthetic value for the demo chart.
            'growth_rate': round((i + 1) * 0.8 + 2.3, 1),
            'competition_index': round(location['job_count'] / 100, 2),
            'salary_trend': 'up' if location['avg_salary'] > analysis_data['avg_market_salary'] else 'down',
        })
    chart_data['trend_analysis'] = trend_analysis
    chart_data['summary'] = {
        'total_positions': analysis_data['total_jobs'],
        'market_avg_salary': round(analysis_data['avg_market_salary'] / 1000, 1),
        'top_location': location_data[0]['location_clean'] if location_data else 'N/A',
        'highest_paid_skill': skill_data[0]['skills_clean'] if skill_data else 'N/A',
    }
    if user_filters:
        if 'location_filter' in user_filters:
            chart_data['filtered_locations'] = [
                item for item in chart_data['location_chart']['categories']
                if user_filters['location_filter'].lower() in item.lower()
            ]
        if 'salary_range' in user_filters:
            min_salary, max_salary = user_filters['salary_range']
            chart_data['filtered_positions'] = [
                {
                    'position': chart_data['position_chart']['positions'][i],
                    'salary': salary,
                    'count': chart_data['position_chart']['counts'][i],
                }
                for i, salary in enumerate(chart_data['position_chart']['salaries'])
                if min_salary <= salary <= max_salary
            ]
    return chart_data
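For completeness, a Django view along the following lines could expose generate_visualization_data() to the Vue/ECharts frontend as JSON. This is a sketch, and the endpoint shape and query-parameter names (location, min_salary, max_salary) are assumptions for illustration rather than the project's actual API.

from django.http import JsonResponse

def job_analytics_view(request):
    # Assemble optional filters from query parameters (names are illustrative).
    user_filters = {}
    location = request.GET.get('location')
    if location:
        user_filters['location_filter'] = location
    min_sal = request.GET.get('min_salary')  # salary bounds in thousands of CNY,
    max_sal = request.GET.get('max_salary')  # matching the chart units above
    if min_sal and max_sal:
        user_filters['salary_range'] = (float(min_sal), float(max_sal))
    chart_data = generate_visualization_data(user_filters or None)
    # ensure_ascii=False keeps Chinese location and skill names readable in the payload.
    return JsonResponse(chart_data, json_dumps_params={'ensure_ascii': False})

The Vue side would then request this endpoint and hand each chart_data sub-object (location_chart, position_chart, skill_chart, trend_analysis, summary) to the corresponding ECharts option.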
6. System Documentation
Conclusion
💕💕To get the source code, contact 计算机程序员小杨