前言
💖💖作者:计算机程序员小杨 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💕💕文末获取源码联系 计算机程序员小杨 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题 💜💜
一.开发工具简介
大数据框架:Hadoop+Spark(本次未使用Hive,可按需定制) 开发语言:Python+Java(两个版本都支持) 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持) 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy 数据库:MySQL
二.系统内容简介
《历届奥运会数据可视化分析系统》是一个基于大数据技术的体育数据分析平台,采用Hadoop+Spark分布式计算框架对历届奥运会数据进行深度挖掘和智能分析。系统运用Python作为核心开发语言,结合Django后端框架构建稳定的服务架构,前端采用Vue+ElementUI+Echarts技术栈实现交互式数据可视化界面。通过HDFS分布式存储管理海量奥运数据,利用Spark SQL进行高效数据查询处理,配合Pandas和NumPy进行数据科学计算,MySQL数据库保障数据持久化存储。系统提供竞争格局分析、综合评价分析、国家实力分析、历史趋势分析、奖牌分布分析、时序聚类分析等多维度分析功能,并集成可视化大屏展示,为用户提供全方位的奥运数据洞察服务,支持体育决策制定和竞技水平评估。
三.系统功能演示
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.window import Window
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import VectorAssembler
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views import View
# Module-level SparkSession shared by all analysis views below.
# Adaptive Query Execution is enabled so Spark can tune shuffle partitions at runtime.
spark = SparkSession.builder.appName("OlympicsDataAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
def competition_pattern_analysis(request):
    """Analyze the Olympic competitive landscape across countries and sports.

    Loads medal records over JDBC, aggregates a weighted medal score per
    (country, year, sport), and returns JSON with:
      - top_countries: top 20 countries by total medals,
      - dominant_sports: per country, the sports where it ranks top-3,
      - competition_intensity: per (sport, year), participant count and
        score variance.

    :param request: Django HttpRequest (unused beyond routing).
    :return: JsonResponse with the three result lists.
    """
    # NOTE(review): connection credentials are hard-coded; they belong in
    # Django settings / environment variables in production.
    olympics_df = (spark.read.format("jdbc")
                   .option("url", "jdbc:mysql://localhost:3306/olympics")
                   .option("dbtable", "medal_records")
                   .option("user", "root")
                   .option("password", "password")
                   .load())
    country_medals = olympics_df.groupBy("country", "year", "sport").agg(
        sum("gold").alias("gold_count"),
        sum("silver").alias("silver_count"),
        sum("bronze").alias("bronze_count"))
    country_medals = country_medals.withColumn(
        "total_medals", col("gold_count") + col("silver_count") + col("bronze_count"))
    # Weighted score: gold x3, silver x2, bronze x1.
    country_medals = country_medals.withColumn(
        "weighted_score", col("gold_count") * 3 + col("silver_count") * 2 + col("bronze_count"))
    top_countries = (country_medals.groupBy("country")
                     .agg(sum("total_medals").alias("total"),
                          avg("weighted_score").alias("avg_score"))
                     .orderBy(desc("total"))
                     .limit(20))
    # Rank countries within each sport by cumulative weighted score;
    # a country "dominates" a sport when it ranks in the top 3.
    dominance_analysis = (country_medals.groupBy("country", "sport")
                          .agg(sum("weighted_score").alias("sport_score"))
                          .withColumn("rank", row_number().over(
                              Window.partitionBy("sport").orderBy(desc("sport_score")))))
    dominant_sports = (dominance_analysis.filter(col("rank") <= 3)
                       .groupBy("country")
                       .agg(collect_list("sport").alias("dominant_sports")))
    competition_intensity = country_medals.groupBy("sport", "year").agg(
        count("country").alias("participant_count"),
        stddev("weighted_score").alias("score_variance"))
    # Dead intermediates from the original version (sport pivot matrix,
    # per-year trend direction, market share) were removed: they never
    # contributed to the response payload.
    result_data = {
        "top_countries": top_countries.toPandas().to_dict("records"),
        "dominant_sports": dominant_sports.toPandas().to_dict("records"),
        "competition_intensity": competition_intensity.toPandas().to_dict("records"),
    }
    return JsonResponse(result_data)
def comprehensive_evaluation_analysis(request):
    """Build a composite evaluation of athletes, countries and sports.

    Joins per-athlete medal totals with participation, consistency and
    career-longevity metrics, derives a weighted ``final_evaluation``
    score, then aggregates it to country and sport level.

    :param request: Django HttpRequest (unused beyond routing).
    :return: JsonResponse with country rankings, sport analysis and the
        top 50 elite athletes (final_evaluation > 80).
    """
    # NOTE(review): hard-coded credentials; move to settings/env in production.
    athletes_df = (spark.read.format("jdbc")
                   .option("url", "jdbc:mysql://localhost:3306/olympics")
                   .option("dbtable", "athlete_performance")
                   .option("user", "root")
                   .option("password", "password")
                   .load())
    events_df = (spark.read.format("jdbc")
                 .option("url", "jdbc:mysql://localhost:3306/olympics")
                 .option("dbtable", "event_results")
                 .option("user", "root")
                 .option("password", "password")
                 .load())
    athlete_medals = athletes_df.groupBy("athlete_id", "country", "sport").agg(
        sum("gold").alias("gold_medals"),
        sum("silver").alias("silver_medals"),
        sum("bronze").alias("bronze_medals"))
    # Medal points: gold x5, silver x3, bronze x1.
    athlete_medals = (athlete_medals
                      .withColumn("total_medals",
                                  col("gold_medals") + col("silver_medals") + col("bronze_medals"))
                      .withColumn("medal_points",
                                  col("gold_medals") * 5 + col("silver_medals") * 3 + col("bronze_medals") * 1))
    participation_stats = events_df.groupBy("athlete_id").agg(
        count("event_id").alias("events_participated"),
        countDistinct("year").alias("olympics_attended"))
    consistency_metrics = events_df.groupBy("athlete_id").agg(
        avg("performance_score").alias("avg_performance"),
        stddev("performance_score").alias("performance_consistency"))
    longevity_analysis = events_df.groupBy("athlete_id").agg(
        (max("year") - min("year")).alias("career_span"),
        count("year").alias("active_years"))
    comprehensive_scores = (athlete_medals
                            .join(participation_stats, "athlete_id")
                            .join(consistency_metrics, "athlete_id")
                            .join(longevity_analysis, "athlete_id"))
    # BUG FIX: stddev() returns NULL for athletes with a single recorded
    # event; the NULL would propagate through consistency_score into
    # final_evaluation, silently dropping those athletes from every
    # ranking. Treat a single sample as perfectly consistent (stddev 0).
    comprehensive_scores = comprehensive_scores.fillna({"performance_consistency": 0.0})
    comprehensive_scores = (comprehensive_scores
                            .withColumn("efficiency_score",
                                        col("medal_points") / col("events_participated"))
                            .withColumn("longevity_score",
                                        col("career_span") * 0.3 + col("active_years") * 0.7))
    # final_evaluation weights: medals 40%, efficiency 30%, longevity 20%,
    # consistency 10%.
    comprehensive_scores = (comprehensive_scores
                            .withColumn("consistency_score", 100 - col("performance_consistency"))
                            .withColumn("final_evaluation",
                                        col("medal_points") * 0.4 + col("efficiency_score") * 0.3 +
                                        col("longevity_score") * 0.2 + col("consistency_score") * 0.1))
    country_evaluation = comprehensive_scores.groupBy("country").agg(
        avg("final_evaluation").alias("avg_athlete_score"),
        sum("medal_points").alias("total_country_points"),
        count("athlete_id").alias("athlete_count"))
    country_evaluation = country_evaluation.withColumn(
        "country_strength_index",
        col("avg_athlete_score") * 0.6 + (col("total_country_points") / col("athlete_count")) * 0.4)
    sport_evaluation = comprehensive_scores.groupBy("sport").agg(
        avg("final_evaluation").alias("sport_avg_score"),
        max("final_evaluation").alias("sport_max_score"))
    elite_athletes = (comprehensive_scores
                      .filter(col("final_evaluation") > 80)
                      .orderBy(desc("final_evaluation")))
    evaluation_result = {
        "country_rankings": country_evaluation.orderBy(desc("country_strength_index")).toPandas().to_dict("records"),
        "sport_analysis": sport_evaluation.toPandas().to_dict("records"),
        "elite_athletes": elite_athletes.limit(50).toPandas().to_dict("records"),
    }
    return JsonResponse(evaluation_result)
def temporal_clustering_analysis(request):
    """Cluster (country, year) performance profiles with KMeans (k=5).

    Builds per-country-per-year features (medal efficiency, gold ratio,
    3-year rolling trend and stability), clusters them, and reports the
    cluster profiles, member countries, per-year efficiency patterns and
    cluster-to-cluster transitions over time.

    :param request: Django HttpRequest (unused beyond routing).
    :return: JsonResponse with cluster summary, countries, temporal
        patterns and transition counts.
    """
    # NOTE(review): hard-coded credentials; move to settings/env in production.
    temporal_df = (spark.read.format("jdbc")
                   .option("url", "jdbc:mysql://localhost:3306/olympics")
                   .option("dbtable", "temporal_performance")
                   .option("user", "root")
                   .option("password", "password")
                   .load())
    country_yearly = temporal_df.groupBy("country", "year").agg(
        sum("gold").alias("gold"),
        sum("silver").alias("silver"),
        sum("bronze").alias("bronze"),
        sum("total_athletes").alias("athletes"))
    # medal_efficiency: weighted medals per athlete; gold_ratio: share of
    # golds among all medals. NULLs from zero denominators are zeroed
    # below before feature assembly.
    country_yearly = (country_yearly
                      .withColumn("medal_efficiency",
                                  (col("gold") * 3 + col("silver") * 2 + col("bronze")) / col("athletes"))
                      .withColumn("gold_ratio",
                                  col("gold") / (col("gold") + col("silver") + col("bronze"))))
    # 3-row rolling window (previous, current, next Olympics) per country.
    window_spec = Window.partitionBy("country").orderBy("year").rowsBetween(-1, 1)
    country_yearly = (country_yearly
                      .withColumn("performance_trend", avg("medal_efficiency").over(window_spec))
                      .withColumn("stability_index", stddev("medal_efficiency").over(window_spec)))
    feature_cols = ["medal_efficiency", "gold_ratio", "performance_trend", "stability_index"]
    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    # fillna(0) also covers stddev NULLs at window edges.
    feature_data = assembler.transform(country_yearly.fillna(0))
    kmeans = KMeans(k=5, seed=42, featuresCol="features", predictionCol="cluster")
    model = kmeans.fit(feature_data)
    clustered_data = model.transform(feature_data)
    cluster_analysis = clustered_data.groupBy("cluster").agg(
        avg("medal_efficiency").alias("avg_efficiency"),
        avg("gold_ratio").alias("avg_gold_ratio"),
        count("country").alias("cluster_size"))
    cluster_characteristics = clustered_data.groupBy("cluster").agg(
        collect_list("country").alias("countries"),
        avg("performance_trend").alias("trend_pattern"))
    temporal_patterns = (clustered_data.groupBy("cluster", "year")
                         .agg(avg("medal_efficiency").alias("yearly_efficiency"))
                         .orderBy("cluster", "year"))
    # Rows where a country's cluster changed relative to its previous
    # Olympics, counted per (from, to) cluster pair.
    evolution_analysis = (clustered_data
                          .withColumn("prev_cluster",
                                      lag("cluster").over(Window.partitionBy("country").orderBy("year")))
                          .filter(col("cluster") != col("prev_cluster")))
    cluster_transitions = evolution_analysis.groupBy("prev_cluster", "cluster").agg(
        count("country").alias("transition_count"))
    # Dead code from the original (cycle_phase / cycle_analysis) was
    # removed: it never contributed to the response payload.
    clustering_result = {
        "cluster_summary": cluster_analysis.toPandas().to_dict("records"),
        "cluster_countries": cluster_characteristics.toPandas().to_dict("records"),
        "temporal_patterns": temporal_patterns.toPandas().to_dict("records"),
        "transitions": cluster_transitions.toPandas().to_dict("records"),
    }
    return JsonResponse(clustering_result)
六.系统文档展示
结束
💕💕文末获取源码联系 计算机程序员小杨