一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
哺乳动物睡眠数据可视化分析系统是一个基于大数据技术的生物信息学分析平台,采用Hadoop+Spark分布式计算框架对哺乳动物睡眠行为数据进行深度挖掘和可视化展示。系统采用Django作为后端框架,结合Vue+ElementUI+Echarts前端技术栈,构建了完整的数据分析生态。系统核心功能涵盖生理指标雷达分析、睡眠时长统计、睡眠与体脑重关联性分析、危险等级评估、睡眠模式聚类等十五项专业分析模块。通过Spark SQL进行大规模数据查询处理,利用Pandas和NumPy进行科学计算,最终将分析结果以直观的图表形式呈现,为生物学研究人员提供了一个集数据存储、处理、分析、可视化于一体的综合性研究工具,有效提升了哺乳动物睡眠行为研究的数据处理效率和分析深度。
三、视频解说
四、部分功能展示
五、部分代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, count, sum, when, desc, asc
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import VectorAssembler
import pandas as pd
import numpy as np
# Shared Spark session used by every analysis function in this module.
# Adaptive query execution lets Spark re-optimize shuffle partitioning at runtime.
spark = (
    SparkSession.builder
    .appName("MammalSleepAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .getOrCreate()
)
def comprehensive_sleep_analysis(data_path):
    """Run a battery of descriptive analyses over the mammal-sleep CSV.

    Loads the dataset at *data_path* (header row expected, schema inferred),
    then computes: overall sleep averages, per-predation-index sleep stats,
    a body/brain/sleep correlation matrix, sleep-duration categories,
    exposure impact, the ten longest/shortest sleepers, and danger-level
    impact on sleep and lifespan.

    Returns a dict of collected results keyed by analysis name.
    """
    dataset = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(data_path)
    )

    # Overall averages across every animal in the dataset.
    overall = dataset.select(
        avg("total_sleep").alias("avg_sleep"),
        avg("rem_sleep").alias("avg_rem"),
        avg("non_rem_sleep").alias("avg_non_rem"),
        count("*").alias("total_animals"),
    ).collect()[0]

    # Average sleep grouped by how heavily the species is preyed upon.
    predation_stats = (
        dataset.groupBy("predation_index")
        .agg(
            avg("total_sleep").alias("avg_sleep_by_predation"),
            count("*").alias("count_by_predation"),
        )
        .orderBy("predation_index")
    )

    # Pairwise correlation between body weight, brain weight and total sleep;
    # rows with missing values are dropped before converting to pandas.
    corr_frame = dataset.select("body_weight", "brain_weight", "total_sleep").na.drop()
    corr_matrix = corr_frame.toPandas().corr()

    # Bucket each animal by its total daily sleep duration.
    categorized = dataset.withColumn(
        "sleep_category",
        when(col("total_sleep") < 6, "Short Sleeper")
        .when((col("total_sleep") >= 6) & (col("total_sleep") < 12), "Medium Sleeper")
        .otherwise("Long Sleeper"),
    )
    category_counts = categorized.groupBy("sleep_category").count().orderBy(desc("count"))

    # How sleep-site exposure relates to average sleep time.
    exposure_frame = dataset.select("sleep_exposure", "total_sleep").na.drop()
    exposure_stats = (
        exposure_frame.groupBy("sleep_exposure")
        .agg(
            avg("total_sleep").alias("avg_sleep_by_exposure"),
            count("*").alias("animals_count"),
        )
        .orderBy("sleep_exposure")
    )

    # Ten longest- and ten shortest-sleeping species by name.
    longest = dataset.select("animal_name", "total_sleep").orderBy(desc("total_sleep")).limit(10)
    shortest = dataset.select("animal_name", "total_sleep").orderBy(asc("total_sleep")).limit(10)

    # Relationship between overall danger level, sleep time and lifespan.
    danger_frame = dataset.select("danger_level", "total_sleep", "life_span").na.drop()
    danger_stats = (
        danger_frame.groupBy("danger_level")
        .agg(
            avg("total_sleep").alias("avg_sleep_by_danger"),
            avg("life_span").alias("avg_lifespan_by_danger"),
            count("*").alias("species_count"),
        )
        .orderBy("danger_level")
    )

    return {
        'basic_stats': overall,
        'predation_analysis': predation_stats.collect(),
        'correlation_matrix': corr_matrix,
        'sleep_categories': category_counts.collect(),
        'exposure_analysis': exposure_stats.collect(),
        'top_sleepers': longest.collect(),
        'bottom_sleepers': shortest.collect(),
        'danger_analysis': danger_stats.collect(),
    }
def sleep_clustering_analysis(data_path):
    """Cluster mammals on sleep/physiology features with KMeans (k=4) and profile each cluster.

    Loads the CSV at *data_path*, drops rows with missing feature values,
    assembles the nine numeric features into a vector, fits KMeans
    (fixed seed=42 so results are reproducible), then:
      - aggregates per-cluster feature averages and sizes,
      - labels each cluster center's features as High_/Low_ relative to the
        mean of that feature across all centers,
      - tabulates sleep-pattern buckets per cluster.

    Returns a dict with 'cluster_stats', 'cluster_centers',
    'pattern_distribution' and 'total_clusters'.
    """
    df = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(data_path)
    )
    feature_cols = [
        "total_sleep", "rem_sleep", "non_rem_sleep", "body_weight",
        "brain_weight", "life_span", "predation_index", "sleep_exposure",
        "danger_level",
    ]
    clean_df = df.select(*feature_cols).na.drop()

    assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
    feature_df = assembler.transform(clean_df)
    kmeans = KMeans(k=4, seed=42, featuresCol="features", predictionCol="cluster")
    model = kmeans.fit(feature_df)
    clustered_df = model.transform(feature_df)

    # Per-cluster averages of every input feature plus cluster size.
    cluster_analysis = clustered_df.groupBy("cluster").agg(
        avg("total_sleep").alias("avg_total_sleep"),
        avg("rem_sleep").alias("avg_rem_sleep"),
        avg("body_weight").alias("avg_body_weight"),
        avg("brain_weight").alias("avg_brain_weight"),
        avg("life_span").alias("avg_life_span"),
        avg("predation_index").alias("avg_predation_index"),
        avg("sleep_exposure").alias("avg_sleep_exposure"),
        avg("danger_level").alias("avg_danger_level"),
        count("*").alias("cluster_size"),
    ).orderBy("cluster")

    cluster_centers = model.clusterCenters()
    # Hoist the per-feature mean across all centers out of the loop: the
    # original recomputed np.mean([c[j] for c in cluster_centers]) twice per
    # feature per cluster. One vectorized pass gives the same values.
    feature_means = np.array([list(c) for c in cluster_centers]).mean(axis=0)

    cluster_characteristics = []
    for i, center in enumerate(cluster_centers):
        characteristics = {
            'cluster_id': i,
            'center_values': center.tolist(),
            'dominant_features': [],
        }
        for j, feature in enumerate(feature_cols):
            # Features exactly equal to the cross-center mean get no label,
            # matching the original strict </> comparisons.
            if center[j] > feature_means[j]:
                characteristics['dominant_features'].append(f"High_{feature}")
            elif center[j] < feature_means[j]:
                characteristics['dominant_features'].append(f"Low_{feature}")
        cluster_characteristics.append(characteristics)

    # Bucket animals into sleep patterns and count each pattern per cluster.
    sleep_pattern_distribution = clustered_df.withColumn(
        "sleep_pattern",
        when(col("total_sleep") > 15, "Extended_Sleep")
        .when((col("total_sleep") >= 10) & (col("total_sleep") <= 15), "Normal_Sleep")
        .when((col("total_sleep") >= 5) & (col("total_sleep") < 10), "Moderate_Sleep")
        .otherwise("Minimal_Sleep"),
    ).groupBy("cluster", "sleep_pattern").count().orderBy("cluster", desc("count"))

    return {
        'cluster_stats': cluster_analysis.collect(),
        'cluster_centers': cluster_characteristics,
        'pattern_distribution': sleep_pattern_distribution.collect(),
        'total_clusters': len(cluster_centers),
    }
def physiological_radar_analysis(data_path):
    """Build min-max normalized radar-chart data for each animal's physiology.

    Loads the CSV at *data_path*, drops rows with missing values, computes
    min/max/mean/stddev per metric via DataFrame.describe(), then scales every
    animal's metric to 0-100 for radar plotting. Also aggregates averages by
    body-size x brain-size group.

    Returns a dict with 'individual_radar', 'normalization_stats',
    'group_analysis' and 'metrics_list'.
    """
    df = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(data_path)
    )
    physiological_metrics = [
        "body_weight", "brain_weight", "total_sleep", "rem_sleep",
        "life_span", "predation_index", "sleep_exposure", "danger_level",
    ]
    normalized_df = df.select("animal_name", *physiological_metrics).na.drop()

    # describe() returns string-valued rows keyed by 'summary'; index them
    # once per metric instead of re-scanning the row list for each statistic.
    stats_dict = {}
    for metric in physiological_metrics:
        summary_rows = {
            row['summary']: row
            for row in normalized_df.select(metric).describe().collect()
        }
        stats_dict[metric] = {
            'min': float(summary_rows['min'][metric]),
            'max': float(summary_rows['max'][metric]),
            'mean': float(summary_rows['mean'][metric]),
            'stddev': float(summary_rows['stddev'][metric]),
        }

    radar_data = []
    for animal_row in normalized_df.collect():
        animal_radar = {'animal_name': animal_row['animal_name'], 'metrics': {}}
        for metric in physiological_metrics:
            raw_value = float(animal_row[metric]) if animal_row[metric] is not None else 0
            min_val = stats_dict[metric]['min']
            max_val = stats_dict[metric]['max']
            # Guard against a constant column (max == min) to avoid division by zero.
            normalized_value = (raw_value - min_val) / (max_val - min_val) if max_val != min_val else 0
            animal_radar['metrics'][metric] = {
                'raw_value': raw_value,
                'normalized_value': normalized_value * 100,
                # NOTE(review): this is the min-max score, not a true percentile
                # rank; key name kept for caller compatibility.
                'percentile_rank': normalized_value,
            }
        radar_data.append(animal_radar)

    # Bucket species by body and brain size for group-level radar averages.
    species_groups = normalized_df.withColumn(
        "body_size_group",
        when(col("body_weight") < 1, "Small")
        .when((col("body_weight") >= 1) & (col("body_weight") < 50), "Medium")
        .otherwise("Large"),
    ).withColumn(
        "brain_size_group",
        when(col("brain_weight") < 10, "Small_Brain")
        .when((col("brain_weight") >= 10) & (col("brain_weight") < 100), "Medium_Brain")
        .otherwise("Large_Brain"),
    )
    group_radar_analysis = species_groups.groupBy("body_size_group", "brain_size_group").agg(
        avg("total_sleep").alias("avg_sleep"),
        avg("rem_sleep").alias("avg_rem"),
        avg("life_span").alias("avg_lifespan"),
        avg("predation_index").alias("avg_predation"),
        avg("sleep_exposure").alias("avg_exposure"),
        avg("danger_level").alias("avg_danger"),
        count("*").alias("group_size"),
    ).orderBy("body_size_group", "brain_size_group")

    return {
        'individual_radar': radar_data,
        'normalization_stats': stats_dict,
        'group_analysis': group_radar_analysis.collect(),
        'metrics_list': physiological_metrics,
    }
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊