Python大数据毕设实战:烟酒成瘾数据分析与可视化系统开发详解|毕设|计算机毕设|系统

47 阅读 · 约 6 分钟

一、个人简介

💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊

二、系统介绍

大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery

基于大数据的烟酒成瘾个体数据分析与可视化系统是一套运用现代大数据处理技术构建的专业分析平台,系统采用Hadoop+Spark作为核心大数据处理框架,结合Python语言的强大数据处理能力,通过Django框架搭建稳定的后端服务架构。系统前端采用Vue+ElementUI构建现代化用户界面,集成Echarts图表库实现丰富的数据可视化展示效果。系统核心功能涵盖用户管理、成瘾历史分析、人口统计学分析、健康与生活方式分析、风险评估分析等多个维度,能够对烟酒成瘾个体的各类数据进行深度挖掘和智能分析。通过Spark SQL进行大规模数据查询处理,利用Pandas和NumPy进行精确的数据计算和统计分析,最终通过可视化大屏将复杂的分析结果以直观的图表形式展现给用户,为相关研究人员和决策者提供科学的数据支撑和决策参考。

三、基于大数据的烟酒成瘾个体数据分析与可视化系统-视频解说

Python大数据毕设实战:烟酒成瘾数据分析与可视化系统开发详解|毕设|计算机毕设|系统

四、基于大数据的烟酒成瘾个体数据分析与可视化系统-功能展示

在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述

五、基于大数据的烟酒成瘾个体数据分析与可视化系统-代码展示


import json

import numpy as np
import pandas as pd
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

# Module-level SparkSession shared by every view below; created once at
# import time. Adaptive Query Execution is enabled so Spark can tune
# shuffle partitioning at runtime.
spark = SparkSession.builder.appName("AddictionAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()

@require_http_methods(["POST"])
def addiction_history_analysis(request):
    """Analyze one user's recent smoking/drinking history records.

    POST body (JSON):
        user_id    -- id of the user whose records are analyzed.
        time_range -- look-back window in days (default 30).

    Returns a JsonResponse containing day-over-day consumption trends,
    stress/consumption correlations, peak-consumption days (above the
    80th percentile) and average consumption per weekday.
    """
    data = json.loads(request.body)
    user_id = data.get('user_id')
    time_range = data.get('time_range', 30)
    # inferSchema so numeric columns arrive as numbers: a header-only read
    # would leave every column as string, breaking np.corrcoef below.
    addiction_df = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs://localhost:9000/addiction_data/history_records.csv")
    filtered_df = addiction_df.filter((col("user_id") == user_id) & (col("record_date") >= date_sub(current_date(), time_range)))
    daily_consumption = filtered_df.groupBy("record_date").agg(
        sum("cigarette_count").alias("daily_cigarettes"),
        sum("alcohol_volume").alias("daily_alcohol"),
        avg("stress_level").alias("avg_stress")
    ).orderBy("record_date")
    # Day-over-day deltas. Window is imported from pyspark.sql.window; the
    # original referenced it without any import, raising NameError at runtime.
    date_window = Window.orderBy("record_date")
    trend_analysis = daily_consumption.withColumn(
        "cigarette_trend",
        col("daily_cigarettes") - lag("daily_cigarettes").over(date_window)
    ).withColumn(
        "alcohol_trend",
        col("daily_alcohol") - lag("daily_alcohol").over(date_window)
    )
    correlation_data = filtered_df.select("stress_level", "cigarette_count", "alcohol_volume").toPandas()
    # Guard: corrcoef on fewer than 2 rows produces NaN (and warnings);
    # report a neutral 0.0 correlation instead.
    if len(correlation_data) > 1:
        stress_cigarette_corr = np.corrcoef(correlation_data['stress_level'], correlation_data['cigarette_count'])[0, 1]
        stress_alcohol_corr = np.corrcoef(correlation_data['stress_level'], correlation_data['alcohol_volume'])[0, 1]
    else:
        stress_cigarette_corr = 0.0
        stress_alcohol_corr = 0.0
    # Hoist the 80th-percentile thresholds into a single collect: the
    # original recomputed each one inside the filter expression, launching
    # two extra Spark jobs per request.
    thresholds = daily_consumption.select(
        percentile_approx("daily_cigarettes", 0.8).alias("cig_p80"),
        percentile_approx("daily_alcohol", 0.8).alias("alc_p80")
    ).collect()[0]
    peak_consumption_days = daily_consumption.filter(
        (col("daily_cigarettes") > thresholds["cig_p80"]) |
        (col("daily_alcohol") > thresholds["alc_p80"])
    ).collect()
    weekly_pattern = filtered_df.withColumn("day_of_week", date_format("record_date", "EEEE")).groupBy("day_of_week").agg(
        avg("cigarette_count").alias("avg_cigarettes_per_day"),
        avg("alcohol_volume").alias("avg_alcohol_per_day")
    ).collect()
    result_data = {
        'daily_trends': [row.asDict() for row in trend_analysis.collect()],
        'correlations': {'stress_cigarette': float(stress_cigarette_corr), 'stress_alcohol': float(stress_alcohol_corr)},
        'peak_days': [row.asDict() for row in peak_consumption_days],
        'weekly_patterns': [row.asDict() for row in weekly_pattern]
    }
    return JsonResponse({'status': 'success', 'data': result_data})

@require_http_methods(["POST"])
def demographic_analysis(request):
    """Break down consumption/addiction metrics by demographic dimension.

    POST body (JSON):
        analysis_type -- 'age_group' (default) or 'gender'.
        region_filter -- optional region name to restrict the analysis.

    Returns a JsonResponse with the main breakdown, education-level impact,
    income/cigarette correlation and per-region statistics.
    """
    data = json.loads(request.body)
    analysis_type = data.get('analysis_type', 'age_group')
    region_filter = data.get('region_filter', None)
    # Reject unknown analysis types up front: the original fell through with
    # analysis_result unbound and crashed with NameError when building the
    # response.
    if analysis_type not in ('age_group', 'gender'):
        return JsonResponse({'status': 'error', 'message': 'unsupported analysis_type'}, status=400)
    # inferSchema so numeric columns (age, incomes, averages) are typed;
    # otherwise np.corrcoef below would run on string data.
    demographic_df = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs://localhost:9000/addiction_data/demographic_info.csv")
    consumption_df = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs://localhost:9000/addiction_data/consumption_records.csv")
    joined_df = demographic_df.join(consumption_df, "user_id", "inner")
    if region_filter:
        joined_df = joined_df.filter(col("region") == region_filter)
    if analysis_type == 'age_group':
        age_groups = joined_df.withColumn("age_group",
            when(col("age") < 25, "18-24")
            .when((col("age") >= 25) & (col("age") < 35), "25-34")
            .when((col("age") >= 35) & (col("age") < 45), "35-44")
            .when((col("age") >= 45) & (col("age") < 55), "45-54")
            .otherwise("55+")
        )
        analysis_result = age_groups.groupBy("age_group").agg(
            count("user_id").alias("user_count"),
            avg("cigarette_daily_avg").alias("avg_cigarettes"),
            avg("alcohol_weekly_avg").alias("avg_alcohol"),
            sum(when(col("addiction_level") == "severe", 1).otherwise(0)).alias("severe_cases")
        )
    else:  # analysis_type == 'gender'
        analysis_result = joined_df.groupBy("gender").agg(
            count("user_id").alias("user_count"),
            avg("cigarette_daily_avg").alias("avg_cigarettes"),
            avg("alcohol_weekly_avg").alias("avg_alcohol"),
            (sum("cigarette_daily_avg") / count("user_id")).alias("consumption_per_capita")
        )
    education_impact = joined_df.groupBy("education_level").agg(
        avg("addiction_score").alias("avg_addiction_score"),
        count("user_id").alias("group_size")
    ).orderBy(desc("avg_addiction_score"))
    income_correlation = joined_df.select("monthly_income", "cigarette_daily_avg", "alcohol_weekly_avg").toPandas()
    # Guard: corrcoef on fewer than 2 rows yields NaN; report neutral 0.0.
    if len(income_correlation) > 1:
        income_cigarette_corr = np.corrcoef(income_correlation['monthly_income'], income_correlation['cigarette_daily_avg'])[0, 1]
    else:
        income_cigarette_corr = 0.0
    regional_stats = joined_df.groupBy("region").agg(
        count("user_id").alias("total_users"),
        avg("addiction_score").alias("regional_avg_score"),
        sum("cigarette_daily_avg").alias("total_cigarette_consumption")
    ).collect()
    result_data = {
        'main_analysis': [row.asDict() for row in analysis_result.collect()],
        'education_impact': [row.asDict() for row in education_impact.collect()],
        'income_correlations': {'income_cigarette': float(income_cigarette_corr)},
        'regional_statistics': [row.asDict() for row in regional_stats]
    }
    return JsonResponse({'status': 'success', 'data': result_data})

@require_http_methods(["POST"])
def risk_assessment_analysis(request):
    """Score addiction-related risk for a set of users and project it forward.

    POST body (JSON):
        user_ids        -- optional list of user ids to restrict the assessment.
        assessment_date -- optional date string to restrict to one assessment.

    Combines health, lifestyle and consumption records, derives three
    weighted sub-scores plus a total, buckets users into risk levels,
    and returns the distribution, high-risk individuals, factor
    correlations and a simple 6-month projection.
    """
    payload = json.loads(request.body)
    target_users = payload.get('user_ids', [])
    target_date = payload.get('assessment_date', None)
    reader = spark.read.option("header", "true")
    health_df = reader.csv("hdfs://localhost:9000/addiction_data/health_records.csv")
    lifestyle_df = reader.csv("hdfs://localhost:9000/addiction_data/lifestyle_data.csv")
    consumption_df = reader.csv("hdfs://localhost:9000/addiction_data/daily_consumption.csv")
    combined_df = health_df.join(lifestyle_df, "user_id", "inner").join(consumption_df, "user_id", "inner")
    # Optional narrowing by user set and/or assessment date.
    if target_users:
        combined_df = combined_df.filter(col("user_id").isin(target_users))
    if target_date:
        combined_df = combined_df.filter(col("assessment_date") == target_date)
    # Three weighted sub-scores, their sum, and a bucketed risk level --
    # built as one withColumn chain over the combined frame.
    scored_df = (
        combined_df
        .withColumn("consumption_risk_score",
            (col("cigarettes_per_day") * 0.3 + col("alcohol_units_per_week") * 0.2))
        .withColumn("health_risk_score",
            when(col("blood_pressure_systolic") > 140, 2).otherwise(0)
            + when(col("bmi") > 30, 1.5).otherwise(0)
            + when(col("family_history_addiction") == "yes", 1).otherwise(0))
        .withColumn("lifestyle_risk_score",
            when(col("exercise_frequency") < 2, 1).otherwise(0)
            + when(col("sleep_hours") < 6, 1.5).otherwise(0)
            + when(col("stress_level") > 7, 2).otherwise(0))
        .withColumn("total_risk_score",
            col("consumption_risk_score") + col("health_risk_score") + col("lifestyle_risk_score"))
        .withColumn("risk_level",
            when(col("total_risk_score") < 3, "low")
            .when((col("total_risk_score") >= 3) & (col("total_risk_score") < 6), "moderate")
            .when((col("total_risk_score") >= 6) & (col("total_risk_score") < 9), "high")
            .otherwise("critical"))
    )
    # How many users fall into each risk bucket, and their mean score.
    level_summary = scored_df.groupBy("risk_level").agg(
        count("user_id").alias("user_count"),
        avg("total_risk_score").alias("avg_score")
    ).collect()
    # Individuals in the top two buckets, with their sub-score breakdown.
    flagged_users = scored_df.filter(col("risk_level").isin(["high", "critical"])).select(
        "user_id", "total_risk_score", "risk_level", "consumption_risk_score", "health_risk_score", "lifestyle_risk_score"
    ).collect()
    # Pairwise correlation matrix of the raw risk factors (via pandas).
    factor_frame = combined_df.select("cigarettes_per_day", "alcohol_units_per_week", "stress_level", "bmi").toPandas()
    factor_corr = factor_frame.corr()
    # Naive 6-month projection: 10% growth plus a lifestyle penalty.
    projections_df = scored_df.select(
        "consumption_risk_score", "health_risk_score", "lifestyle_risk_score", "total_risk_score"
    ).withColumn("projected_6_month_risk",
        col("total_risk_score") * 1.1 + when(col("lifestyle_risk_score") > 2, 0.5).otherwise(0))
    result_data = {
        'risk_distribution': [row.asDict() for row in level_summary],
        'high_risk_individuals': [row.asDict() for row in flagged_users],
        'factor_correlations': factor_corr.to_dict(),
        'future_projections': [row.asDict() for row in projections_df.collect()]
    }
    return JsonResponse({'status': 'success', 'data': result_data})



六、基于大数据的烟酒成瘾个体数据分析与可视化系统-文档展示

在这里插入图片描述

七、END

💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊