一、About the Author
💖💖Author: 计算机编程果茶熊 💙💙About me: I spent years in computer-science training and teaching as a programming instructor, and I still love teaching. I work across several IT fields, including Java, WeChat Mini Programs, Python, Golang, and Android. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I know some techniques for reducing similarity-check scores. I enjoy sharing fixes for problems I hit during development and talking shop, so feel free to bring me any code questions! 💛💛A quick word: thank you all for your follows and support! 💜💜 Hands-on website projects · Android/Mini Program projects · Big-data projects · CS graduation-project topics 💕💕Contact 计算机编程果茶熊 at the end of this post to get the source code
二、System Overview
Big-data framework: Hadoop + Spark (Hive supported via custom modification)
Development languages: Java + Python (both versions available)
Database: MySQL
Backend frameworks: SpringBoot (Spring + SpringMVC + MyBatis) and Django (both versions available)
Frontend: Vue + Echarts + HTML + CSS + JavaScript + jQuery
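One wiring detail the stack list leaves implicit: for Spark to read MySQL over JDBC, as the code in section five does, the MySQL connector jar has to be on Spark's classpath. Below is a minimal sketch of one way to do that; the spark.jars.packages coordinate and connector version are illustrative assumptions, not taken from the original project.

from pyspark.sql import SparkSession

# Hypothetical session setup: fetch the MySQL JDBC driver from Maven at startup
# so that spark.read.format("jdbc") can reach jdbc:mysql://localhost:3306/specialty_db.
spark = (SparkSession.builder.appName("XinjiangSpecialtyAnalysis")
         .config("spark.jars.packages", "mysql:mysql-connector-java:8.0.33")
         .getOrCreate())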
三、Video Walkthrough
四、Feature Showcase
五、Selected Code
from pyspark.sql import SparkSession
from pyspark.sql.functions import *  # Spark's sum/count/avg/min/max intentionally shadow the Python builtins below
from pyspark.sql.window import Window  # needed by the lag()/rank() calls; this import was missing from the original listing
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
spark = (SparkSession.builder.appName("XinjiangSpecialtyAnalysis")
         .config("spark.sql.adaptive.enabled", "true")
         .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
         .getOrCreate())

JDBC_URL = "jdbc:mysql://localhost:3306/specialty_db"

def load_table(name):
    # Every table below lives in the same MySQL database; read it over JDBC.
    return (spark.read.format("jdbc").option("url", JDBC_URL).option("dbtable", name)
            .option("user", "root").option("password", "password").load())
@csrf_exempt
def analyze_product_categories(request):
    sales_df = load_table("sales_records")
    # Revenue, orders, average price and volume per (category, subcategory).
    category_sales = sales_df.groupBy("category", "subcategory").agg(sum("sales_amount").alias("total_sales"),
        count("*").alias("order_count"), avg("unit_price").alias("avg_price"), sum("quantity").alias("total_quantity"))
    # Month-over-month growth: the last 30 days against the 30 days before that.
    category_growth = sales_df.filter(col("sale_date") >= date_sub(current_date(), 30)) \
        .groupBy("category").agg(sum("sales_amount").alias("recent_sales"))
    previous_month_sales = sales_df.filter((col("sale_date") >= date_sub(current_date(), 60)) &
        (col("sale_date") < date_sub(current_date(), 30))).groupBy("category").agg(sum("sales_amount").alias("previous_sales"))
    growth_rate = category_growth.join(previous_month_sales, "category", "left").withColumn("growth_rate",
        when(col("previous_sales") > 0, (col("recent_sales") - col("previous_sales")) / col("previous_sales") * 100).otherwise(0))
    top_categories = category_sales.orderBy(desc("total_sales")).limit(10)
    category_profit_margin = sales_df.groupBy("category").agg(
        avg((col("sales_amount") - col("cost_amount")) / col("sales_amount") * 100).alias("profit_margin"))
    seasonal_trends = sales_df.withColumn("month", month("sale_date")).groupBy("category", "month") \
        .agg(sum("sales_amount").alias("monthly_sales")).orderBy("category", "month")
    # Price spread per category: extremes plus the interquartile range.
    price_distribution = sales_df.groupBy("category").agg(min("unit_price").alias("min_price"),
        max("unit_price").alias("max_price"), percentile_approx("unit_price", 0.25).alias("q1_price"),
        percentile_approx("unit_price", 0.75).alias("q3_price"))
    # Units sold relative to average stock on hand.
    inventory_turnover = sales_df.join(load_table("inventory"), "product_id", "inner") \
        .groupBy("category").agg((sum("quantity") / avg("stock_quantity")).alias("turnover_rate"))
    result_data = {"top_categories": top_categories.toPandas().to_dict("records"), "growth_rates": growth_rate.toPandas().to_dict("records"),
                   "profit_margins": category_profit_margin.toPandas().to_dict("records"), "seasonal_trends": seasonal_trends.toPandas().to_dict("records"),
                   "price_distribution": price_distribution.toPandas().to_dict("records"), "inventory_turnover": inventory_turnover.toPandas().to_dict("records")}
    return JsonResponse(result_data)
@csrf_exempt
def analyze_store_performance(request):
    store_df = load_table("sales_records")
    # Headline KPIs per store: revenue, orders, distinct buyers, basket size, volume.
    store_sales_metrics = store_df.groupBy("store_id", "store_name").agg(sum("sales_amount").alias("total_revenue"),
        count("*").alias("total_orders"), countDistinct("customer_id").alias("unique_customers"),
        avg("sales_amount").alias("avg_order_value"), sum("quantity").alias("total_items_sold"))
    store_conversion_rate = store_df.join(load_table("store_visits"), "store_id", "inner") \
        .groupBy("store_id").agg((count("order_id") / sum("visit_count") * 100).alias("conversion_rate"))
    # Share of customers who came back to the same store at least once.
    customer_retention = store_df.groupBy("store_id", "customer_id").agg(count("*").alias("purchase_frequency")) \
        .groupBy("store_id").agg(avg("purchase_frequency").alias("avg_customer_frequency"),
        sum(when(col("purchase_frequency") > 1, 1).otherwise(0)).alias("repeat_customers"), count("*").alias("total_customers")) \
        .withColumn("retention_rate", col("repeat_customers") / col("total_customers") * 100)
    # Month-over-month revenue growth; the original stored the lagged revenue itself under "revenue_growth".
    month_window = Window.partitionBy("store_id").orderBy("year_month")
    monthly_performance = store_df.withColumn("year_month", date_format("sale_date", "yyyy-MM")) \
        .groupBy("store_id", "year_month").agg(sum("sales_amount").alias("monthly_revenue"), count("*").alias("monthly_orders")) \
        .withColumn("prev_revenue", lag("monthly_revenue").over(month_window)) \
        .withColumn("revenue_growth", when(col("prev_revenue") > 0,
        (col("monthly_revenue") - col("prev_revenue")) / col("prev_revenue") * 100))
    product_diversity = store_df.groupBy("store_id").agg(countDistinct("category").alias("category_count"),
        countDistinct("product_id").alias("product_count"), (countDistinct("category") / countDistinct("product_id")).alias("diversity_index"))
    store_rating_impact = store_df.join(load_table("store_ratings"), "store_id", "inner") \
        .groupBy("store_id").agg(avg("rating").alias("avg_rating"), corr("rating", "sales_amount").alias("rating_sales_correlation"))
    # Rank each store against the others in its region.
    regional_comparison = store_df.join(load_table("stores"), "store_id", "inner") \
        .groupBy("region", "store_id").agg(sum("sales_amount").alias("store_revenue")) \
        .withColumn("region_rank", rank().over(Window.partitionBy("region").orderBy(desc("store_revenue"))))
    peak_hours_analysis = store_df.withColumn("hour", hour("sale_time")).groupBy("store_id", "hour") \
        .agg(count("*").alias("hourly_orders"), sum("sales_amount").alias("hourly_revenue")) \
        .withColumn("peak_hour_rank", rank().over(Window.partitionBy("store_id").orderBy(desc("hourly_orders"))))
    result_data = {"store_metrics": store_sales_metrics.toPandas().to_dict("records"), "conversion_rates": store_conversion_rate.toPandas().to_dict("records"),
                   "retention_rates": customer_retention.toPandas().to_dict("records"), "monthly_trends": monthly_performance.toPandas().to_dict("records"),
                   "product_diversity": product_diversity.toPandas().to_dict("records"), "rating_analysis": store_rating_impact.toPandas().to_dict("records"),
                   "regional_ranking": regional_comparison.toPandas().to_dict("records"), "peak_hours": peak_hours_analysis.toPandas().to_dict("records")}
    return JsonResponse(result_data)
@csrf_exempt
def analyze_price_specifications(request):
    price_df = load_table("sales_records")
    # A negative price/quantity correlation hints that demand is price-sensitive.
    price_elasticity = price_df.groupBy("product_id").agg(corr("unit_price", "quantity").alias("price_quantity_correlation"),
        stddev("unit_price").alias("price_volatility"), avg("unit_price").alias("avg_price"), sum("quantity").alias("total_sold"))
    specification_analysis = price_df.groupBy("specification", "category").agg(avg("unit_price").alias("avg_spec_price"),
        sum("sales_amount").alias("spec_revenue"), count("*").alias("spec_orders"), avg("quantity").alias("avg_spec_quantity"))
    price_segments = price_df.withColumn("price_segment", when(col("unit_price") < 50, "低价段")
        .when(col("unit_price") < 200, "中价段").otherwise("高价段")) \
        .groupBy("price_segment", "category").agg(sum("sales_amount").alias("segment_revenue"),
        count("*").alias("segment_orders"), avg("customer_satisfaction").alias("avg_satisfaction"))
    # How our prices compare with competitors for the same product and specification.
    competitor_pricing = price_df.join(load_table("competitor_prices"), ["product_name", "specification"], "inner") \
        .withColumn("price_difference", col("unit_price") - col("competitor_price")) \
        .groupBy("category").agg(avg("price_difference").alias("avg_price_diff"),
        count(when(col("price_difference") > 0, 1)).alias("higher_priced_count"), count("*").alias("total_compared"))
    # How sales volume reacts when the same product's price moves over time.
    dynamic_pricing_effect = price_df.withColumn("previous_price", lag("unit_price").over(Window.partitionBy("product_id").orderBy("sale_date"))) \
        .filter(col("previous_price").isNotNull()) \
        .withColumn("price_change_rate", (col("unit_price") - col("previous_price")) / col("previous_price") * 100) \
        .groupBy("product_id").agg(corr("price_change_rate", "quantity").alias("price_change_sales_correlation"), avg("price_change_rate").alias("avg_price_change"))
    # Discount granted on bulk orders (10+ units) relative to the list price.
    bulk_discount_analysis = price_df.withColumn("unit_discount_rate", when(col("quantity") >= 10,
        (col("original_price") - col("unit_price")) / col("original_price") * 100).otherwise(0)) \
        .groupBy("category", "specification").agg(avg("unit_discount_rate").alias("avg_bulk_discount"),
        sum(when(col("quantity") >= 10, col("sales_amount")).otherwise(0)).alias("bulk_sales_revenue"))
    seasonal_price_trends = price_df.withColumn("season", when(month("sale_date").isin([12, 1, 2]), "冬季")
        .when(month("sale_date").isin([3, 4, 5]), "春季").when(month("sale_date").isin([6, 7, 8]), "夏季").otherwise("秋季")) \
        .groupBy("category", "specification", "season").agg(avg("unit_price").alias("seasonal_avg_price"), sum("sales_amount").alias("seasonal_revenue"))
    customer_price_sensitivity = price_df.join(load_table("customers"), "customer_id", "inner") \
        .groupBy("customer_segment", "category").agg(avg("unit_price").alias("segment_avg_price"),
        sum("quantity").alias("segment_quantity"), corr("unit_price", "customer_satisfaction").alias("price_satisfaction_correlation"))
    result_data = {"price_elasticity": price_elasticity.toPandas().to_dict("records"), "specification_pricing": specification_analysis.toPandas().to_dict("records"),
                   "price_segments": price_segments.toPandas().to_dict("records"), "competitor_comparison": competitor_pricing.toPandas().to_dict("records"),
                   "dynamic_pricing": dynamic_pricing_effect.toPandas().to_dict("records"), "bulk_discounts": bulk_discount_analysis.toPandas().to_dict("records"),
                   "seasonal_trends": seasonal_price_trends.toPandas().to_dict("records"), "customer_sensitivity": customer_price_sensitivity.toPandas().to_dict("records")}
    return JsonResponse(result_data)
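For the Vue + Echarts frontend to call these views, they still need URL routes. Here is a minimal sketch of the Django wiring, assuming the three views sit in a views module; the route paths are illustrative and not from the original project:

# urls.py -- hypothetical routing for the three analysis endpoints above
from django.urls import path
from . import views

urlpatterns = [
    path("api/category-analysis/", views.analyze_product_categories),
    path("api/store-performance/", views.analyze_store_performance),
    path("api/price-specifications/", views.analyze_price_specifications),
]

Each endpoint returns a JSON payload whose keys mirror its result_data dictionary, so the Echarts charts can bind to the fields directly.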
六、Documentation Samples
七、END
💕💕Contact 计算机编程果茶熊 to get the source code