Preface
💖💖 Author: 计算机程序员小杨 💙💙 About me: I am a computer-science professional specializing in Java, WeChat Mini Programs, Python, Golang, Android, and several other IT areas. I take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing plagiarism-check scores. I love technology, enjoy digging into new tools and frameworks, and like solving real problems with code, so feel free to ask me anything about code and technology! 💛💛 A word of thanks: thank you all for your attention and support! 💕💕 Contact 计算机程序员小杨 at the end of this post to get the source code 💜💜 Web projects · Android/Mini Program projects · Big data projects · Deep learning projects · Graduation project topic selection 💜💜
I. Development Tools
Big data framework: Hadoop + Spark (Hive is not used in this build; customization is supported)
Development languages: Python + Java (both versions supported)
Backend frameworks: Django + Spring Boot (Spring + SpringMVC + MyBatis) (both versions supported)
Frontend: Vue + ElementUI + Echarts + HTML + CSS + JavaScript + jQuery
Key technologies: Hadoop, HDFS, Spark, Spark SQL, Pandas, NumPy
Database: MySQL
II. System Overview
The Global Product Inventory Data Analysis and Visualization System is an enterprise-grade inventory analytics platform built on big data technology. It uses the Hadoop + Spark distributed computing stack to process large volumes of inventory data and offers both Python and Java implementations for flexible development. The backend exposes stable API services through Django and Spring Boot, while the frontend is built with Vue + ElementUI and uses Echarts for rich data visualization. The system's core functionality spans five modules: product inventory data management, inventory value analysis, product timeliness analysis, warehouse optimization analysis, and sales risk analysis, enabling in-depth, multidimensional assessment of an enterprise's inventory position. Spark SQL handles efficient querying and computation, and Pandas and NumPy support data processing and statistical analysis, so the system can process large-scale inventory data and produce accurate analysis reports. Data is stored in a MySQL database to ensure consistency and reliability, with HDFS available for storing large volumes of historical data, giving enterprises a scientific basis for inventory decisions.
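As a rough sketch of the data flow this description implies — reading inventory rows from MySQL into Spark over JDBC, aggregating with Spark SQL, and handing the small result to Pandas for reporting — here is a minimal, illustrative example. The database name, table name, and connection options are placeholder assumptions, not values from the actual project:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("InventoryFlowSketch").getOrCreate()

# Placeholder JDBC connection details; substitute real host and credentials.
inventory = (spark.read.format("jdbc")
    .option("url", "jdbc:mysql://localhost:3306/inventory_db")
    .option("driver", "com.mysql.cj.jdbc.Driver")
    .option("dbtable", "inventory_items")
    .option("user", "root")
    .option("password", "change_me")
    .load())

# Register the table and aggregate with Spark SQL.
inventory.createOrReplaceTempView("inventory_items")
category_value = spark.sql(
    "SELECT category, SUM(quantity * unit_price) AS total_value "
    "FROM inventory_items GROUP BY category ORDER BY total_value DESC")

# The aggregate is small, so it is safe to bring into Pandas for reporting.
report_df = category_value.toPandas()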
III. Feature Demo
The most comprehensive data analysis project: a global product inventory system covering everything from inventory value to sales risk analysis | big data major | data visualization | system
IV. System Interface
V. Source Code
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import col, sum, avg, count, when, max, min, datediff, lit, month, stddev
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Note: sum/max/min from pyspark.sql.functions shadow the Python builtins here.
spark = (SparkSession.builder
    .appName("GlobalProductInventoryAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate())
def inventory_value_analysis(inventory_data):
    df = spark.createDataFrame(inventory_data)
    # Total capital tied up in stock across all SKUs.
    total_value = df.select(sum(col("quantity") * col("unit_price")).alias("total_inventory_value")).collect()[0]["total_inventory_value"]
    # Inventory value and SKU count per category, highest value first.
    category_value = df.groupBy("category").agg(sum(col("quantity") * col("unit_price")).alias("category_value"), count("*").alias("product_count")).orderBy(col("category_value").desc())
    # Products that individually account for more than 10% of total value.
    high_value_products = df.filter(col("quantity") * col("unit_price") > total_value * 0.1).select("product_id", "product_name", (col("quantity") * col("unit_price")).alias("product_value"))
    # Above-average-priced products that have fallen below their safety stock.
    low_stock_high_value = df.filter((col("quantity") < col("safety_stock")) & (col("unit_price") > df.select(avg("unit_price")).collect()[0][0])).select("product_id", "product_name", "quantity", "safety_stock", "unit_price")
    # ABC classification: cumulative value over products sorted by value;
    # A = top 70% of value, B = next 20%, C = the remainder.
    abc_analysis = df.withColumn("product_value", col("quantity") * col("unit_price")).orderBy(col("product_value").desc())
    abc_result = abc_analysis.withColumn("cumulative_value", sum("product_value").over(Window.orderBy(col("product_value").desc()).rowsBetween(Window.unboundedPreceding, Window.currentRow)))
    abc_classified = abc_result.withColumn("abc_class", when(col("cumulative_value") <= total_value * 0.7, "A").when(col("cumulative_value") <= total_value * 0.9, "B").otherwise("C"))
    # Monthly turnover ratio; products below 0.5 are flagged as slow-moving.
    inventory_turnover = df.withColumn("turnover_ratio", when(col("quantity") > 0, col("monthly_sales") / col("quantity")).otherwise(0))
    slow_moving_products = inventory_turnover.filter(col("turnover_ratio") < 0.5).select("product_id", "product_name", "quantity", "turnover_ratio", (col("quantity") * col("unit_price")).alias("tied_capital"))
    # Per-category and per-warehouse roll-ups for the dashboard views.
    category_performance = df.groupBy("category").agg(avg("quantity").alias("avg_quantity"), sum(col("quantity") * col("unit_price")).alias("total_value"), avg(col("monthly_sales") / col("quantity")).alias("avg_turnover"))
    warehouse_value_distribution = df.groupBy("warehouse_id").agg(sum(col("quantity") * col("unit_price")).alias("warehouse_value"), count("*").alias("sku_count"))
    return {"total_value": total_value, "category_analysis": category_value.collect(), "high_value_products": high_value_products.collect(), "low_stock_high_value": low_stock_high_value.collect(), "abc_classification": abc_classified.collect(), "slow_moving": slow_moving_products.collect(), "category_performance": category_performance.collect(), "warehouse_distribution": warehouse_value_distribution.collect()}
def product_timeliness_analysis(inventory_data, sales_data):
    inventory_df = spark.createDataFrame(inventory_data)
    sales_df = spark.createDataFrame(sales_data)
    current_date_val = datetime.now().date()
    # Days remaining until expiry; negative values mean the batch has expired.
    expiry_analysis = inventory_df.filter(col("expiry_date").isNotNull()).withColumn("days_to_expiry", datediff(col("expiry_date"), lit(current_date_val)))
    near_expiry_products = expiry_analysis.filter(col("days_to_expiry") <= 30).select("product_id", "product_name", "quantity", "expiry_date", "days_to_expiry", (col("quantity") * col("unit_price")).alias("potential_loss"))
    expired_products = expiry_analysis.filter(col("days_to_expiry") < 0).select("product_id", "product_name", "quantity", "expiry_date", (col("quantity") * col("unit_price")).alias("actual_loss"))
    # Average daily sales per product, joined back onto the inventory.
    sales_velocity = sales_df.groupBy("product_id").agg(avg("daily_sales").alias("avg_daily_sales"), sum("monthly_sales").alias("total_monthly_sales"))
    inventory_sales_joined = inventory_df.join(sales_velocity, "product_id", "left")
    # Estimated days of stock cover; 999 is a sentinel for products with no sales.
    stock_days = inventory_sales_joined.withColumn("estimated_stock_days", when(col("avg_daily_sales") > 0, col("quantity") / col("avg_daily_sales")).otherwise(999))
    overstock_products = stock_days.filter(col("estimated_stock_days") > 90).select("product_id", "product_name", "quantity", "estimated_stock_days", "avg_daily_sales")
    understock_products = stock_days.filter((col("estimated_stock_days") < 7) & (col("avg_daily_sales") > 0)).select("product_id", "product_name", "quantity", "estimated_stock_days", "avg_daily_sales", "safety_stock")
    # High month-to-month variation relative to the mean flags seasonal products.
    seasonal_analysis = sales_df.withColumn("month", month("sale_date")).groupBy("product_id", "month").agg(avg("daily_sales").alias("monthly_avg_sales"))
    seasonal_variance = seasonal_analysis.groupBy("product_id").agg(stddev("monthly_avg_sales").alias("sales_variance"), avg("monthly_avg_sales").alias("overall_avg"))
    seasonal_products = seasonal_variance.filter(col("sales_variance") / col("overall_avg") > 0.5).select("product_id", "sales_variance", "overall_avg")
    # Batch-level roll-up for production/expiry traceability.
    batch_tracking = inventory_df.filter(col("batch_number").isNotNull()).groupBy("product_id", "batch_number").agg(sum("quantity").alias("batch_quantity"), min("production_date").alias("production_date"), min("expiry_date").alias("batch_expiry"))
    return {"near_expiry": near_expiry_products.collect(), "expired": expired_products.collect(), "overstock": overstock_products.collect(), "understock": understock_products.collect(), "seasonal_products": seasonal_products.collect(), "batch_info": batch_tracking.collect()}
def storage_optimization_analysis(inventory_data, warehouse_data):
    inventory_df = spark.createDataFrame(inventory_data)
    warehouse_df = spark.createDataFrame(warehouse_data)
    # Volume each SKU occupies in its warehouse.
    space_utilization = inventory_df.join(warehouse_df, "warehouse_id").withColumn("volume_occupied", col("quantity") * col("unit_volume"))
    warehouse_utilization = space_utilization.groupBy("warehouse_id", "warehouse_name", "total_capacity").agg(sum("volume_occupied").alias("used_capacity"))
    # Utilization rate per warehouse, bucketed into three status labels.
    warehouse_efficiency = warehouse_utilization.withColumn("utilization_rate", col("used_capacity") / col("total_capacity")).withColumn("efficiency_status", when(col("utilization_rate") > 0.9, "Overcrowded").when(col("utilization_rate") < 0.3, "Underutilized").otherwise("Optimal"))
    product_space_analysis = inventory_df.groupBy("category").agg(sum(col("quantity") * col("unit_volume")).alias("total_volume"), count("*").alias("sku_count"), avg(col("quantity") * col("unit_volume")).alias("avg_volume_per_sku"))
    location_optimization = inventory_df.join(warehouse_df, "warehouse_id").select("product_id", "product_name", "warehouse_id", "warehouse_name", "quantity", "unit_volume", "access_frequency", "location_zone")
    # Frequently accessed products are candidates for more accessible zones.
    high_frequency_products = location_optimization.filter(col("access_frequency") > location_optimization.select(avg("access_frequency")).collect()[0][0]).select("product_id", "product_name", "warehouse_id", "access_frequency", "location_zone")
    # Fast sellers running on thin stock are candidates for cross-docking.
    cross_dock_candidates = inventory_df.filter((col("quantity") < col("safety_stock") * 0.5) & (col("monthly_sales") > 100)).select("product_id", "product_name", "quantity", "monthly_sales", "warehouse_id")
    storage_cost_analysis = inventory_df.join(warehouse_df, "warehouse_id").withColumn("storage_cost", col("quantity") * col("unit_volume") * col("cost_per_cubic_meter"))
    cost_by_category = storage_cost_analysis.groupBy("category").agg(sum("storage_cost").alias("total_storage_cost"), avg("storage_cost").alias("avg_storage_cost"))
    # Pair each underutilized warehouse with each overcrowded one as
    # candidate source/target pairs for space reallocation.
    under_used = warehouse_efficiency.filter(col("utilization_rate") < 0.3).alias("under")
    over_used = warehouse_efficiency.filter(col("utilization_rate") > 0.9).alias("over")
    space_reallocation = under_used.join(over_used, col("under.warehouse_id") != col("over.warehouse_id"))
    # Products split across multiple warehouses can potentially be consolidated.
    inventory_consolidation = inventory_df.groupBy("product_id").agg(count("warehouse_id").alias("warehouse_count"), sum("quantity").alias("total_quantity")).filter(col("warehouse_count") > 1)
    return {"warehouse_utilization": warehouse_efficiency.collect(), "space_by_category": product_space_analysis.collect(), "high_frequency_items": high_frequency_products.collect(), "cross_dock_candidates": cross_dock_candidates.collect(), "storage_costs": cost_by_category.collect(), "reallocation_pairs": space_reallocation.collect(), "consolidation_opportunities": inventory_consolidation.collect()}
VI. System Documentation
The End
💕💕 Contact 计算机程序员小杨 to get the source code