💖💖 Author: 计算机毕业设计杰瑞 💙💙 About me: I have long worked as a computer science instructor and genuinely enjoy teaching. My languages include Java, WeChat Mini Programs, Python, Golang, and Android, and my projects cover big data, deep learning, websites, mini programs, Android apps, and algorithms. I regularly take on custom project development, code walkthroughs, thesis-defense coaching, and documentation writing, and I also know some techniques for reducing similarity scores. I enjoy sharing solutions to problems I run into during development and discussing technology, so feel free to ask me anything about code! 💛💛 A word of thanks: thank you all for your attention and support! 💜💜 Website practical projects · Android/Mini Program practical projects · Big data practical projects · Deep learning practical projects · Recommended topics for computer science graduation projects
Introduction to the Big-Data-Based Global Product Inventory Data Analysis System
The Global Product Inventory Data Analysis System is an intelligent inventory management and analysis platform built on a big data architecture. It uses Hadoop distributed storage and the Spark in-memory compute engine as its core technology, which lets it process large volumes of product inventory data efficiently. The backend can be implemented with either of two stacks, Python+Django or Java+Spring Boot; the frontend is built with Vue, ElementUI, and Echarts; and MySQL provides persistent storage. The system comprises five basic functional modules (home page, personal center, user management, product inventory data management, and system administration) plus four core analysis modules: inventory value analysis, product timeliness analysis, warehouse optimization analysis, and sales risk analysis. Using Spark SQL, Pandas, and NumPy, the system mines and analyzes global product inventory data in depth, giving enterprises a quantitative basis for inventory decisions and supporting goals such as inventory cost control, risk early warning, and efficiency optimization.
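To make the division of labor in this architecture concrete, the minimal sketch below shows how one Spark analysis result could be exposed by the Django backend as a JSON endpoint for the Vue + Echarts frontend to render. The module path, view name, and URL route are illustrative assumptions, not the project's actual code.

# views.py -- hypothetical Django view bridging the Spark analysis layer and the Echarts frontend
from django.http import JsonResponse
from analysis.inventory import inventory_value_analysis  # assumed module path for the function shown later

def inventory_value_view(request):
    # Run the Spark aggregation and hand the record lists to the Vue + Echarts frontend.
    result = inventory_value_analysis()
    return JsonResponse(result)

# urls.py -- assumed routing for the endpoint
# urlpatterns = [path("api/analysis/inventory-value/", inventory_value_view)]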
Demo Video of the Big-Data-Based Global Product Inventory Data Analysis System
Demo Screenshots of the Big-Data-Based Global Product Inventory Data Analysis System
Code Walkthrough of the Big-Data-Based Global Product Inventory Data Analysis System
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum as spark_sum, avg, max as spark_max, min as spark_min, when, desc, asc
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pymysql
spark = SparkSession.builder.appName("GlobalInventoryAnalysis").config("spark.sql.adaptive.enabled", "true").getOrCreate()
def inventory_value_analysis():
    # Load the raw inventory records from MySQL into pandas, then hand them to Spark.
    connection = pymysql.connect(host='localhost', user='root', password='password', database='inventory_db')
    inventory_query = "SELECT product_id, product_name, category, quantity, unit_price, supplier, warehouse_location FROM inventory_data"
    inventory_df = pd.read_sql(inventory_query, con=connection)
    spark_df = spark.createDataFrame(inventory_df)
    # Total value of each product line = on-hand quantity * unit price.
    spark_df = spark_df.withColumn("total_value", col("quantity") * col("unit_price"))
    # Value, quantity and price statistics per product category.
    category_analysis = spark_df.groupBy("category").agg(
        spark_sum("total_value").alias("category_total_value"),
        spark_sum("quantity").alias("category_total_quantity"),
        avg("unit_price").alias("avg_unit_price"),
        spark_max("total_value").alias("max_product_value"),
        spark_min("total_value").alias("min_product_value")
    ).orderBy(desc("category_total_value"))
    # Inventory value held at each warehouse.
    warehouse_analysis = spark_df.groupBy("warehouse_location").agg(
        spark_sum("total_value").alias("warehouse_total_value"),
        avg("total_value").alias("avg_product_value")
    ).orderBy(desc("warehouse_total_value"))
    # High-value products (> 10,000) and low-stock/high-price products worth monitoring.
    high_value_products = spark_df.filter(col("total_value") > 10000).select("product_id", "product_name", "total_value", "quantity", "unit_price").orderBy(desc("total_value"))
    low_inventory_high_value = spark_df.filter((col("quantity") < 50) & (col("unit_price") > 200)).select("product_id", "product_name", "quantity", "unit_price", "total_value")
    # Supplier-level view of the inventory each supplier accounts for.
    supplier_performance = spark_df.groupBy("supplier").agg(
        spark_sum("total_value").alias("supplier_inventory_value"),
        avg("unit_price").alias("avg_supplier_price"),
        spark_sum("quantity").alias("total_supplier_quantity")
    ).orderBy(desc("supplier_inventory_value"))
    # Overall inventory value and each category's share of it.
    total_inventory_value = spark_df.agg(spark_sum("total_value").alias("total_value")).collect()[0]['total_value']
    category_percentage = category_analysis.withColumn("value_percentage", (col("category_total_value") / total_inventory_value) * 100)
    # Products whose value alone makes them a concentration risk.
    risk_products = spark_df.filter(col("total_value") > 20000).select("product_id", "product_name", "total_value", "supplier")
    connection.close()
    return {
        "category_analysis": category_analysis.toPandas().to_dict('records'),
        "warehouse_analysis": warehouse_analysis.toPandas().to_dict('records'),
        "high_value_products": high_value_products.toPandas().to_dict('records'),
        "supplier_performance": supplier_performance.toPandas().to_dict('records'),
        "total_inventory_value": total_inventory_value,
        "category_percentage": category_percentage.toPandas().to_dict('records')
    }
def product_timeliness_analysis():
    # Load production/expiry dates from MySQL and derive shelf-life metrics in pandas.
    connection = pymysql.connect(host='localhost', user='root', password='password', database='inventory_db')
    product_query = "SELECT product_id, product_name, production_date, expiry_date, category, quantity, warehouse_location FROM product_timeliness"
    product_df = pd.read_sql(product_query, con=connection)
    product_df['production_date'] = pd.to_datetime(product_df['production_date'])
    product_df['expiry_date'] = pd.to_datetime(product_df['expiry_date'])
    current_date = datetime.now()
    # Days until expiry, total shelf life, and the fraction of shelf life remaining.
    product_df['days_to_expiry'] = (product_df['expiry_date'] - current_date).dt.days
    product_df['shelf_life_days'] = (product_df['expiry_date'] - product_df['production_date']).dt.days
    product_df['remaining_shelf_life_ratio'] = product_df['days_to_expiry'] / product_df['shelf_life_days']
    spark_timeliness_df = spark.createDataFrame(product_df)
    # Expired stock, stock expiring within 30 days, and still-fresh stock (> 80% of shelf life left).
    expired_products = spark_timeliness_df.filter(col("days_to_expiry") < 0).select("product_id", "product_name", "days_to_expiry", "quantity", "warehouse_location")
    near_expiry_products = spark_timeliness_df.filter((col("days_to_expiry") >= 0) & (col("days_to_expiry") <= 30)).select("product_id", "product_name", "days_to_expiry", "quantity", "category")
    fresh_products = spark_timeliness_df.filter(col("remaining_shelf_life_ratio") > 0.8).select("product_id", "product_name", "remaining_shelf_life_ratio", "quantity")
    # Expiry pressure per category: expired and near-expiry quantities against total stock.
    category_expiry_analysis = spark_timeliness_df.groupBy("category").agg(
        avg("days_to_expiry").alias("avg_days_to_expiry"),
        spark_sum(when(col("days_to_expiry") < 0, col("quantity")).otherwise(0)).alias("expired_quantity"),
        spark_sum(when((col("days_to_expiry") >= 0) & (col("days_to_expiry") <= 30), col("quantity")).otherwise(0)).alias("near_expiry_quantity"),
        spark_sum("quantity").alias("total_quantity")
    )
    # Per-warehouse freshness and the quantity that needs urgent handling (< 30 days left).
    warehouse_timeliness = spark_timeliness_df.groupBy("warehouse_location").agg(
        avg("remaining_shelf_life_ratio").alias("avg_shelf_life_ratio"),
        spark_sum(when(col("days_to_expiry") < 30, col("quantity")).otherwise(0)).alias("urgent_handling_quantity")
    )
    # Distribution of stock across shelf-life lengths.
    shelf_life_distribution = spark_timeliness_df.groupBy("shelf_life_days").agg(
        spark_sum("quantity").alias("quantity_count")
    ).orderBy("shelf_life_days")
    # Classify each product into a risk tier by days to expiry
    # (高风险 = high, 中高风险 = medium-high, 中风险 = medium, 低风险 = low risk).
    risk_assessment = spark_timeliness_df.withColumn("risk_level",
        when(col("days_to_expiry") < 0, "高风险")
        .when((col("days_to_expiry") >= 0) & (col("days_to_expiry") <= 7), "中高风险")
        .when((col("days_to_expiry") > 7) & (col("days_to_expiry") <= 30), "中风险")
        .otherwise("低风险")
    )
    risk_summary = risk_assessment.groupBy("risk_level").agg(spark_sum("quantity").alias("risk_quantity"))
    connection.close()
    return {
        "expired_products": expired_products.toPandas().to_dict('records'),
        "near_expiry_products": near_expiry_products.toPandas().to_dict('records'),
        "category_expiry_analysis": category_expiry_analysis.toPandas().to_dict('records'),
        "warehouse_timeliness": warehouse_timeliness.toPandas().to_dict('records'),
        "risk_summary": risk_summary.toPandas().to_dict('records')
    }
def warehouse_optimization_analysis():
    # Load per-warehouse storage records (area occupied, pick frequency) from MySQL.
    connection = pymysql.connect(host='localhost', user='root', password='password', database='inventory_db')
    warehouse_query = "SELECT warehouse_id, warehouse_location, product_id, product_name, quantity, storage_area, category, access_frequency FROM warehouse_data"
    warehouse_df = pd.read_sql(warehouse_query, con=connection)
    spark_warehouse_df = spark.createDataFrame(warehouse_df)
    # Space usage and access statistics per warehouse.
    storage_utilization = spark_warehouse_df.groupBy("warehouse_location").agg(
        spark_sum("storage_area").alias("total_storage_area"),
        spark_sum("quantity").alias("total_products"),
        avg("access_frequency").alias("avg_access_frequency"),
        spark_max("storage_area").alias("max_single_storage"),
        spark_min("storage_area").alias("min_single_storage")
    )
    # How each category's stock and floor space are spread across warehouses.
    category_distribution = spark_warehouse_df.groupBy("warehouse_location", "category").agg(
        spark_sum("quantity").alias("category_quantity"),
        spark_sum("storage_area").alias("category_storage_area")
    )
    # Frequently picked items (candidates for fast-access slots) and rarely picked items.
    high_frequency_products = spark_warehouse_df.filter(col("access_frequency") > 50).select("warehouse_location", "product_id", "product_name", "access_frequency", "storage_area").orderBy(desc("access_frequency"))
    low_frequency_products = spark_warehouse_df.filter(col("access_frequency") < 10).select("warehouse_location", "product_id", "product_name", "access_frequency", "quantity")
    # Storage efficiency = units stored per unit of floor area.
    storage_efficiency = spark_warehouse_df.withColumn("storage_efficiency_ratio", col("quantity") / col("storage_area"))
    efficiency_analysis = storage_efficiency.groupBy("warehouse_location").agg(
        avg("storage_efficiency_ratio").alias("avg_efficiency_ratio"),
        spark_max("storage_efficiency_ratio").alias("max_efficiency_ratio"),
        spark_min("storage_efficiency_ratio").alias("min_efficiency_ratio")
    ).orderBy(desc("avg_efficiency_ratio"))
    # Rule-based slotting recommendations driven by access frequency, footprint and quantity
    # (优先近距离存储 = prioritize nearby storage, 建议远程存储 = suggest remote storage,
    #  考虑拆分存储 = consider splitting storage, 当前存储合理 = current placement is reasonable).
    optimization_recommendations = spark_warehouse_df.withColumn("optimization_priority",
        when((col("access_frequency") > 30) & (col("storage_area") > 100), "优先近距离存储")
        .when((col("access_frequency") < 5) & (col("quantity") > 1000), "建议远程存储")
        .when(col("storage_area") > 200, "考虑拆分存储")
        .otherwise("当前存储合理")
    )
    priority_summary = optimization_recommendations.groupBy("warehouse_location", "optimization_priority").agg(
        spark_sum("quantity").alias("affected_quantity")
    )
    # Utilization relative to a hard-coded reference capacity of 10,000 area units per warehouse.
    warehouse_capacity_analysis = spark_warehouse_df.groupBy("warehouse_location").agg(
        (spark_sum("storage_area") / 10000).alias("utilization_percentage"),
        spark_sum("quantity").alias("current_inventory_count")
    )
    # Products with large stock spread across several warehouses (candidates for consolidation).
    cross_warehouse_analysis = spark_warehouse_df.groupBy("product_id").agg(
        spark_sum("quantity").alias("total_across_warehouses"),
        avg("access_frequency").alias("avg_access_across_warehouses")
    ).filter(col("total_across_warehouses") > 500)
    connection.close()
    return {
        "storage_utilization": storage_utilization.toPandas().to_dict('records'),
        "efficiency_analysis": efficiency_analysis.toPandas().to_dict('records'),
        "high_frequency_products": high_frequency_products.toPandas().to_dict('records'),
        "optimization_recommendations": priority_summary.toPandas().to_dict('records'),
        "warehouse_capacity_analysis": warehouse_capacity_analysis.toPandas().to_dict('records')
    }
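The introduction lists a fourth core module, sales risk analysis, which is not shown in the code above. As a rough sketch of how it could follow the same pattern, the function below assumes a hypothetical sales_data table (product_id, product_name, category, quantity, unit_price, monthly_sales) and illustrative thresholds; it is not the project's actual implementation.

def sales_risk_analysis():
    # Hypothetical sketch: the table name, columns and thresholds are assumptions, not project code.
    connection = pymysql.connect(host='localhost', user='root', password='password', database='inventory_db')
    sales_query = "SELECT product_id, product_name, category, quantity, unit_price, monthly_sales FROM sales_data"
    sales_df = pd.read_sql(sales_query, con=connection)
    spark_sales_df = spark.createDataFrame(sales_df)
    # Months of stock cover = on-hand quantity / average monthly sales.
    spark_sales_df = spark_sales_df.withColumn("months_of_cover", col("quantity") / col("monthly_sales"))
    # Flag slow movers and stock-out risks with illustrative cut-offs
    # (滞销风险 = slow-moving risk, 缺货风险 = stock-out risk, 正常 = normal).
    risk_flagged = spark_sales_df.withColumn("sales_risk_level",
        when(col("months_of_cover") > 6, "滞销风险")
        .when(col("months_of_cover") < 1, "缺货风险")
        .otherwise("正常")
    )
    risk_summary = risk_flagged.groupBy("category", "sales_risk_level").agg(
        spark_sum("quantity").alias("risk_quantity"),
        spark_sum(col("quantity") * col("unit_price")).alias("risk_value")
    )
    connection.close()
    return {"sales_risk_summary": risk_summary.toPandas().to_dict('records')}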
Documentation Showcase of the Big-Data-Based Global Product Inventory Data Analysis System