💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
@TOC
电商物流数据分析与可视化系统介绍
本系统是一套基于大数据技术的电商物流数据分析与可视化平台,采用Hadoop分布式存储和Spark大数据处理引擎作为核心技术架构,能够高效处理海量电商物流数据并提供深度分析能力。系统前端采用Vue框架结合ElementUI组件库构建现代化用户界面,通过Echarts图表库实现丰富的数据可视化效果,后端基于Django框架和Spring Boot双技术栈支持,使用Python和Java双语言开发,确保系统的灵活性和扩展性。在数据处理层面,系统利用HDFS分布式文件系统存储大规模物流数据,通过Spark SQL进行高性能数据查询和分析,结合Pandas和NumPy科学计算库实现复杂的数据挖掘算法。功能方面,系统提供物流配送时效分析、成本折扣影响分析、客户评分满意度分析、产品特征影响分析等多个核心分析模块,通过多维指标综合分析帮助企业深入了解物流运营状况,系统还配备了大屏可视化功能,能够实时展示关键业务指标和趋势变化,为管理决策提供直观的数据支撑。此外,系统具备完善的用户管理机制和个人中心功能,支持密码修改、个人信息维护等基础操作,同时提供系统公告和简介功能,确保用户能够及时获取系统更新信息和使用指南,整体架构设计充分体现了现代大数据技术在电商物流领域的深度应用。
电商物流数据分析与可视化系统演示视频
电商物流数据分析与可视化系统演示图片
电商物流数据分析与可视化系统代码展示
# Shared Spark entry point for the logistics analytics module.
# Adaptive Query Execution (AQE) is turned on so Spark can coalesce
# shuffle partitions at runtime for the JDBC-sourced datasets below.
spark = (
    SparkSession.builder
    .appName("ECommerceLogisticsAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
    .getOrCreate()
)
def analyze_delivery_efficiency():
    """Summarize delivery performance per region from `logistics_orders`.

    Returns a dict with:
      - ``region_performance``: per-region average delivery days, order
        counts, on-time order counts and on-time rate (percent).
      - ``best_regions`` / ``worst_regions``: up to five region names with
        the highest / lowest on-time rate.
    """
    # NOTE(review): DB credentials are hard-coded — move to config/env.
    source = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/logistics")
        .option("dbtable", "logistics_orders")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    source.createOrReplaceTempView("logistics_orders")
    # Rows without an actual delivery date are still in transit; exclude them
    # so DATEDIFF and the on-time flag are well-defined.
    efficiency_query = """
        SELECT
            delivery_region,
            AVG(DATEDIFF(actual_delivery_date, order_date)) as avg_delivery_days,
            COUNT(*) as total_orders,
            SUM(CASE WHEN DATEDIFF(actual_delivery_date, order_date) <= promised_days THEN 1 ELSE 0 END) as on_time_orders,
            ROUND(SUM(CASE WHEN DATEDIFF(actual_delivery_date, order_date) <= promised_days THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) as on_time_rate
        FROM logistics_orders
        WHERE actual_delivery_date IS NOT NULL
        GROUP BY delivery_region
        ORDER BY on_time_rate DESC
    """
    regions = spark.sql(efficiency_query).toPandas()
    # One summary record per region, keyed by region name.
    region_performance = {
        rec['delivery_region']: {
            'avg_days': float(rec['avg_delivery_days']),
            'total_orders': int(rec['total_orders']),
            'on_time_orders': int(rec['on_time_orders']),
            'on_time_rate': float(rec['on_time_rate']),
        }
        for rec in regions.to_dict('records')
    }
    best_regions = regions.nlargest(5, 'on_time_rate')['delivery_region'].tolist()
    worst_regions = regions.nsmallest(5, 'on_time_rate')['delivery_region'].tolist()
    return {
        'region_performance': region_performance,
        'best_regions': best_regions,
        'worst_regions': worst_regions,
    }
def analyze_cost_discount_impact():
    """Analyze how discount depth relates to shipping cost and profit.

    Buckets orders from `order_details` into four discount categories
    (no/low/medium/high), aggregates cost and profit metrics per bucket,
    and identifies the bucket with the highest average profit margin.

    Returns a dict with:
      - ``discount_impact``: per-category aggregates (avg shipping cost,
        avg order value, order count, total/avg profit, cost efficiency).
      - ``optimal_discount_category``: category name with the best average
        profit margin, or ``None`` when no qualifying orders exist.
    """
    # NOTE(review): DB credentials are hard-coded — move to config/env.
    orders_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/logistics")
        .option("dbtable", "order_details")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    orders_df.createOrReplaceTempView("order_details")
    # The CASE expression is repeated in GROUP BY because SQL does not allow
    # referencing the SELECT alias there.
    cost_analysis_query = """
        SELECT
            CASE
                WHEN discount_rate = 0 THEN 'no_discount'
                WHEN discount_rate <= 0.1 THEN 'low_discount'
                WHEN discount_rate <= 0.3 THEN 'medium_discount'
                ELSE 'high_discount'
            END as discount_category,
            AVG(shipping_cost) as avg_shipping_cost,
            AVG(total_amount) as avg_order_value,
            COUNT(*) as order_count,
            SUM(profit_margin) as total_profit,
            AVG(profit_margin) as avg_profit_margin
        FROM order_details
        WHERE shipping_cost > 0 AND total_amount > 0
        GROUP BY
            CASE
                WHEN discount_rate = 0 THEN 'no_discount'
                WHEN discount_rate <= 0.1 THEN 'low_discount'
                WHEN discount_rate <= 0.3 THEN 'medium_discount'
                ELSE 'high_discount'
            END
        ORDER BY avg_profit_margin DESC
    """
    cost_pandas = spark.sql(cost_analysis_query).toPandas()
    discount_impact = {}
    for _, row in cost_pandas.iterrows():
        avg_shipping = float(row['avg_shipping_cost'])
        total_profit = float(row['total_profit'])
        discount_impact[row['discount_category']] = {
            'avg_shipping_cost': avg_shipping,
            'avg_order_value': float(row['avg_order_value']),
            'order_count': int(row['order_count']),
            'total_profit': total_profit,
            'avg_profit_margin': float(row['avg_profit_margin']),
            # Guard against division by zero (avg_shipping_cost should be > 0
            # given the WHERE clause, but stay defensive).
            'cost_efficiency': total_profit / avg_shipping if avg_shipping > 0 else 0,
        }
    # BUG FIX: idxmax() raises ValueError on an empty frame (e.g. empty
    # table or all rows filtered out) — return None instead of crashing.
    if cost_pandas.empty:
        optimal_discount = None
    else:
        optimal_discount = cost_pandas.loc[
            cost_pandas['avg_profit_margin'].idxmax(), 'discount_category'
        ]
    return {
        'discount_impact': discount_impact,
        'optimal_discount_category': optimal_discount,
    }
def analyze_customer_satisfaction():
    """Break down customer review scores and flag improvement areas.

    Aggregates `customer_reviews` by rating combination (speed, packaging,
    service, overall), builds a per-overall-rating distribution, and compares
    high- vs low-satisfaction groups to surface which sub-rating (speed,
    packaging, service) differs most.

    Returns a dict with:
      - ``rating_distribution``: per overall-rating bucket (1..5): review
        count and mean sub-ratings.
      - ``overall_satisfaction_score``: mean of the grouped overall ratings
        (0.0 when no qualifying reviews exist).
      - ``improvement_areas``: sub-rating names whose high/low-group gap
        exceeds 0.5.
    """
    # NOTE(review): DB credentials are hard-coded — move to config/env.
    reviews_df = (
        spark.read.format("jdbc")
        .option("url", "jdbc:mysql://localhost:3306/logistics")
        .option("dbtable", "customer_reviews")
        .option("user", "root")
        .option("password", "password")
        .load()
    )
    reviews_df.createOrReplaceTempView("customer_reviews")
    # HAVING COUNT(*) >= 5 drops rare rating combinations to reduce noise.
    satisfaction_query = """
        SELECT
            delivery_speed_rating,
            packaging_rating,
            service_rating,
            overall_rating,
            COUNT(*) as review_count,
            AVG(delivery_speed_rating) as avg_speed_rating,
            AVG(packaging_rating) as avg_packaging_rating,
            AVG(service_rating) as avg_service_rating,
            AVG(overall_rating) as avg_overall_rating
        FROM customer_reviews
        WHERE delivery_speed_rating IS NOT NULL AND overall_rating >= 1 AND overall_rating <= 5
        GROUP BY delivery_speed_rating, packaging_rating, service_rating, overall_rating
        HAVING COUNT(*) >= 5
        ORDER BY avg_overall_rating DESC
    """
    satisfaction_pandas = spark.sql(satisfaction_query).toPandas()
    rating_distribution = {}
    for rating in (1, 2, 3, 4, 5):
        rating_data = satisfaction_pandas[satisfaction_pandas['overall_rating'] == rating]
        if not rating_data.empty:
            rating_distribution[f'rating_{rating}'] = {
                'count': int(rating_data['review_count'].sum()),
                'avg_speed': float(rating_data['avg_speed_rating'].mean()),
                'avg_packaging': float(rating_data['avg_packaging_rating'].mean()),
                'avg_service': float(rating_data['avg_service_rating'].mean()),
            }
    # BUG FIX: mean() of an empty frame is NaN, and float('nan') as the score
    # breaks downstream JSON serialization — default to 0.0 when no rows.
    # NOTE(review): this is an unweighted mean of group averages, not a mean
    # over individual reviews — confirm this is the intended metric.
    if satisfaction_pandas.empty:
        satisfaction_score = 0.0
    else:
        satisfaction_score = float(satisfaction_pandas['avg_overall_rating'].mean())
    high_satisfaction_reviews = satisfaction_pandas[satisfaction_pandas['avg_overall_rating'] >= 4.0]
    low_satisfaction_reviews = satisfaction_pandas[satisfaction_pandas['avg_overall_rating'] <= 2.0]
    improvement_areas = []
    # Only compare groups when both exist; a gap > 0.5 on a sub-rating marks
    # it as a likely driver of dissatisfaction.
    if not high_satisfaction_reviews.empty and not low_satisfaction_reviews.empty:
        speed_diff = high_satisfaction_reviews['avg_speed_rating'].mean() - low_satisfaction_reviews['avg_speed_rating'].mean()
        packaging_diff = high_satisfaction_reviews['avg_packaging_rating'].mean() - low_satisfaction_reviews['avg_packaging_rating'].mean()
        service_diff = high_satisfaction_reviews['avg_service_rating'].mean() - low_satisfaction_reviews['avg_service_rating'].mean()
        if speed_diff > 0.5:
            improvement_areas.append('delivery_speed')
        if packaging_diff > 0.5:
            improvement_areas.append('packaging_quality')
        if service_diff > 0.5:
            improvement_areas.append('customer_service')
    return {
        'rating_distribution': rating_distribution,
        'overall_satisfaction_score': satisfaction_score,
        'improvement_areas': improvement_areas,
    }
电商物流数据分析与可视化系统文档展示
💖💖作者:计算机毕业设计小途 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目