前言
- 💖💖作者:计算机程序员小杨
- 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我!
- 💛💛想说的话:感谢大家的关注与支持!
- 💕💕文末获取源码联系 计算机程序员小杨
- 💜💜
- 网站实战项目
- 安卓/小程序实战项目
- 大数据实战项目
- 深度学习实战项目
- 计算机毕业设计选题
- 💜💜
一.开发工具简介
- 大数据框架:Hadoop+Spark(本次没用Hive,支持定制)
- 开发语言:Python+Java(两个版本都支持)
- 后端框架:Django+Spring Boot(Spring+SpringMVC+MyBatis)(两个版本都支持)
- 前端:Vue+ElementUI+ECharts+HTML+CSS+JavaScript+jQuery
- 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy
- 数据库:MySQL
二.系统内容简介
《基于大数据的电商物流数据分析与可视化系统》是一套采用现代大数据技术栈构建的综合性数据分析平台。系统以Hadoop分布式存储框架和Spark大数据处理引擎作为核心技术架构,支持Python和Java两种开发语言实现,后端分别采用Django和Spring Boot框架提供稳定的服务支撑;前端运用Vue框架结合ElementUI组件库、ECharts图表库以及传统的HTML、CSS、JavaScript、jQuery技术栈构建用户交互界面,数据存储采用MySQL关系型数据库管理。系统功能涵盖系统首页、个人中心、物流配送时效分析、成本折扣影响分析、客户评分满意度分析、产品特征影响分析、多维指标综合分析、系统管理以及大屏展示等九大核心模块。在数据处理方面,系统通过HDFS分布式文件系统实现海量数据存储,利用Spark SQL进行高效数据查询处理,结合Pandas和NumPy进行数据清洗和统计分析,最终通过丰富的可视化图表展现电商物流各项关键指标的分析结果。该系统为电商企业提供物流运营决策支持,实现了从数据采集、存储、处理到可视化展示的完整大数据分析流程,充分体现了大数据技术在电商物流领域的实际应用价值。
三.系统功能演示
那些年我们一起踩过的毕设坑,基于大数据的电商物流分析系统帮你避开|大数据毕业设计
四.系统界面展示
五.系统源码展示
# Core feature 1: logistics delivery timeliness analysis
def _json_safe(value):
    """Return *value* with every NaN or infinite float replaced by ``None``.

    Python's ``json`` encoder writes such floats as the bare tokens ``NaN`` /
    ``Infinity``, which are not valid JSON and break ``JSON.parse`` on the
    client. Dicts are walked recursively; everything else is returned as is.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    # `value != value` is true only for NaN.
    if isinstance(value, float) and (value != value or abs(value) == float('inf')):
        return None
    return value


def _delivery_time_period(hour):
    """Map an hour of the day to the meal-period label used in the report."""
    if 6 <= hour < 10:
        return '早餐'
    if 10 <= hour < 14:
        return '午餐'
    if 14 <= hour < 17:
        return '下午茶'
    # Every other hour (including 00:00-05:59 and missing values) counts as dinner.
    return '晚餐'


def logistics_delivery_analysis(request):
    """Summarise delivery timeliness over the last 30 days.

    Args:
        request: The incoming HTTP request; it is not read by this view.

    Returns:
        A ``JsonResponse`` (presumably ``django.http.JsonResponse``) with:
        ``avg_delivery_stats`` (mean/median/std/count of minutes per distance
        bucket), ``weather_impact`` (mean minutes per weather condition,
        slowest first), ``time_efficiency`` (mean minutes per meal period),
        ``success_rate`` (percentage of '成功' orders per distance bucket),
        ``abnormal_count`` (orders slower than mean + 2 std) and
        ``total_orders``. Statistics that cannot be computed are ``null``.
    """
    # Pull the recent delivery records through Spark SQL.
    spark_query = """
SELECT restaurant_id, order_date, delivery_time, distance,
weather_condition, traffic_level, delivery_status
FROM delivery_records
WHERE order_date >= date_sub(current_date(), 30)
"""
    df = spark.sql(spark_query).toPandas()

    # Delivery duration in minutes, measured from order_date to delivery_time.
    order_time = pd.to_datetime(df['order_date'])
    df['delivery_duration'] = pd.to_datetime(df['delivery_time']) - order_time
    df['duration_minutes'] = df['delivery_duration'].dt.total_seconds() / 60

    # Distance buckets. include_lowest=True keeps orders at exactly 0 km in
    # the first bucket instead of silently dropping them.
    df['distance_range'] = pd.cut(
        df['distance'],
        bins=[0, 2, 5, 10, float('inf')],
        labels=['2km内', '2-5km', '5-10km', '10km以上'],
        include_lowest=True,
    )

    # Meal period derived from the hour of the order timestamp.
    df['hour'] = order_time.dt.hour
    df['time_period'] = df['hour'].apply(_delivery_time_period)

    # observed=False makes the long-standing default explicit: every bucket is
    # reported, even when no order fell into it.
    avg_delivery_time = (
        df.groupby('distance_range', observed=False)['duration_minutes']
        .agg(['mean', 'median', 'std', 'count'])
        .round(2)
    )

    # Effect of weather on delivery time, slowest condition first.
    weather_impact = (
        df.groupby('weather_condition')['duration_minutes']
        .mean()
        .sort_values(ascending=False)
    )

    # Average delivery time per meal period.
    time_efficiency = df.groupby('time_period')['duration_minutes'].mean()

    # Success rate per distance bucket. The mean of a boolean mask equals
    # successes / orders and yields NaN (not a division by zero) when a
    # bucket is empty.
    success_rate = (
        df['delivery_status'].eq('成功')
        .groupby(df['distance_range'], observed=False)
        .mean()
        .mul(100)
        .round(2)
    )

    # Abnormal orders: slower than the overall mean plus two standard deviations.
    durations = df['duration_minutes']
    threshold = durations.mean() + 2 * durations.std()
    abnormal_count = int((durations > threshold).sum())

    analysis_result = {
        'avg_delivery_stats': avg_delivery_time.to_dict(),
        'weather_impact': weather_impact.to_dict(),
        'time_efficiency': time_efficiency.to_dict(),
        'success_rate': success_rate.to_dict(),
        'abnormal_count': abnormal_count,
        'total_orders': len(df),
    }
    return JsonResponse(_json_safe(analysis_result))
# Core feature 2: customer rating / satisfaction analysis
def _json_safe(value):
    """Return *value* with every NaN or infinite float replaced by ``None``.

    Python's ``json`` encoder writes such floats as the bare tokens ``NaN`` /
    ``Infinity``, which are not valid JSON and break ``JSON.parse`` on the
    client. Dicts are walked recursively; everything else is returned as is.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    # `value != value` is true only for NaN.
    if isinstance(value, float) and (value != value or abs(value) == float('inf')):
        return None
    return value


def customer_satisfaction_analysis(request):
    """Summarise customer ratings from the last 90 days.

    Args:
        request: The incoming HTTP request; it is not read by this view.

    Returns:
        A ``JsonResponse`` (presumably ``django.http.JsonResponse``) with the
        overall average rating, the score distribution, per-cuisine and
        per-price-level statistics, an age-group x gender table, per-amount
        bucket averages, a monthly trend, features of low-rated orders
        (score <= 3) and the number of restaurants averaging >= 4.5 over at
        least 20 ratings. Statistics that cannot be computed are ``null``.
    """
    # Ratings joined with restaurant and customer attributes.
    rating_query = """
SELECT r.restaurant_id, r.customer_id, r.rating_score, r.review_text,
r.rating_date, r.order_amount, rs.cuisine_type, rs.price_level,
c.age_group, c.gender, c.consumption_level
FROM customer_ratings r
JOIN restaurant_info rs ON r.restaurant_id = rs.id
JOIN customer_info c ON r.customer_id = c.id
WHERE r.rating_date >= date_sub(current_date(), 90)
"""
    ratings_df = spark.sql(rating_query).toPandas()
    scores = ratings_df['rating_score']

    # Overall distribution and average.
    satisfaction_distribution = scores.value_counts().sort_index()
    avg_rating = scores.mean()

    # Satisfaction per cuisine type; the Chinese column labels are part of the
    # response payload.
    cuisine_satisfaction = (
        ratings_df.groupby('cuisine_type')['rating_score']
        .agg(['mean', 'count', 'std'])
        .round(2)
    )
    cuisine_satisfaction.columns = ['平均评分', '评价数量', '评分标准差']

    # Satisfaction per price level, best first.
    price_satisfaction = (
        ratings_df.groupby('price_level')['rating_score']
        .mean()
        .sort_values(ascending=False)
    )

    # Age group x gender table; combinations with no ratings are filled with 0.
    demographic_analysis = (
        ratings_df.groupby(['age_group', 'gender'])['rating_score']
        .mean()
        .unstack(fill_value=0)
    )

    # Order-amount buckets. include_lowest=True keeps orders of exactly 0 in
    # the first bucket instead of dropping them.
    ratings_df['amount_range'] = pd.cut(
        ratings_df['order_amount'],
        bins=[0, 50, 100, 200, float('inf')],
        labels=['50元以下', '50-100元', '100-200元', '200元以上'],
        include_lowest=True,
    )
    # observed=False makes the long-standing default explicit: empty buckets
    # are still reported.
    amount_satisfaction = (
        ratings_df.groupby('amount_range', observed=False)['rating_score'].mean()
    )

    # Monthly trend of the average rating.
    ratings_df['rating_month'] = (
        pd.to_datetime(ratings_df['rating_date']).dt.to_period('M')
    )
    monthly_trend = ratings_df.groupby('rating_month')['rating_score'].mean()

    # Features of low-rated orders (score <= 3). mode() is empty both when
    # there are no such orders and when every cuisine_type is missing, so
    # checking it covers both cases without risking an IndexError.
    low_rating_orders = ratings_df[scores <= 3]
    cuisine_modes = low_rating_orders['cuisine_type'].mode()
    low_rating_features = {
        'avg_amount': low_rating_orders['order_amount'].mean(),
        'main_cuisine': cuisine_modes.iloc[0] if not cuisine_modes.empty else None,
        'count': len(low_rating_orders),
    }

    # Highly rated restaurants: average >= 4.5 over at least 20 ratings.
    restaurant_ratings = (
        ratings_df.groupby('restaurant_id')['rating_score'].agg(['mean', 'count'])
    )
    is_excellent = (
        (restaurant_ratings['mean'] >= 4.5) & (restaurant_ratings['count'] >= 20)
    )
    excellent_restaurants = restaurant_ratings[is_excellent]

    satisfaction_result = {
        'overall_avg_rating': round(avg_rating, 2),
        'satisfaction_distribution': satisfaction_distribution.to_dict(),
        'cuisine_satisfaction': cuisine_satisfaction.to_dict(),
        'price_satisfaction': price_satisfaction.to_dict(),
        'demographic_analysis': demographic_analysis.to_dict(),
        'amount_satisfaction': amount_satisfaction.to_dict(),
        # Period keys are not JSON-serialisable, so they are sent as strings.
        'monthly_trend': {str(month): score for month, score in monthly_trend.items()},
        'low_rating_features': low_rating_features,
        'excellent_restaurants_count': len(excellent_restaurants),
    }
    return JsonResponse(_json_safe(satisfaction_result))
# Core feature 3: multi-dimensional composite analysis
def _json_safe(value):
    """Return *value* with every NaN or infinite float replaced by ``None``.

    Python's ``json`` encoder writes such floats as the bare tokens ``NaN`` /
    ``Infinity``, which are not valid JSON and break ``JSON.parse`` on the
    client. Dicts are walked recursively; everything else is returned as is.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    # `value != value` is true only for NaN.
    if isinstance(value, float) and (value != value or abs(value) == float('inf')):
        return None
    return value


def comprehensive_multi_dimensional_analysis(request):
    """Build a composite competitiveness report per restaurant.

    Args:
        request: The incoming HTTP request; it is not read by this view.

    Returns:
        A ``JsonResponse`` (presumably ``django.http.JsonResponse``) with
        overall metrics, per-district, per-cuisine and per-price-level tables,
        competitiveness by years in operation, the number of top-decile
        performers and a correlation matrix of the key metrics. Statistics
        that cannot be computed are ``null``.
    """
    # NOTE(review): the WHERE clause filters on cr.rating_date, so restaurants
    # for which the LEFT JOIN found no rating are dropped (NULL never passes
    # the comparison) - confirm whether that is intended.
    # NOTE(review): three one-to-many tables are joined before aggregating; if
    # a restaurant has several rows in more than one of them, the COUNT/SUM
    # values are presumably inflated by the row fan-out - verify against data.
    # NOTE(review): dr.duration_minutes is read as a stored column here, while
    # logistics_delivery_analysis derives it in pandas - confirm it exists.
    comprehensive_query = """
SELECT r.id as restaurant_id, r.restaurant_name, r.cuisine_type,
r.price_level, r.location_district, r.license_date,
AVG(cr.rating_score) as avg_rating,
COUNT(cr.id) as total_reviews,
AVG(dr.duration_minutes) as avg_delivery_time,
SUM(o.order_amount) as total_revenue,
COUNT(o.id) as total_orders,
AVG(o.discount_rate) as avg_discount
FROM restaurant_info r
LEFT JOIN customer_ratings cr ON r.id = cr.restaurant_id
LEFT JOIN delivery_records dr ON r.id = dr.restaurant_id
LEFT JOIN order_records o ON r.id = o.restaurant_id
WHERE cr.rating_date >= date_sub(current_date(), 180)
GROUP BY r.id, r.restaurant_name, r.cuisine_type, r.price_level,
r.location_district, r.license_date
"""
    multi_dim_df = spark.sql(comprehensive_query).toPandas()

    # Kept function-local, as in the original, because the file-level imports
    # are not visible from this block.
    from sklearn.preprocessing import MinMaxScaler

    # Scale the "higher is better" metrics to the 0-1 range.
    key_metrics = ['avg_rating', 'total_reviews', 'total_revenue', 'total_orders']
    multi_dim_df[key_metrics] = multi_dim_df[key_metrics].fillna(0)
    normalized_metrics = MinMaxScaler().fit_transform(multi_dim_df[key_metrics])

    # Delivery time is "lower is better", so its scaled value is inverted.
    # Missing values are replaced by the column mean before scaling.
    delivery_time = multi_dim_df[['avg_delivery_time']].fillna(
        multi_dim_df['avg_delivery_time'].mean()
    )
    multi_dim_df['delivery_score'] = (
        1 - MinMaxScaler().fit_transform(delivery_time).flatten()
    )

    # Competitiveness index: weighted sum of the scaled metrics.
    weights = {'avg_rating': 0.3, 'total_reviews': 0.2, 'total_revenue': 0.25,
               'total_orders': 0.15, 'delivery_score': 0.1}
    weighted_metrics = sum(
        normalized_metrics[:, position] * weights[metric]
        for position, metric in enumerate(key_metrics)
    )
    multi_dim_df['competitiveness_index'] = (
        weighted_metrics + multi_dim_df['delivery_score'] * weights['delivery_score']
    )

    # Per-district competition summary; the Chinese column labels are part of
    # the response payload.
    district_analysis = multi_dim_df.groupby('location_district').agg({
        'competitiveness_index': 'mean',
        'avg_rating': 'mean',
        'total_revenue': 'sum',
        'restaurant_name': 'count',
    }).round(2)
    district_analysis.columns = ['平均竞争力', '平均评分', '总营收', '餐厅数量']

    # Per-cuisine performance.
    cuisine_performance = multi_dim_df.groupby('cuisine_type').agg({
        'avg_rating': 'mean',
        'avg_delivery_time': 'mean',
        'avg_discount': 'mean',
        'competitiveness_index': 'mean',
    }).round(2)

    # Market performance per price level.
    price_market_analysis = multi_dim_df.groupby('price_level').agg({
        'total_revenue': ['sum', 'mean'],
        'total_orders': ['sum', 'mean'],
        'avg_rating': 'mean',
    }).round(2)
    # The aggregation above yields (metric, statistic) tuple column labels,
    # which json.dumps rejects as dict keys. Flatten them to strings such as
    # 'total_revenue_sum' so the response can be serialised.
    price_market_analysis.columns = [
        f'{metric}_{statistic}'
        for metric, statistic in price_market_analysis.columns
    ]

    # Years in operation versus competitiveness. include_lowest=True keeps a
    # value of exactly 0 in the first bucket; observed=False makes the
    # long-standing groupby default explicit so empty buckets are reported.
    license_age = pd.Timestamp.now() - pd.to_datetime(multi_dim_df['license_date'])
    multi_dim_df['operating_years'] = license_age.dt.days / 365
    multi_dim_df['years_group'] = pd.cut(
        multi_dim_df['operating_years'],
        bins=[0, 1, 3, 5, float('inf')],
        labels=['1年内', '1-3年', '3-5年', '5年以上'],
        include_lowest=True,
    )
    years_performance = (
        multi_dim_df.groupby('years_group', observed=False)['competitiveness_index']
        .mean()
    )

    # Top performers: competitiveness index in the top 10%.
    top_threshold = multi_dim_df['competitiveness_index'].quantile(0.9)
    top_performers = multi_dim_df[
        multi_dim_df['competitiveness_index'] >= top_threshold
    ]

    # Market concentration: revenue share (in %) of the ten largest
    # restaurants. Reported as 0 when there is no revenue at all, instead of
    # dividing by zero.
    total_revenue = multi_dim_df['total_revenue'].sum()
    top_ten_revenue = (
        multi_dim_df.nlargest(10, 'total_revenue')['total_revenue'].sum()
    )
    if total_revenue:
        revenue_concentration = top_ten_revenue / total_revenue * 100
    else:
        revenue_concentration = 0.0

    correlation_columns = key_metrics + ['competitiveness_index']
    comprehensive_result = {
        'overall_metrics': {
            'total_restaurants': len(multi_dim_df),
            'avg_competitiveness': multi_dim_df['competitiveness_index'].mean(),
            'market_concentration': round(revenue_concentration, 2),
        },
        'district_analysis': district_analysis.to_dict(),
        'cuisine_performance': cuisine_performance.to_dict(),
        'price_market_analysis': price_market_analysis.to_dict(),
        'years_performance': years_performance.to_dict(),
        'top_performers_count': len(top_performers),
        'correlation_matrix': multi_dim_df[correlation_columns].corr().to_dict(),
    }
    return JsonResponse(_json_safe(comprehensive_result))
六.系统文档展示
结束
💕💕文末获取源码联系 计算机程序员小杨