一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《共享单车数据分析可视化系统》是一个基于大数据技术栈构建的综合性数据分析平台,采用Hadoop+Spark分布式计算框架处理海量共享单车运营数据。系统以Python为核心开发语言,后端采用Django框架提供RESTful API服务,前端运用Vue.js结合ElementUI组件库和ECharts可视化库构建交互界面。系统核心功能涵盖用户管理、共享单车数据管理、多维度数据分析(时间维度、环境维度、用户行为维度)以及综合需求分析,并提供直观的可视化大屏展示。通过HDFS分布式文件系统存储原始数据,利用Spark SQL进行复杂查询分析,结合Pandas和NumPy进行数据处理与统计计算,最终将分析结果存储至MySQL数据库。系统能够有效处理共享单车企业日常产生的骑行轨迹、用户行为、车辆状态等多源异构数据,为运营决策提供科学依据。
三、视频解说
四、部分功能展示
五、部分代码展示
from pyspark.sql import SparkSession
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
# Module-level SparkSession shared by every analysis view below.
# local[*] runs Spark in-process using all available cores — fine for a
# single-node deployment; a cluster master URL would replace it in production.
spark = SparkSession.builder.appName("BikeDataAnalysis").master("local[*]").getOrCreate()
@csrf_exempt
def time_dimension_analysis(request):
    """Aggregate bike-ride statistics along the time dimension.

    Expects a POST body of JSON ``{"start_date": "YYYY-MM-DD",
    "end_date": "YYYY-MM-DD"}`` and returns a JsonResponse with hourly
    usage, daily trend, peak hours, weekly pattern and monthly statistics
    for rides whose start_time falls inside that date range.

    Returns HTTP 405 for non-POST requests and HTTP 400 for a malformed
    body or dates that are not in YYYY-MM-DD form.
    """
    if request.method != 'POST':
        # The original fell through and implicitly returned None, which makes
        # Django raise a 500; answer with an explicit 405 instead.
        return JsonResponse({'error': 'POST required'}, status=405)
    try:
        payload = json.loads(request.body)
        # Validate both dates BEFORE they are interpolated into the Spark SQL
        # f-strings below — the raw client values would otherwise allow SQL
        # injection. Round-tripping through strptime guarantees the strings
        # contain nothing but a well-formed date.
        start_date = datetime.strptime(payload.get('start_date', ''), '%Y-%m-%d').date().isoformat()
        end_date = datetime.strptime(payload.get('end_date', ''), '%Y-%m-%d').date().isoformat()
    except (ValueError, TypeError):
        return JsonResponse({'error': 'start_date and end_date must be YYYY-MM-DD'}, status=400)
    # NOTE(review): JDBC credentials are hard-coded; they should be read from
    # Django settings / environment variables.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/bike_db").option("dbtable", "bike_records").option("user", "root").option("password", "password").load()
    df.createOrReplaceTempView("bike_records")
    # Rides bucketed by hour of day.
    hourly_usage = spark.sql(f"""
        SELECT HOUR(start_time) as hour, COUNT(*) as usage_count,
        AVG(duration) as avg_duration, SUM(distance) as total_distance
        FROM bike_records
        WHERE DATE(start_time) BETWEEN '{start_date}' AND '{end_date}'
        GROUP BY HOUR(start_time)
        ORDER BY hour
    """).collect()
    # Per-day ride counts and distinct active users.
    daily_trend = spark.sql(f"""
        SELECT DATE(start_time) as date, COUNT(*) as daily_count,
        COUNT(DISTINCT user_id) as active_users,
        AVG(duration) as avg_duration
        FROM bike_records
        WHERE DATE(start_time) BETWEEN '{start_date}' AND '{end_date}'
        GROUP BY DATE(start_time)
        ORDER BY date
    """).collect()
    # Peak hours = hours whose usage exceeds the mean hourly usage.
    # Hoist the mean out of the comprehension (the original recomputed it for
    # every row, O(n^2)) and guard against an empty result set, where
    # np.mean would emit a warning and yield NaN.
    usage_counts = [row['usage_count'] for row in hourly_usage]
    mean_usage = float(np.mean(usage_counts)) if usage_counts else 0.0
    peak_hours = [row.asDict() for row in hourly_usage if row['usage_count'] > mean_usage]
    # Day-of-week usage pattern (1 = Sunday in Spark SQL's DAYOFWEEK).
    weekly_pattern = spark.sql(f"""
        SELECT DAYOFWEEK(start_time) as weekday, COUNT(*) as usage_count,
        AVG(duration) as avg_duration
        FROM bike_records
        WHERE DATE(start_time) BETWEEN '{start_date}' AND '{end_date}'
        GROUP BY DAYOFWEEK(start_time)
        ORDER BY weekday
    """).collect()
    # Month-by-month ride totals, distance and unique riders.
    monthly_stats = spark.sql(f"""
        SELECT YEAR(start_time) as year, MONTH(start_time) as month,
        COUNT(*) as monthly_rides, SUM(distance) as total_distance,
        COUNT(DISTINCT user_id) as unique_users
        FROM bike_records
        WHERE DATE(start_time) BETWEEN '{start_date}' AND '{end_date}'
        GROUP BY YEAR(start_time), MONTH(start_time)
        ORDER BY year, month
    """).collect()
    result = {
        'hourly_usage': [row.asDict() for row in hourly_usage],
        'daily_trend': [row.asDict() for row in daily_trend],
        'peak_hours': peak_hours,
        'weekly_pattern': [row.asDict() for row in weekly_pattern],
        'monthly_stats': [row.asDict() for row in monthly_stats]
    }
    return JsonResponse(result)
@csrf_exempt
def environment_dimension_analysis(request):
    """Analyse ride behaviour against environmental factors for one region.

    Expects a POST body of JSON ``{"region_id": <int>}`` and returns a
    JsonResponse with weather impact, station hotspots, temperature
    correlation and ride-distance distribution for that region.

    Returns HTTP 405 for non-POST requests and HTTP 400 when the body is
    malformed or region_id is not an integer.
    """
    if request.method != 'POST':
        # The original fell through and implicitly returned None, which makes
        # Django raise a 500; answer with an explicit 405 instead.
        return JsonResponse({'error': 'POST required'}, status=405)
    try:
        payload = json.loads(request.body)
        # Force region_id to int BEFORE interpolating it into the Spark SQL
        # f-strings below — the raw client value would otherwise allow SQL
        # injection. (The original also read a 'weather_type' field that no
        # query used; it has been dropped.)
        region_id = int(payload.get('region_id'))
    except (ValueError, TypeError):
        return JsonResponse({'error': 'region_id must be an integer'}, status=400)
    # NOTE(review): JDBC credentials are hard-coded; they should be read from
    # Django settings / environment variables.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/bike_db").option("dbtable", "bike_records").option("user", "root").option("password", "password").load()
    weather_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/bike_db").option("dbtable", "weather_data").option("user", "root").option("password", "password").load()
    df.createOrReplaceTempView("bike_records")
    weather_df.createOrReplaceTempView("weather_data")
    # Ride volume/duration grouped by weather type (rides joined to the
    # weather record of their start date).
    weather_impact = spark.sql(f"""
        SELECT w.weather_type, COUNT(b.id) as ride_count,
        AVG(b.duration) as avg_duration, AVG(w.temperature) as avg_temp,
        AVG(w.humidity) as avg_humidity
        FROM bike_records b
        JOIN weather_data w ON DATE(b.start_time) = w.date
        WHERE b.region_id = {region_id}
        GROUP BY w.weather_type
        ORDER BY ride_count DESC
    """).collect()
    # Top 20 busiest start stations in the region.
    location_hotspots = spark.sql(f"""
        SELECT start_station_id, COUNT(*) as start_count,
        AVG(duration) as avg_duration, SUM(distance) as total_distance
        FROM bike_records
        WHERE region_id = {region_id}
        GROUP BY start_station_id
        ORDER BY start_count DESC
        LIMIT 20
    """).collect()
    # Ride counts bucketed into coarse temperature bands.
    temperature_correlation = spark.sql(f"""
        SELECT CASE
        WHEN w.temperature < 0 THEN 'very_cold'
        WHEN w.temperature < 10 THEN 'cold'
        WHEN w.temperature < 20 THEN 'mild'
        WHEN w.temperature < 30 THEN 'warm'
        ELSE 'hot'
        END as temp_range,
        COUNT(b.id) as ride_count, AVG(b.duration) as avg_duration
        FROM bike_records b
        JOIN weather_data w ON DATE(b.start_time) = w.date
        WHERE b.region_id = {region_id}
        GROUP BY temp_range
        ORDER BY ride_count DESC
    """).collect()
    # Ride counts bucketed by trip distance.
    distance_distribution = spark.sql(f"""
        SELECT CASE
        WHEN distance < 1 THEN 'short'
        WHEN distance < 3 THEN 'medium'
        WHEN distance < 5 THEN 'long'
        ELSE 'very_long'
        END as distance_range,
        COUNT(*) as count, AVG(duration) as avg_duration
        FROM bike_records
        WHERE region_id = {region_id}
        GROUP BY distance_range
        ORDER BY count DESC
    """).collect()
    result = {
        'weather_impact': [row.asDict() for row in weather_impact],
        'location_hotspots': [row.asDict() for row in location_hotspots],
        'temperature_correlation': [row.asDict() for row in temperature_correlation],
        'distance_distribution': [row.asDict() for row in distance_distribution]
    }
    return JsonResponse(result)
@csrf_exempt
def user_behavior_analysis(request):
    """Analyse rider behaviour over a trailing window of days.

    Expects a POST body of JSON ``{"analysis_period": <days, default 30>}``
    and returns a JsonResponse with ride frequency by user type/age group,
    per-user usage patterns, day-over-day retention and a user segmentation
    by ride count.

    Returns HTTP 405 for non-POST requests and HTTP 400 when the body is
    malformed or analysis_period is not an integer.
    """
    if request.method != 'POST':
        # The original fell through and implicitly returned None, which makes
        # Django raise a 500; answer with an explicit 405 instead.
        return JsonResponse({'error': 'POST required'}, status=405)
    try:
        payload = json.loads(request.body)
        # Force analysis_period to int BEFORE interpolating it into the Spark
        # SQL f-strings below — the raw client value would otherwise allow SQL
        # injection. (The original also read a 'user_type' field that no query
        # used; it has been dropped.)
        analysis_period = int(payload.get('analysis_period', 30))
    except (ValueError, TypeError):
        return JsonResponse({'error': 'analysis_period must be an integer'}, status=400)
    # NOTE(review): JDBC credentials are hard-coded; they should be read from
    # Django settings / environment variables.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/bike_db").option("dbtable", "bike_records").option("user", "root").option("password", "password").load()
    user_df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/bike_db").option("dbtable", "users").option("user", "root").option("password", "password").load()
    df.createOrReplaceTempView("bike_records")
    user_df.createOrReplaceTempView("users")
    # Ride frequency broken down by user type and age group.
    user_frequency = spark.sql(f"""
        SELECT u.user_type, u.age_group, COUNT(b.id) as ride_count,
        AVG(b.duration) as avg_duration, SUM(b.distance) as total_distance,
        COUNT(DISTINCT DATE(b.start_time)) as active_days
        FROM bike_records b
        JOIN users u ON b.user_id = u.id
        WHERE DATE(b.start_time) >= DATE_SUB(CURRENT_DATE(), {analysis_period})
        GROUP BY u.user_type, u.age_group
        ORDER BY ride_count DESC
    """).collect()
    # Per-user usage stats, limited to users with at least 5 rides in window.
    usage_patterns = spark.sql(f"""
        SELECT user_id, COUNT(*) as total_rides,
        AVG(duration) as avg_duration, MAX(duration) as max_duration,
        MIN(duration) as min_duration, SUM(distance) as total_distance,
        COUNT(DISTINCT start_station_id) as unique_stations
        FROM bike_records
        WHERE DATE(start_time) >= DATE_SUB(CURRENT_DATE(), {analysis_period})
        GROUP BY user_id
        HAVING COUNT(*) >= 5
        ORDER BY total_rides DESC
    """).collect()
    # Daily active users and how many of them also rode the previous day.
    retention_analysis = spark.sql(f"""
        SELECT DATE(start_time) as date,
        COUNT(DISTINCT user_id) as daily_active_users,
        COUNT(DISTINCT CASE WHEN b.user_id IN (
        SELECT DISTINCT user_id FROM bike_records
        WHERE DATE(start_time) = DATE_SUB(DATE(b.start_time), 1)
        ) THEN b.user_id END) as returning_users
        FROM bike_records b
        WHERE DATE(start_time) >= DATE_SUB(CURRENT_DATE(), {analysis_period})
        GROUP BY DATE(start_time)
        ORDER BY date
    """).collect()
    # Segment users by ride count within the window.
    user_segmentation = spark.sql(f"""
        SELECT CASE
        WHEN COUNT(*) = 1 THEN 'one_time'
        WHEN COUNT(*) <= 5 THEN 'occasional'
        WHEN COUNT(*) <= 20 THEN 'regular'
        ELSE 'frequent'
        END as user_segment,
        COUNT(DISTINCT user_id) as user_count,
        AVG(avg_duration) as segment_avg_duration,
        SUM(total_distance) as segment_total_distance
        FROM (
        SELECT user_id, COUNT(*) as rides, AVG(duration) as avg_duration,
        SUM(distance) as total_distance
        FROM bike_records
        WHERE DATE(start_time) >= DATE_SUB(CURRENT_DATE(), {analysis_period})
        GROUP BY user_id
        ) user_stats
        GROUP BY user_segment
        ORDER BY user_count DESC
    """).collect()
    result = {
        'user_frequency': [row.asDict() for row in user_frequency],
        'usage_patterns': [row.asDict() for row in usage_patterns],
        'retention_analysis': [row.asDict() for row in retention_analysis],
        'user_segmentation': [row.asDict() for row in user_segmentation]
    }
    return JsonResponse(result)
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊