前言
💖💖作者:计算机程序员小杨 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💕💕文末获取源码联系 计算机程序员小杨 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题 💜💜
一.开发工具简介
大数据框架:Hadoop+Spark(本次没用Hive,支持定制) 开发语言:Python+Java(两个版本都支持) 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持) 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy 数据库:MySQL
二.系统内容简介
《广东省房价数据可视化分析系统》是基于大数据技术构建的房地产市场分析平台,采用Hadoop+Spark分布式计算框架处理海量房价数据,结合Python生态下的Django后端框架和Vue+ElementUI+Echarts前端技术栈,实现对广东省房地产市场的全方位数据挖掘与可视化展示。系统通过HDFS分布式文件系统存储房价数据,利用Spark SQL进行大规模数据查询分析,配合Pandas和NumPy进行数据预处理与统计计算,将复杂的房价变化趋势、区域分布特征、房型结构差异等信息转化为直观的图表和交互式大屏展示。平台涵盖时间趋势分析、地理位置分析、房型价格分析、市场结构分析、楼盘特色分析等核心功能模块,为房地产从业者、政策制定者和购房群体提供科学的数据支撑,助力房地产市场的理性分析与决策制定。
三.系统功能演示
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg, count, max, min, sum, year, month, desc, asc
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
import pandas as pd
import numpy as np
spark = SparkSession.builder.appName("GuangdongHousePriceAnalysis").config("spark.sql.adaptive.enabled", "true").config("spark.sql.adaptive.coalescePartitions.enabled", "true").getOrCreate()
@require_http_methods(["GET"])
def time_trend_analysis(request):
    """Return the monthly average-price trend as JSON.

    Query parameters:
        city: city name to filter by, or ``'all'`` (default) for every city.
        start_date / end_date: optional inclusive date range; the filter is
            applied only when both are present.

    The response contains the per-month trend, the standard deviation of
    month-over-month percentage changes (volatility), and the periods with
    the highest and lowest average price (``None`` when there is no data).
    """
    city = request.GET.get('city', 'all')
    start_date = request.GET.get('start_date')
    end_date = request.GET.get('end_date')
    # NOTE(review): JDBC credentials are hard-coded; they should come from
    # Django settings / environment configuration.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/house_price").option("dbtable", "house_price_data").option("user", "root").option("password", "password").load()
    if city != 'all':
        df = df.filter(df.city == city)
    if start_date and end_date:
        df = df.filter((df.date >= start_date) & (df.date <= end_date))
    df = df.withColumn("year", year(df.date)).withColumn("month", month(df.date))
    monthly_avg = df.groupBy("year", "month").agg(avg("price").alias("avg_price"), count("*").alias("count")).orderBy("year", "month")
    trend_data = [
        {
            'period': f"{row['year']}-{row['month']:02d}",
            'avg_price': round(row['avg_price'], 2),
            'count': row['count'],
        }
        for row in monthly_avg.collect()
    ]
    # Month-over-month percentage changes. Skip pairs whose previous price
    # is 0 to avoid ZeroDivisionError on degenerate data.
    price_changes = []
    for previous, current in zip(trend_data, trend_data[1:]):
        if previous['avg_price']:
            change_rate = ((current['avg_price'] - previous['avg_price']) / previous['avg_price']) * 100
            price_changes.append(change_rate)
    volatility = np.std(price_changes) if price_changes else 0
    # max()/min() raise ValueError on an empty sequence; an empty query
    # result must not turn into an HTTP 500.
    if trend_data:
        max_price_period = max(trend_data, key=lambda x: x['avg_price'])
        min_price_period = min(trend_data, key=lambda x: x['avg_price'])
    else:
        max_price_period = None
        min_price_period = None
    return JsonResponse({
        'trend_data': trend_data,
        'volatility': round(volatility, 2),
        'max_price_period': max_price_period,
        'min_price_period': min_price_period,
        'total_periods': len(trend_data)
    })
@require_http_methods(["GET"])
def geographic_analysis(request):
    """Aggregate prices by geography and return the stats as JSON.

    Query parameters:
        type: ``'city'``, ``'district'`` (default), or anything else for a
            province-level rollup.

    The response contains per-location aggregates, a high/medium/low price
    distribution, the top-5 locations by the query's sort order, and a
    city -> (lng, lat) coordinate map for map rendering.
    """
    analysis_type = request.GET.get('type', 'district')
    # NOTE(review): JDBC credentials are hard-coded; move to settings/env.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/house_price").option("dbtable", "house_price_data").option("user", "root").option("password", "password").load()
    if analysis_type == 'city':
        geographic_stats = df.groupBy("city").agg(avg("price").alias("avg_price"), count("*").alias("total_count"), max("price").alias("max_price"), min("price").alias("min_price")).orderBy(desc("avg_price"))
    elif analysis_type == 'district':
        geographic_stats = df.groupBy("city", "district").agg(avg("price").alias("avg_price"), count("*").alias("total_count"), max("price").alias("max_price"), min("price").alias("min_price")).orderBy("city", desc("avg_price"))
    else:
        geographic_stats = df.groupBy("province").agg(avg("price").alias("avg_price"), count("*").alias("total_count")).orderBy(desc("avg_price"))
    # Only the city/district branches aggregate max/min price; the rollup
    # branch has no such columns. pyspark Row is a tuple subclass with no
    # .get(), so the extremes must be read conditionally, never via .get().
    has_extremes = analysis_type in ('city', 'district')
    result_data = []
    for row in geographic_stats.collect():
        if analysis_type == 'district':
            location_name = f"{row['city']}-{row['district']}"
        elif analysis_type == 'city':
            location_name = row['city']
        else:
            location_name = row['province']
        max_price = row['max_price'] if has_extremes else 0
        min_price = row['min_price'] if has_extremes else 0
        result_data.append({
            'location': location_name,
            'avg_price': round(row['avg_price'], 2),
            'total_count': row['total_count'],
            'max_price': max_price,
            'min_price': min_price,
            'price_range': max_price - min_price if has_extremes else 0,
        })
    # Bucket locations into coarse price tiers for the distribution chart.
    price_distribution = {}
    for item in result_data:
        price_level = 'high' if item['avg_price'] > 30000 else 'medium' if item['avg_price'] > 15000 else 'low'
        price_distribution[price_level] = price_distribution.get(price_level, 0) + 1
    top_5_locations = result_data[:5]
    location_coordinates = df.select("city", "longitude", "latitude").distinct().collect()
    coordinate_map = {row['city']: {'lng': row['longitude'], 'lat': row['latitude']} for row in location_coordinates}
    return JsonResponse({
        'geographic_data': result_data,
        'price_distribution': price_distribution,
        'top_locations': top_5_locations,
        'coordinates': coordinate_map,
        'analysis_type': analysis_type
    })
@require_http_methods(["GET"])
def house_type_price_analysis(request):
    """Analyze prices by house type, room count, and area range; return JSON.

    Query parameters:
        city: optional city name to restrict the analysis to.

    The response contains per-house-type aggregates (including derived
    price-per-square-meter), the room-count distribution, counts and
    averages per fixed area band, and the most popular / most expensive
    house types (``None`` when there is no data).
    """
    city_filter = request.GET.get('city')
    # NOTE(review): JDBC credentials are hard-coded; move to settings/env.
    df = spark.read.format("jdbc").option("url", "jdbc:mysql://localhost:3306/house_price").option("dbtable", "house_price_data").option("user", "root").option("password", "password").load()
    if city_filter:
        df = df.filter(df.city == city_filter)
    house_type_stats = df.groupBy("house_type").agg(avg("price").alias("avg_price"), count("*").alias("count"), avg("area").alias("avg_area"), max("price").alias("max_price"), min("price").alias("min_price")).orderBy(desc("avg_price"))
    type_result = []
    for row in house_type_stats.collect():
        # avg(area) is NULL (None) when every row in the group has a NULL
        # area; `None > 0` raises TypeError, so test truthiness instead.
        avg_area = row['avg_area']
        price_per_sqm = row['avg_price'] / avg_area if avg_area else 0
        type_result.append({
            'house_type': row['house_type'],
            'avg_price': round(row['avg_price'], 2),
            'count': row['count'],
            'avg_area': round(avg_area, 2) if avg_area is not None else 0,
            'price_per_sqm': round(price_per_sqm, 2),
            'max_price': row['max_price'],
            'min_price': row['min_price']
        })
    room_distribution = df.groupBy("rooms").agg(count("*").alias("count"), avg("price").alias("avg_price")).orderBy("rooms").collect()
    room_data = [{'rooms': row['rooms'], 'count': row['count'], 'avg_price': round(row['avg_price'], 2)} for row in room_distribution]
    # Fixed area bands; the last band is open-ended.
    area_ranges = [(0, 60), (60, 90), (90, 120), (120, 150), (150, float('inf'))]
    area_analysis = []
    for min_area, max_area in area_ranges:
        if max_area == float('inf'):
            filtered_df = df.filter(df.area >= min_area)
            range_label = f"{min_area}㎡以上"
        else:
            filtered_df = df.filter((df.area >= min_area) & (df.area < max_area))
            range_label = f"{min_area}-{max_area}㎡"
        stats = filtered_df.agg(count("*").alias("count"), avg("price").alias("avg_price")).collect()[0]
        if stats['count'] > 0:
            area_analysis.append({
                'range': range_label,
                'count': stats['count'],
                'avg_price': round(stats['avg_price'], 2)
            })
    # max() raises ValueError on an empty sequence; an empty result set
    # (e.g. an unknown city filter) must not turn into an HTTP 500.
    popular_type = max(type_result, key=lambda x: x['count']) if type_result else None
    expensive_type = max(type_result, key=lambda x: x['avg_price']) if type_result else None
    return JsonResponse({
        'house_type_data': type_result,
        'room_distribution': room_data,
        'area_analysis': area_analysis,
        'popular_type': popular_type,
        'expensive_type': expensive_type,
        'total_types': len(type_result)
    })
六.系统文档展示
结束
💕💕文末获取源码联系 计算机程序员小杨