💖💖作者:计算机毕业设计江挽 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目
基于大数据的北京二手房数据分析与可视化系统介绍
本系统是一个基于大数据技术的北京二手房数据分析与可视化平台,采用Hadoop+Spark分布式计算框架处理海量房产数据,通过Python语言结合Django后端框架和Vue前端技术栈构建完整的数据分析解决方案。系统核心功能涵盖二手房数据管理、宏观市场分析、户型面积分析、建筑特征分析和房产价值分析等模块,能够对北京地区的二手房交易数据进行深度挖掘和多维度分析。通过Spark SQL进行大规模数据查询和统计计算,利用Pandas和NumPy进行数据预处理和特征工程,最终通过Echarts图表库实现直观的数据可视化展示。系统采用前后端分离架构,后端提供RESTful API接口,前端使用Vue+ElementUI构建响应式用户界面,支持用户管理、数据管理和多种分析报表功能,为房地产市场研究和投资决策提供数据支撑。
基于大数据的北京二手房数据分析与可视化系统演示视频
基于大数据的北京二手房数据分析与可视化系统演示图片
基于大数据的北京二手房数据分析与可视化系统代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
import pandas as pd
import numpy as np
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
spark = SparkSession.builder.appName("BeijingHouseAnalysis").config("spark.sql.adaptive.enabled", "true").config("spark.sql.adaptive.coalescePartitions.enabled", "true").getOrCreate()
@csrf_exempt
def house_data_management(request):
    """Ingest raw listing records, clean them with Spark, and persist + summarize.

    Expects a POST whose JSON body is ``{"house_data": [...]}`` where each
    record matches the schema below.  Rows with missing or non-positive
    price/area are dropped; price/area/age buckets are derived; the cleaned
    set is written to ``/data/house_cleaned`` (overwrite).

    Returns:
        JsonResponse with total row count, average unit price, and the
        per-district listing distribution.  Non-POST requests get a 405.
    """
    if request.method != 'POST':
        # BUG FIX: the original fell through and returned None for non-POST,
        # which makes Django raise "view didn't return an HttpResponse".
        return JsonResponse({"status": "error", "message": "POST required"}, status=405)
    data = json.loads(request.body)
    house_data = data.get('house_data', [])
    schema = StructType([
        StructField("house_id", StringType(), True),
        StructField("title", StringType(), True),
        StructField("price", DoubleType(), True),
        StructField("unit_price", DoubleType(), True),
        StructField("area", DoubleType(), True),
        StructField("room_type", StringType(), True),
        StructField("district", StringType(), True),
        StructField("community", StringType(), True),
        StructField("floor", StringType(), True),
        StructField("build_year", IntegerType(), True),
        StructField("orientation", StringType(), True),
        StructField("decoration", StringType(), True),
        StructField("create_time", TimestampType(), True)
    ])
    df = spark.createDataFrame(house_data, schema)
    # Basic data cleaning: price and area must be present and positive.
    cleaned_df = df.filter(col("price").isNotNull() & (col("price") > 0) & col("area").isNotNull() & (col("area") > 0))
    # Derived categorical features used by the downstream analysis views.
    cleaned_df = cleaned_df.withColumn("price_level", when(col("unit_price") < 50000, "low").when(col("unit_price") < 80000, "medium").otherwise("high"))
    cleaned_df = cleaned_df.withColumn("area_level", when(col("area") < 60, "small").when(col("area") < 120, "medium").otherwise("large"))
    cleaned_df = cleaned_df.withColumn("age", year(current_date()) - col("build_year"))
    cleaned_df.write.mode("overwrite").option("header", "true").csv("/data/house_cleaned")
    total_count = cleaned_df.count()
    avg_price = cleaned_df.agg(avg("unit_price")).collect()[0][0]
    # BUG FIX: avg() is None on an empty dataset and round(None, 2) raises
    # TypeError; report 0.0 instead of crashing.
    if avg_price is None:
        avg_price = 0.0
    district_stats = cleaned_df.groupBy("district").agg(count("*").alias("house_count"), avg("unit_price").alias("avg_price")).orderBy(desc("house_count"))
    result_data = {
        "status": "success",
        "total_processed": total_count,
        "average_unit_price": round(avg_price, 2),
        # BUG FIX: pyspark Row objects are not JSON-serializable; JsonResponse
        # would raise TypeError.  Convert each Row to a plain dict.
        "district_distribution": [row.asDict() for row in district_stats.collect()]
    }
    return JsonResponse(result_data)
@csrf_exempt
def macro_market_analysis(request):
    """Compute macro market statistics from the cleaned house dataset.

    GET-only endpoint.  Reads ``/data/house_cleaned`` (CSV, all columns as
    strings), casts numeric columns, and returns: monthly price/volume trend,
    per-district stats, unit-price bucket distribution, a heuristic district
    "heat" score, and a price/area/build-year correlation matrix.
    """
    if request.method != 'GET':
        # BUG FIX: the original returned None for non-GET requests.
        return JsonResponse({"status": "error", "message": "GET required"}, status=405)
    house_df = spark.read.option("header", "true").csv("/data/house_cleaned")
    # CSV columns come back as strings; cast the ones used numerically.
    # BUG FIX: build_year was never cast, so the pandas corr() below silently
    # excluded (or errored on) that column.
    house_df = house_df.withColumn("price", col("price").cast("double")).withColumn("unit_price", col("unit_price").cast("double")).withColumn("area", col("area").cast("double")).withColumn("build_year", col("build_year").cast("integer"))
    monthly_trend = house_df.withColumn("month", date_format(col("create_time"), "yyyy-MM")).groupBy("month").agg(avg("unit_price").alias("avg_unit_price"), count("*").alias("transaction_count"), avg("price").alias("avg_total_price")).orderBy("month")
    district_analysis = house_df.groupBy("district").agg(avg("unit_price").alias("avg_unit_price"), count("*").alias("house_count"), min("unit_price").alias("min_price"), max("unit_price").alias("max_price"), stddev("unit_price").alias("price_volatility")).orderBy(desc("avg_unit_price"))
    # BUG FIX: the original ordered by the label string, which sorts
    # lexicographically ("10万+" before "4-6万").  Carry an explicit numeric
    # sort key alongside the label so buckets come back in price order.
    price_distribution = (house_df
        .withColumn("range_order",
            when(col("unit_price") < 40000, 0)
            .when(col("unit_price") < 60000, 1)
            .when(col("unit_price") < 80000, 2)
            .when(col("unit_price") < 100000, 3)
            .otherwise(4))
        .withColumn("price_range",
            when(col("unit_price") < 40000, "0-4万")
            .when(col("unit_price") < 60000, "4-6万")
            .when(col("unit_price") < 80000, "6-8万")
            .when(col("unit_price") < 100000, "8-10万")
            .otherwise("10万+"))
        .groupBy("range_order", "price_range").count().orderBy("range_order"))
    # Heuristic "heat": listing density weighted by price level (/1e6 keeps
    # the score in a chart-friendly range).
    market_heat = house_df.groupBy("district").agg((count("*") / avg("area")).alias("density_index"), avg("unit_price").alias("avg_price")).withColumn("heat_score", col("density_index") * col("avg_price") / 1000000).orderBy(desc("heat_score"))
    correlation_analysis = house_df.select("unit_price", "area", "build_year").toPandas()
    correlation_matrix = correlation_analysis.corr().to_dict()
    monthly_data = [{"month": row["month"], "avg_price": round(row["avg_unit_price"], 2), "count": row["transaction_count"]} for row in monthly_trend.collect()]
    # BUG FIX: stddev() is None for districts with a single listing, and
    # round(None, 2) raises TypeError; report 0.0 volatility in that case.
    district_data = [{"district": row["district"], "avg_price": round(row["avg_unit_price"], 2), "count": row["house_count"], "volatility": round(row["price_volatility"], 2) if row["price_volatility"] is not None else 0.0} for row in district_analysis.collect()]
    distribution_data = [{"range": row["price_range"], "count": row["count"]} for row in price_distribution.collect()]
    heat_data = [{"district": row["district"], "heat_score": round(row["heat_score"], 2)} for row in market_heat.collect()]
    analysis_result = {
        "monthly_trend": monthly_data,
        "district_analysis": district_data,
        "price_distribution": distribution_data,
        "market_heat": heat_data,
        "correlation_matrix": correlation_matrix
    }
    return JsonResponse(analysis_result)
@csrf_exempt
def house_value_analysis(request):
    """Score the value of selected listings against market benchmarks.

    POST-only endpoint; JSON body ``{"house_ids": [...]}``.  For each target
    listing, computes premium vs. district / room-type / building-age
    averages, a weighted value score and level, plus up to 10 comparable
    listings and the target district's latest year-over-year growth rate.
    """
    if request.method != 'POST':
        # BUG FIX: the original returned None for non-POST requests.
        return JsonResponse({"status": "error", "message": "POST required"}, status=405)
    data = json.loads(request.body)
    target_houses = data.get('house_ids', [])
    empty_result = {"value_analysis": [], "comparable_houses": [], "district_growth_rate": 0.0}
    if not target_houses:
        # BUG FIX: with no ids, target_df.first() is None and the original's
        # `first()[0]` subscripts raised TypeError.
        return JsonResponse(empty_result)
    house_df = spark.read.option("header", "true").csv("/data/house_cleaned")
    house_df = house_df.withColumn("price", col("price").cast("double")).withColumn("unit_price", col("unit_price").cast("double")).withColumn("area", col("area").cast("double")).withColumn("build_year", col("build_year").cast("integer"))
    target_df = house_df.filter(col("house_id").isin(target_houses))
    # PERF/BUG FIX: the original called target_df.select(...).first() five
    # separate times (five Spark jobs) and crashed if no id matched.  Fetch
    # the reference row once and bail out early when nothing matched.
    target_row = target_df.select("district", "room_type", "area", "unit_price").first()
    if target_row is None:
        return JsonResponse(empty_result)
    district_avg = house_df.groupBy("district").agg(avg("unit_price").alias("district_avg_price"))
    room_type_avg = house_df.groupBy("room_type").agg(avg("unit_price").alias("room_avg_price"))
    # Derive the age bucket once and reuse it on both sides of the join
    # instead of rebuilding the when() chain inside the join condition.
    age_group_expr = when(col("build_year") > 2015, "new").when(col("build_year") > 2005, "medium").otherwise("old")
    age_avg = house_df.withColumn("age_group", age_group_expr).groupBy("age_group").agg(avg("unit_price").alias("age_avg_price"))
    enriched_df = (target_df.withColumn("age_group", age_group_expr)
        .join(district_avg, "district")
        .join(room_type_avg, "room_type")
        .join(age_avg, "age_group"))
    # Premiums: percentage above/below each benchmark average.
    value_df = enriched_df.withColumn("district_premium", (col("unit_price") - col("district_avg_price")) / col("district_avg_price") * 100).withColumn("room_premium", (col("unit_price") - col("room_avg_price")) / col("room_avg_price") * 100).withColumn("age_premium", (col("unit_price") - col("age_avg_price")) / col("age_avg_price") * 100)
    # Weighted composite score (district weighs most) mapped to a label.
    value_df = value_df.withColumn("value_score", (col("district_premium") * 0.4 + col("room_premium") * 0.3 + col("age_premium") * 0.3)).withColumn("value_level", when(col("value_score") > 20, "高价值").when(col("value_score") > 0, "合理价值").when(col("value_score") > -20, "一般价值").otherwise("低价值"))
    # Comparables: same district and room type, within 20 m2, nearest by
    # unit price, top 10.
    comparable_houses = (house_df
        .filter((col("district") == target_row["district"])
                & (col("room_type") == target_row["room_type"])
                & (abs(col("area") - target_row["area"]) < 20))
        .select("house_id", "title", "unit_price", "area")
        .orderBy(abs(col("unit_price") - target_row["unit_price"]))
        .limit(10))
    investment_potential = house_df.filter(col("district") == target_row["district"]).groupBy(year(col("create_time")).alias("year")).agg(avg("unit_price").alias("yearly_avg")).orderBy("year")
    yearly_rows = investment_potential.collect()
    growth_rate = 0.0
    # BUG FIX: guard the denominator — a None/zero previous-year average
    # would raise TypeError/ZeroDivisionError.
    if len(yearly_rows) > 1 and yearly_rows[-2]["yearly_avg"]:
        recent_price = yearly_rows[-1]["yearly_avg"]
        previous_price = yearly_rows[-2]["yearly_avg"]
        growth_rate = (recent_price - previous_price) / previous_price * 100
    value_results = []
    for row in value_df.collect():
        value_results.append({
            "house_id": row["house_id"],
            "title": row["title"],
            "current_price": row["unit_price"],
            "district_premium": round(row["district_premium"], 2),
            "room_premium": round(row["room_premium"], 2),
            "age_premium": round(row["age_premium"], 2),
            "value_score": round(row["value_score"], 2),
            "value_level": row["value_level"],
            "growth_rate": round(growth_rate, 2)
        })
    comparable_data = [{"house_id": row["house_id"], "title": row["title"], "unit_price": row["unit_price"], "area": row["area"]} for row in comparable_houses.collect()]
    return JsonResponse({"value_analysis": value_results, "comparable_houses": comparable_data, "district_growth_rate": round(growth_rate, 2)})
基于大数据的北京二手房数据分析与可视化系统文档展示
💖💖作者:计算机毕业设计江挽 💙💙个人简介:曾长期从事计算机专业培训教学,本人也热爱上课教学,语言擅长Java、微信小程序、Python、Golang、安卓Android等,开发项目包括大数据、深度学习、网站、小程序、安卓、算法。平常会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目