一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《北京二手房数据分析与可视化系统》是一套基于大数据技术的房地产市场分析平台,采用Hadoop+Spark分布式计算框架处理海量房产交易数据,通过Python语言构建完整的数据处理链路。系统后端基于Django框架设计,前端采用Vue+ElementUI+Echarts技术栈构建用户界面,实现数据的采集、存储、分析与可视化展示。系统核心功能涵盖用户管理、二手房数据管理、宏观市场分析、户型面积分析、建筑特征分析、房产价值分析以及可视化大屏等模块,通过Spark SQL进行高效的数据查询与统计分析,结合Pandas、NumPy等科学计算库实现复杂的数据处理算法,将分析结果以直观的图表形式呈现,为房地产投资决策、市场趋势研判和价格评估提供数据支撑,帮助用户深入了解北京二手房市场的发展规律和价格变动趋势。
三、视频解说
四、部分功能展示
五、部分代码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, avg, count, sum, when, desc, asc, year, month
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Module-level SparkSession shared by every view function below; adaptive query
# execution is enabled so Spark can coalesce shuffle partitions at runtime.
spark = SparkSession.builder.appName("BeijingHouseAnalysis").config("spark.sql.adaptive.enabled", "true").config("spark.sql.adaptive.coalescePartitions.enabled", "true").getOrCreate()
def market_trend_analysis(request):
    """Summarize second-hand-house market trends for a date window.

    POST body (JSON): start_date, end_date, optional district ('all' = no
    district filter).  Returns a JsonResponse with the monthly price trend,
    monthly transaction-volume trend, a volatility coefficient (std/mean of
    the monthly average prices), the top-10 districts by average price, a
    market-heat index and an overall trend direction.
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        start_date = data.get('start_date')
        end_date = data.get('end_date')
        district = data.get('district', 'all')
        house_df = (spark.read.format("jdbc")
                    .option("url", "jdbc:mysql://localhost:3306/house_db")
                    .option("dbtable", "house_transaction")
                    .option("user", "root")
                    .option("password", "password")
                    .load())
        date_filter = (col("transaction_date") >= start_date) & (col("transaction_date") <= end_date)
        filtered_df = house_df.filter(date_filter)
        if district != 'all':
            filtered_df = filtered_df.filter(col("district") == district)
        # BUG FIX: PySpark Column has no .concat() method, and even conceptually
        # concatenating unpadded year/month strings ("20231" vs "202311") breaks
        # the later orderBy.  year*100 + month yields a sortable yyyyMM key.
        monthly_stats = filtered_df.withColumn(
            "year_month",
            (year(col("transaction_date")) * 100 + month(col("transaction_date"))).cast("string")
        ).groupBy("year_month").agg(
            avg("price_per_sqm").alias("avg_price"),
            count("*").alias("transaction_count"),
            sum("total_price").alias("total_amount"))
        monthly_stats = monthly_stats.orderBy("year_month")
        price_trend = monthly_stats.select("year_month", "avg_price").collect()
        volume_trend = monthly_stats.select("year_month", "transaction_count").collect()
        # Reuse the already-collected rows instead of launching another Spark job.
        price_values = [row["avg_price"] for row in price_trend]
        volatility_coefficient = float(np.std(price_values) / np.mean(price_values)) if len(price_values) > 1 else 0
        district_comparison = house_df.filter(date_filter).groupBy("district").agg(
            avg("price_per_sqm").alias("district_avg_price"),
            count("*").alias("district_count")).orderBy(desc("district_avg_price"))
        hot_districts = district_comparison.limit(10).collect()
        # BUG FIX: the builtin sum is shadowed at module level by
        # pyspark.sql.functions.sum, so the original sum([...]) over a plain
        # Python list could not work; accumulate explicitly instead, and guard
        # against division by zero on an empty table.
        hot_transaction_count = 0
        for row in hot_districts:
            hot_transaction_count += row['district_count']
        total_transactions = house_df.count()
        market_heat_index = (hot_transaction_count / total_transactions) * 100 if total_transactions else 0
        trend_direction = "上升" if len(price_trend) > 1 and price_trend[-1]['avg_price'] > price_trend[0]['avg_price'] else "下降"
        result = {
            "price_trend": [{"month": row["year_month"], "price": float(row["avg_price"])} for row in price_trend],
            "volume_trend": [{"month": row["year_month"], "volume": row["transaction_count"]} for row in volume_trend],
            "volatility": round(volatility_coefficient, 4),
            "hot_districts": [{"district": row["district"], "avg_price": float(row["district_avg_price"]), "count": row["district_count"]} for row in hot_districts],
            "market_heat": round(market_heat_index, 2),
            "trend_direction": trend_direction,
        }
        return JsonResponse(result)
def house_type_analysis(request):
    """Analyze house-type structure: room layout or area distribution.

    POST body (JSON): analysis_type ('room_distribution' or
    'area_distribution'), price_range [min, max] applied to price_per_sqm,
    area_range [min, max] applied to area.  Returns a JsonResponse with the
    distribution data, the most popular types and a price-efficiency ranking
    per room count.
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        analysis_type = data.get('analysis_type', 'room_distribution')
        price_range = data.get('price_range', [0, 50000])
        area_range = data.get('area_range', [0, 300])
        house_df = (spark.read.format("jdbc")
                    .option("url", "jdbc:mysql://localhost:3306/house_db")
                    .option("dbtable", "house_info")
                    .option("user", "root")
                    .option("password", "password")
                    .load())
        filtered_df = house_df.filter(
            (col("price_per_sqm") >= price_range[0]) & (col("price_per_sqm") <= price_range[1]) &
            (col("area") >= area_range[0]) & (col("area") <= area_range[1]))
        # PERF FIX: compute the total once.  The original called
        # filtered_df.count() inside comprehensions/loops, triggering a full
        # Spark job per element.
        total_filtered = filtered_df.count()
        # BUG FIX: with an unrecognized analysis_type the original left
        # result_data / popular_data unbound and raised NameError; default them.
        result_data = []
        popular_data = []
        if analysis_type == 'room_distribution':
            room_stats = filtered_df.groupBy("room_count", "hall_count").agg(
                count("*").alias("house_count"),
                avg("price_per_sqm").alias("avg_price_per_sqm"),
                avg("area").alias("avg_area")).orderBy("room_count", "hall_count")
            room_distribution = room_stats.collect()
            popular_types = room_stats.orderBy(desc("house_count")).limit(5).collect()
            result_data = [
                {"type": f"{row['room_count']}室{row['hall_count']}厅",
                 "count": row["house_count"],
                 "avg_price": float(row["avg_price_per_sqm"]),
                 "avg_area": float(row["avg_area"])}
                for row in room_distribution]
            # Guard against division by zero when no rows match the filters.
            popular_data = [
                {"type": f"{row['room_count']}室{row['hall_count']}厅",
                 "count": row["house_count"],
                 "proportion": round(row["house_count"] / total_filtered * 100, 2) if total_filtered > 0 else 0}
                for row in popular_types]
        elif analysis_type == 'area_distribution':
            area_bins = [(0, 50), (50, 80), (80, 120), (120, 160), (160, 200), (200, 300)]
            area_analysis = []
            for min_area, max_area in area_bins:
                bin_df = filtered_df.filter((col("area") >= min_area) & (col("area") < max_area))
                bin_count = bin_df.count()
                # avg over zero rows yields None; only query it when non-empty.
                bin_avg_price = bin_df.agg(avg("price_per_sqm")).collect()[0][0] if bin_count > 0 else 0
                area_analysis.append({
                    "range": f"{min_area}-{max_area}㎡",
                    "count": bin_count,
                    "avg_price": float(bin_avg_price) if bin_avg_price else 0,
                    "proportion": round(bin_count / total_filtered * 100, 2) if total_filtered > 0 else 0})
            result_data = area_analysis
            popular_data = sorted(area_analysis, key=lambda x: x['count'], reverse=True)[:3]
        price_efficiency = (filtered_df
                            .withColumn("price_efficiency", col("area") / col("price_per_sqm"))
                            .groupBy("room_count")
                            .agg(avg("price_efficiency").alias("avg_efficiency"))
                            .orderBy(desc("avg_efficiency"))
                            .collect())
        efficiency_ranking = [{"room_type": f"{row['room_count']}室", "efficiency": float(row['avg_efficiency'])} for row in price_efficiency]
        result = {
            "distribution_data": result_data,
            "popular_types": popular_data,
            "price_efficiency": efficiency_ranking,
            "total_analyzed": total_filtered,
        }
        return JsonResponse(result)
def property_value_assessment(request):
    """Estimate a single house's value from weighted scoring factors.

    POST body (JSON): house_id, optional factors list (subset of location /
    age / floor / orientation / decoration).  The base price is the average
    price of similar houses (same district + room count, area within ±10㎡),
    falling back to the district average, scaled by the weighted factor score.
    Returns a JsonResponse with the estimated price, a ±10% price range,
    per-factor scores, a market comparison and an investment-potential label.
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        house_id = data.get('house_id')
        assessment_factors = data.get('factors', ['location', 'age', 'floor', 'orientation', 'decoration'])
        house_df = (spark.read.format("jdbc")
                    .option("url", "jdbc:mysql://localhost:3306/house_db")
                    .option("dbtable", "house_detail")
                    .option("user", "root")
                    .option("password", "password")
                    .load())
        # BUG FIX: the original indexed collect()[0] directly and crashed with
        # an IndexError (HTTP 500) when the id did not exist.
        target_rows = house_df.filter(col("id") == house_id).collect()
        if not target_rows:
            return JsonResponse({"error": "house not found"}, status=404)
        target_house = target_rows[0]
        district_avg = house_df.filter(col("district") == target_house['district']).agg(avg("price_per_sqm")).collect()[0][0]
        similar_houses = house_df.filter(
            (col("district") == target_house['district']) &
            (col("room_count") == target_house['room_count']) &
            (col("area") >= target_house['area'] - 10) &
            (col("area") <= target_house['area'] + 10))
        similar_avg = similar_houses.agg(avg("price_per_sqm")).collect()[0][0]
        # Distance-to-subway tiers: <=500m, <=1000m, farther.
        location_score = 0.8 if target_house['subway_distance'] <= 500 else (0.6 if target_house['subway_distance'] <= 1000 else 0.4)
        # Linear decay over 30 years, floored at 0.3.
        age_score = max(0.3, 1.0 - (target_house['building_age'] / 30))
        floor_total = target_house['total_floors']
        current_floor = target_house['current_floor']
        # Middle floors (30%-80% of the building) score best.
        if current_floor <= floor_total * 0.3:
            floor_score = 0.7
        elif current_floor <= floor_total * 0.8:
            floor_score = 1.0
        else:
            floor_score = 0.8
        orientation_scores = {'南': 1.0, '东南': 0.9, '西南': 0.85, '东': 0.8, '西': 0.75, '北': 0.6}
        orientation_score = orientation_scores.get(target_house['orientation'], 0.7)
        decoration_scores = {'精装': 1.0, '简装': 0.8, '毛坯': 0.6}
        decoration_score = decoration_scores.get(target_house['decoration'], 0.7)
        factor_weights = {'location': 0.3, 'age': 0.2, 'floor': 0.15, 'orientation': 0.2, 'decoration': 0.15}
        factor_scores = {'location': location_score, 'age': age_score, 'floor': floor_score, 'orientation': orientation_score, 'decoration': decoration_score}
        # BUG FIX: the builtin sum is shadowed at module level by
        # pyspark.sql.functions.sum, so the original generator-sum could not
        # work on plain floats; accumulate with an explicit loop.
        weighted_score = 0.0
        for factor in assessment_factors:
            if factor in factor_scores:
                weighted_score += factor_scores[factor] * factor_weights[factor]
        # BUG FIX: avg() over zero rows returns None; the original passed None
        # to round() and compared None with floats.  Fall back safely.
        safe_district_avg = district_avg if district_avg is not None else 0
        base_price = similar_avg if similar_avg else safe_district_avg
        estimated_price = base_price * weighted_score
        price_range = [estimated_price * 0.9, estimated_price * 1.1]
        market_comparison = "高于市场均价" if estimated_price > safe_district_avg else ("低于市场均价" if estimated_price < safe_district_avg else "接近市场均价")
        investment_potential = "较高" if weighted_score >= 0.8 else ("中等" if weighted_score >= 0.6 else "较低")
        result = {
            "estimated_price": round(estimated_price, 2),
            "price_range": [round(price_range[0], 2), round(price_range[1], 2)],
            "district_avg": round(safe_district_avg, 2),
            "similar_avg": round(similar_avg, 2) if similar_avg is not None else 0,
            "factor_scores": {k: round(v, 2) for k, v in factor_scores.items()},
            "overall_score": round(weighted_score, 2),
            "market_comparison": market_comparison,
            "investment_potential": investment_potential,
        }
        return JsonResponse(result)
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊