前言
💖💖作者:计算机程序员小杨 💙💙个人简介:我是一名计算机相关专业的从业者,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。热爱技术,喜欢钻研新工具和框架,也乐于通过代码解决实际问题,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💕💕文末获取源码联系 计算机程序员小杨 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 深度学习实战项目 计算机毕业设计选题 💜💜
一.开发工具简介
大数据框架:Hadoop+Spark(本项目未使用Hive,如有需要可定制集成) 开发语言:Python+Java(两个版本都支持) 后端框架:Django+Spring Boot(Spring+SpringMVC+Mybatis)(两个版本都支持) 前端:Vue+ElementUI+Echarts+HTML+CSS+JavaScript+jQuery 详细技术点:Hadoop、HDFS、Spark、Spark SQL、Pandas、NumPy 数据库:MySQL
二.系统内容简介
本系统是一套基于大数据技术的气象地质灾害综合分析平台,采用Hadoop+Spark分布式计算框架处理海量灾害数据,通过Django后端和Vue前端实现数据的采集、存储、分析与可视化展示。系统利用Spark SQL进行结构化数据查询,结合Pandas和NumPy进行数据清洗与统计分析,支持对气象地质灾害信息的多维度管理。核心功能涵盖灾害事件的关联对比分析、特征提取分析、空间分布规律挖掘、时间演变趋势追踪以及基于自然语言处理的文本挖掘分析,通过Echarts图表库实现数据的动态可视化呈现。系统提供用户权限管理、灾害信息录入与查询、多维度统计分析、可视化大屏展示等完整功能模块,可为气象地质灾害的监测预警、规律分析和决策支持提供数据技术支撑,帮助相关部门更直观地掌握灾害发生发展规律,提升灾害防治的科学性和时效性。
三.系统功能演示
四.系统界面展示
五.系统源码展示
from pyspark.sql import SparkSession
from pyspark.sql.functions import col,count,avg,sum,year,month,dayofmonth,to_date,when,lag,unix_timestamp,explode,split
from pyspark.sql.window import Window
from django.http import JsonResponse
from django.views import View
import pandas as pd
import numpy as np
import json
import jieba
from collections import Counter
# Shared SparkSession used by every analysis view below (2g executors, 1g driver,
# Hive-style warehouse dir). NOTE(review): creating this at module import time
# means each Django worker process spins up its own Spark driver — confirm the
# deployment expects one session per worker.
spark=SparkSession.builder.appName("DisasterAnalysis").config("spark.sql.warehouse.dir","/user/hive/warehouse").config("spark.executor.memory","2g").config("spark.driver.memory","1g").getOrCreate()
class DisasterCorrelationAnalysis(View):
    """Analyze co-occurrence between two disaster types.

    POST body (JSON): disaster_type_1, disaster_type_2, start_date, end_date,
    optional region. Returns JSON with the joint event count, co-occurrence
    rate, severity/loss correlations, mean inter-event gap (days) and the top
    10 regions by joint occurrences.
    """

    def post(self, request):
        params = json.loads(request.body)
        disaster_type_1 = params.get('disaster_type_1')
        disaster_type_2 = params.get('disaster_type_2')
        start_date = params.get('start_date')
        end_date = params.get('end_date')
        region = params.get('region')
        # NOTE(review): JDBC credentials are hard-coded; they should come from
        # Django settings / environment variables.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/disaster_db")
              .option("driver", "com.mysql.cj.jdbc.Driver")
              .option("dbtable", "disaster_events")
              .option("user", "root")
              .option("password", "123456")
              .load())
        df = df.filter((col("event_date") >= start_date) & (col("event_date") <= end_date))
        if region:
            df = df.filter(col("region") == region)
        df_type1 = (df.filter(col("disaster_type") == disaster_type_1)
                    .select("event_date", "region", "disaster_level", "economic_loss")
                    .withColumnRenamed("disaster_level", "level_1")
                    .withColumnRenamed("economic_loss", "loss_1"))
        df_type2 = (df.filter(col("disaster_type") == disaster_type_2)
                    .select("event_date", "region", "disaster_level", "economic_loss")
                    .withColumnRenamed("disaster_level", "level_2")
                    .withColumnRenamed("economic_loss", "loss_2"))
        # Events of both types on the same date in the same region count as
        # one co-occurrence.
        joined_df = df_type1.join(df_type2, ["event_date", "region"], "inner")
        correlation_count = joined_df.count()
        total_type1 = df_type1.count()
        total_type2 = df_type2.count()
        denom = max(total_type1, total_type2)
        correlation_rate = correlation_count / denom if denom > 0 else 0
        level_map = {"轻微": 1, "一般": 2, "严重": 3, "特别严重": 4}
        joined_pd = joined_df.toPandas()
        joined_pd['level_1_num'] = joined_pd['level_1'].map(level_map)
        joined_pd['level_2_num'] = joined_pd['level_2'].map(level_map)
        # FIX: Series.corr() returns NaN for constant columns or unmapped
        # levels; NaN is not valid JSON, so coerce undefined results to 0.
        level_corr = self._safe_corr(joined_pd, 'level_1_num', 'level_2_num')
        loss_corr = self._safe_corr(joined_pd, 'loss_1', 'loss_2')
        time_gap_df = joined_df.withColumn(
            "time_diff",
            unix_timestamp("event_date") - lag(unix_timestamp("event_date")).over(
                Window.partitionBy("region").orderBy("event_date")))
        # FIX: time_gap_df has exactly the same rows as joined_df, so reuse
        # correlation_count instead of triggering another Spark count() job.
        avg_time_gap = (time_gap_df.select(avg("time_diff")).collect()[0][0]
                        if correlation_count > 0 else 0)
        # avg() over all-null diffs yields None; treat it as "no gap data".
        avg_time_gap_days = avg_time_gap / 86400 if avg_time_gap else 0
        region_correlation = (joined_df.groupBy("region")
                              .agg(count("*").alias("correlation_count"))
                              .orderBy(col("correlation_count").desc())
                              .limit(10)
                              .toPandas()
                              .to_dict('records'))
        return JsonResponse({
            "correlation_count": correlation_count,
            "correlation_rate": round(correlation_rate, 4),
            "level_correlation": round(level_corr, 4),
            "loss_correlation": round(loss_corr, 4),
            "avg_time_gap_days": round(avg_time_gap_days, 2),
            "region_correlation": region_correlation,
        })

    @staticmethod
    def _safe_corr(frame, col_a, col_b):
        """Pearson correlation of two columns; 0 when undefined (<2 rows or NaN)."""
        if len(frame) <= 1:
            return 0
        value = frame[[col_a, col_b]].corr().iloc[0, 1]
        return 0 if pd.isna(value) else float(value)
class DisasterSpatialDistribution(View):
    """Aggregate disaster events spatially and cluster regions by risk.

    POST body (JSON): start_date, end_date, optional disaster_type, optional
    analysis_level ('province' | 'city' | anything else = 'region').
    Returns per-area aggregates, top-10 lists by event count and loss, and a
    KMeans risk-cluster label per area.
    """

    def post(self, request):
        from sklearn.cluster import KMeans  # local import kept: sklearn only needed here
        params = json.loads(request.body)
        disaster_type = params.get('disaster_type')
        start_date = params.get('start_date')
        end_date = params.get('end_date')
        analysis_level = params.get('analysis_level', 'province')
        # NOTE(review): JDBC credentials are hard-coded; move to settings/env.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/disaster_db")
              .option("driver", "com.mysql.cj.jdbc.Driver")
              .option("dbtable", "disaster_events")
              .option("user", "root")
              .option("password", "123456")
              .load())
        df = df.filter((col("event_date") >= start_date) & (col("event_date") <= end_date))
        if disaster_type:
            df = df.filter(col("disaster_type") == disaster_type)
        agg_exprs = [
            count("*").alias("event_count"),
            sum("economic_loss").alias("total_loss"),
            avg("economic_loss").alias("avg_loss"),
            sum("affected_population").alias("total_affected"),
            avg("affected_population").alias("avg_affected"),
        ]
        if analysis_level == 'province':
            spatial_df = df.groupBy("province").agg(*agg_exprs)
        elif analysis_level == 'city':
            spatial_df = df.groupBy("province", "city").agg(*agg_exprs)
        else:
            spatial_df = df.groupBy("region").agg(*agg_exprs)
        spatial_df = spatial_df.fillna(0)
        spatial_pd = spatial_df.toPandas()
        # event_count from groupBy is always >= 1, so these divisions are safe.
        spatial_pd['loss_density'] = spatial_pd['total_loss'] / spatial_pd['event_count']
        spatial_pd['affected_density'] = spatial_pd['total_affected'] / spatial_pd['event_count']
        high_risk_regions = spatial_pd.nlargest(10, 'event_count')
        high_loss_regions = spatial_pd.nlargest(10, 'total_loss')
        disaster_level_df = df.groupBy("region", "disaster_level").agg(count("*").alias("level_count"))
        level_pivot = (disaster_level_df.groupBy("region")
                       .pivot("disaster_level", ["轻微", "一般", "严重", "特别严重"])
                       .sum("level_count")
                       .fillna(0))
        level_pd = level_pivot.toPandas()
        spatial_with_level = (spatial_pd.merge(level_pd, on='region', how='left')
                              if analysis_level == 'region' else spatial_pd)
        cluster_features = spatial_pd[['event_count', 'total_loss', 'total_affected']].values
        # FIX: guard against zero standard deviation (constant feature column),
        # which would produce NaN/inf and crash KMeans.fit_predict.
        feature_std = np.std(cluster_features, axis=0)
        feature_std[feature_std == 0] = 1
        cluster_normalized = (cluster_features - np.mean(cluster_features, axis=0)) / feature_std
        kmeans = KMeans(n_clusters=min(5, len(spatial_pd)), random_state=42)
        spatial_pd['risk_cluster'] = (kmeans.fit_predict(cluster_normalized)
                                      if len(spatial_pd) >= 5 else 0)
        # FIX: the per-level pivot (spatial_with_level) was computed but never
        # returned — include it so region-level results carry severity columns.
        if analysis_level == 'region':
            spatial_with_level['risk_cluster'] = spatial_pd['risk_cluster']
            spatial_result = spatial_with_level.to_dict('records')
        else:
            spatial_result = spatial_pd.to_dict('records')
        return JsonResponse({
            "spatial_distribution": spatial_result,
            "high_risk_regions": high_risk_regions.to_dict('records'),
            "high_loss_regions": high_loss_regions.to_dict('records'),
            "total_analysis_count": len(spatial_pd),
        })
class DisasterTextMining(View):
    """Mine disaster description texts: keywords, sentiment, named entities.

    POST body (JSON): start_date, end_date, optional disaster_type, optional
    top_n (default 50). Returns top keywords, per-disaster keyword counts,
    average document length, a lexicon-based sentiment distribution and
    regex-extracted location/time/number entities.
    """

    def post(self, request):
        import re  # hoisted to method top (was mid-method in the original)
        params = json.loads(request.body)
        start_date = params.get('start_date')
        end_date = params.get('end_date')
        disaster_type = params.get('disaster_type')
        top_n = params.get('top_n', 50)
        # NOTE(review): JDBC credentials are hard-coded; move to settings/env.
        df = (spark.read.format("jdbc")
              .option("url", "jdbc:mysql://localhost:3306/disaster_db")
              .option("driver", "com.mysql.cj.jdbc.Driver")
              .option("dbtable", "disaster_events")
              .option("user", "root")
              .option("password", "123456")
              .load())
        df = df.filter((col("event_date") >= start_date) & (col("event_date") <= end_date))
        if disaster_type:
            df = df.filter(col("disaster_type") == disaster_type)
        text_df = df.select("event_description", "disaster_report", "event_id")
        text_pd = text_df.toPandas()
        text_pd['combined_text'] = (text_pd['event_description'].fillna('')
                                    + " " + text_pd['disaster_report'].fillna(''))
        all_text = " ".join(text_pd['combined_text'].tolist())
        stopwords = {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都',
                     '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你',
                     '会', '着', '没有', '看', '好', '自己', '这'}
        words = jieba.cut(all_text)
        filtered_words = [w for w in words
                          if len(w) > 1 and w not in stopwords and not w.isdigit()]
        word_freq = Counter(filtered_words)
        top_words = word_freq.most_common(top_n)
        keyword_list = [{"word": w, "freq": f} for w, f in top_words]
        disaster_keywords = {
            '滑坡': ['滑坡', '滑动', '山体', '土石'],
            '泥石流': ['泥石流', '泥流', '冲击', '淤积'],
            '暴雨': ['暴雨', '强降雨', '积水', '内涝'],
            '台风': ['台风', '风暴', '大风', '强风'],
            '地震': ['地震', '震级', '震中', '余震'],
        }
        keyword_stats = {disaster: sum(word_freq.get(kw, 0) for kw in keywords)
                         for disaster, keywords in disaster_keywords.items()}
        sentiment_positive_words = {'成功', '有效', '及时', '安全', '稳定', '恢复', '救援', '保障'}
        sentiment_negative_words = {'损失', '破坏', '伤亡', '严重', '危险', '威胁', '受灾', '灾难'}
        # PERF FIX: the original segmented every document three times (word
        # count, positive score, negative score). Cut each document once and
        # derive all three metrics from that single token list.
        tokenized = text_pd['combined_text'].apply(lambda x: list(jieba.cut(x)))
        text_pd['word_count'] = tokenized.apply(len)
        text_pd['positive_score'] = tokenized.apply(
            lambda ws: sum(1 for w in ws if w in sentiment_positive_words))
        text_pd['negative_score'] = tokenized.apply(
            lambda ws: sum(1 for w in ws if w in sentiment_negative_words))
        # FIX: mean() of an empty frame is NaN, which is not valid JSON.
        avg_word_count = float(text_pd['word_count'].mean()) if len(text_pd) > 0 else 0
        text_pd['sentiment'] = text_pd.apply(
            lambda row: 'positive' if row['positive_score'] > row['negative_score']
            else ('negative' if row['negative_score'] > row['positive_score'] else 'neutral'),
            axis=1)
        sentiment_dist = text_pd['sentiment'].value_counts().to_dict()
        # FIX: patterns are now raw strings — the originals contained invalid
        # escape sequences (\d, \s, \.) in plain string literals.
        entity_pattern = [
            r'(?P<location>[^,。;,;\s]{2,8}(省|市|县|区|乡|镇|村))',
            r'(?P<time>\d{4}年\d{1,2}月\d{1,2}日|\d{1,2}月\d{1,2}日)',
            r'(?P<number>\d+\.?\d*(人|万人|户|万户|元|万元|亿元))',
        ]
        entities = {"location": [], "time": [], "number": []}
        for pattern in entity_pattern:
            for match in re.finditer(pattern, all_text):
                for key, value in match.groupdict().items():
                    if value:
                        entities[key].append(value)
        entity_freq = {k: Counter(v).most_common(20) for k, v in entities.items()}
        return JsonResponse({
            "top_keywords": keyword_list,
            "disaster_keyword_stats": keyword_stats,
            "avg_word_count": round(avg_word_count, 2),
            "sentiment_distribution": sentiment_dist,
            "entity_extraction": entity_freq,
            "total_documents": len(text_pd),
        })
六.系统文档展示
结束
💕💕文末获取源码联系 计算机程序员小杨