一、个人简介
💖💖作者:计算机编程果茶熊 💙💙个人简介:曾长期从事计算机专业培训教学,担任过编程老师,同时本人也热爱上课教学,擅长Java、微信小程序、Python、Golang、安卓Android等多个IT方向。会做一些项目定制化开发、代码讲解、答辩教学、文档编写、也懂一些降重方面的技巧。平常喜欢分享一些自己开发中遇到的问题的解决办法,也喜欢交流技术,大家有技术代码这一块的问题可以问我! 💛💛想说的话:感谢大家的关注与支持! 💜💜 网站实战项目 安卓/小程序实战项目 大数据实战项目 计算机毕业设计选题 💕💕文末获取源码联系计算机编程果茶熊
二、系统介绍
大数据框架:Hadoop+Spark(Hive需要定制修改) 开发语言:Java+Python(两个版本都支持) 数据库:MySQL 后端框架:SpringBoot(Spring+SpringMVC+Mybatis)+Django(两个版本都支持) 前端:Vue+Echarts+HTML+CSS+JavaScript+jQuery
《旅游保险数据可视化分析系统》是一个基于大数据技术的保险数据分析平台,采用Hadoop+Spark分布式计算框架处理海量旅游保险数据。系统通过Django后端框架构建稳定的数据处理服务,结合Vue+ElementUI+Echarts技术栈打造直观的前端交互界面。平台核心功能涵盖用户管理、保险数据管理、图像分析、风险评估、销售统计、业绩监控、特征挖掘以及可视化大屏展示等模块。系统运用Spark SQL进行高效的数据查询与分析,通过Pandas和NumPy进行数据处理与统计计算,将复杂的保险业务数据转化为清晰的图表和报告。MySQL数据库确保数据的可靠存储与快速检索,HDFS分布式文件系统支撑大规模数据的存储需求。整个系统为保险公司提供了从数据采集、处理、分析到展示的完整解决方案,帮助决策者快速洞察旅游保险市场趋势和业务运营状况。
三、视频解说
四、部分功能展示
五、部分代码展示
from datetime import datetime, timedelta
import json

import numpy as np
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
# Shared SparkSession for every analysis view in this module; adaptive query
# execution is enabled so Spark can re-optimize shuffle partitions at runtime.
spark = (
    SparkSession.builder
    .appName("TourismInsuranceAnalysis")
    .config("spark.sql.adaptive.enabled", "true")
    .getOrCreate()
)
@csrf_exempt
def risk_analysis(request):
    """Compute claim-risk statistics for a POSTed date range.

    Expects a JSON body ``{"start_date": ..., "end_date": ...}`` (string dates
    comparable with the ``claim_date`` column). Returns a JsonResponse with
    per-destination, per-age-group, per-activity, seasonal and high-risk-customer
    aggregates. Non-POST requests get a 405 instead of falling through to None
    (which would crash Django's response handling).
    """
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    start_date = data.get('start_date')
    end_date = data.get('end_date')

    # NOTE(review): JDBC credentials are hardcoded — move to Django settings /
    # environment variables before production use.
    jdbc_url = "jdbc:mysql://localhost:3306/tourism_insurance"

    def _read_table(table):
        # One place for the repeated JDBC boilerplate.
        return (spark.read.format("jdbc")
                .option("url", jdbc_url)
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "password")
                .load())

    insurance_df = _read_table("insurance_policies")
    claim_df = _read_table("insurance_claims")

    filtered_claims = claim_df.filter(
        (col("claim_date") >= start_date) & (col("claim_date") <= end_date))

    # Claim volume and amounts per travel destination.
    risk_by_destination = filtered_claims.groupBy("destination").agg(
        count("claim_id").alias("claim_count"),
        avg("claim_amount").alias("avg_claim_amount"),
        sum("claim_amount").alias("total_claim_amount"))

    # Bucket customers into age groups via the joined policy record.
    risk_by_age_group = (filtered_claims.join(insurance_df, "policy_id")
        .withColumn("age_group",
                    when(col("customer_age") < 25, "青年")
                    .when(col("customer_age") < 45, "中年")
                    .otherwise("老年"))
        .groupBy("age_group")
        .agg(count("claim_id").alias("claim_count"),
             avg("claim_amount").alias("avg_claim_amount")))

    # claim_rate is claims per activity relative to the TOTAL policy count
    # (not per-activity policy count) — preserved from the original logic.
    risk_by_activity = filtered_claims.groupBy("activity_type").agg(
        count("claim_id").alias("claim_count"),
        (count("claim_id") / insurance_df.count() * 100).alias("claim_rate"))

    seasonal_risk = (filtered_claims
        .withColumn("month", month(col("claim_date")))
        .groupBy("month")
        .agg(count("claim_id").alias("monthly_claims"),
             avg("claim_amount").alias("monthly_avg_amount")))

    # Customers with more than 2 claims in the window, with contact details.
    high_risk_customers = (filtered_claims.groupBy("customer_id")
        .agg(count("claim_id").alias("claim_frequency"))
        .filter(col("claim_frequency") > 2)
        .join(insurance_df.select("customer_id", "customer_name", "phone"),
              "customer_id"))

    destination_risk_pd = risk_by_destination.toPandas()
    age_risk_pd = risk_by_age_group.toPandas()
    activity_risk_pd = risk_by_activity.toPandas()
    seasonal_risk_pd = seasonal_risk.toPandas()
    high_risk_pd = high_risk_customers.toPandas()

    # Min-max normalize avg claim size to a 0-100 risk score. Guard the
    # degenerate case (single destination or identical scores): the original
    # divided by zero and produced NaN, which is not valid JSON.
    if not destination_risk_pd.empty:
        score = (destination_risk_pd['total_claim_amount']
                 / destination_risk_pd['claim_count'])
        span = score.max() - score.min()
        if span > 0:
            destination_risk_pd['risk_score'] = (score - score.min()) / span * 100
        else:
            destination_risk_pd['risk_score'] = 0.0

    result = {
        'destination_risk': destination_risk_pd.to_dict('records'),
        'age_group_risk': age_risk_pd.to_dict('records'),
        'activity_risk': activity_risk_pd.to_dict('records'),
        'seasonal_trend': seasonal_risk_pd.to_dict('records'),
        'high_risk_customers': high_risk_pd.to_dict('records')
    }
    return JsonResponse(result)
@csrf_exempt
def sales_analysis(request):
    """Aggregate policy-sales statistics for a POSTed analysis request.

    JSON body: ``{"period": "monthly"|"quarterly"|<other=yearly>,
    "product_type": "all"|<category>}``. Returns time-series, per-product,
    regional, channel, customer-segment and period-over-period growth
    aggregates. Requires ``Window`` (pyspark.sql.window) for the lag-based
    growth calculation — the original file never imported it, which raised
    ``NameError`` at runtime. Non-POST requests now get a 405 instead of an
    implicit ``None`` return.
    """
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    analysis_period = data.get('period', 'monthly')
    product_type = data.get('product_type', 'all')

    # NOTE(review): hardcoded credentials — move to settings/env.
    jdbc_url = "jdbc:mysql://localhost:3306/tourism_insurance"

    def _read_table(table):
        return (spark.read.format("jdbc")
                .option("url", jdbc_url)
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "password")
                .load())

    sales_df = _read_table("policy_sales")
    product_df = _read_table("insurance_products")
    joined_df = sales_df.join(product_df, "product_id")

    if product_type != 'all':
        joined_df = joined_df.filter(col("product_category") == product_type)

    # Bucket sale dates into the requested reporting period.
    if analysis_period == 'monthly':
        time_grouped = joined_df.withColumn(
            "time_period", date_format(col("sale_date"), "yyyy-MM")).groupBy("time_period")
    elif analysis_period == 'quarterly':
        time_grouped = joined_df.withColumn(
            "time_period",
            concat(year(col("sale_date")), lit("-Q"), quarter(col("sale_date")))
        ).groupBy("time_period")
    else:
        # Any other value falls back to yearly grouping.
        time_grouped = joined_df.withColumn(
            "time_period", year(col("sale_date"))).groupBy("time_period")

    sales_summary = time_grouped.agg(
        count("policy_id").alias("policy_count"),
        sum("premium_amount").alias("total_premium"),
        avg("premium_amount").alias("avg_premium"))

    product_performance = (joined_df.groupBy("product_name")
        .agg(count("policy_id").alias("sales_count"),
             sum("premium_amount").alias("revenue"),
             avg("premium_amount").alias("avg_price"))
        .orderBy(desc("revenue")))

    regional_sales = joined_df.groupBy("customer_region").agg(
        count("policy_id").alias("policies_sold"),
        sum("premium_amount").alias("region_revenue"))

    sales_channel_analysis = joined_df.groupBy("sales_channel").agg(
        count("policy_id").alias("channel_sales"),
        sum("premium_amount").alias("channel_revenue"),
        avg("premium_amount").alias("channel_avg_premium"))

    # Segment customers by how many policies they have bought overall.
    purchase_freq = sales_df.groupBy("customer_id").agg(
        count("policy_id").alias("purchase_frequency"))
    customer_segment = (joined_df.join(purchase_freq, "customer_id")
        .withColumn("customer_type",
                    when(col("purchase_frequency") == 1, "新客户")
                    .when(col("purchase_frequency") <= 3, "普通客户")
                    .otherwise("忠实客户"))
        .groupBy("customer_type")
        .agg(count("customer_id").alias("customer_count"),
             sum("premium_amount").alias("segment_revenue")))

    # Period-over-period revenue growth via a lag window ordered by period.
    growth_window = Window.orderBy("time_period")
    growth_analysis = (sales_summary
        .withColumn("prev_period_revenue", lag("total_premium").over(growth_window))
        .withColumn("growth_rate",
                    (col("total_premium") - col("prev_period_revenue"))
                    / col("prev_period_revenue") * 100))

    sales_pd = sales_summary.toPandas()
    product_pd = product_performance.toPandas()
    regional_pd = regional_sales.toPandas()
    channel_pd = sales_channel_analysis.toPandas()
    segment_pd = customer_segment.toPandas()
    growth_pd = growth_analysis.toPandas()

    result = {
        'time_series_sales': sales_pd.to_dict('records'),
        'product_ranking': product_pd.to_dict('records'),
        'regional_distribution': regional_pd.to_dict('records'),
        'channel_performance': channel_pd.to_dict('records'),
        'customer_segments': segment_pd.to_dict('records'),
        'growth_trends': growth_pd.to_dict('records')
    }
    return JsonResponse(result)
@csrf_exempt
def performance_analysis(request):
    """Aggregate employee-performance KPIs over a trailing time window.

    JSON body: optional ``employee_id``, optional ``department``, and
    ``time_range`` (days back from now, default 30). Returns individual,
    department, top-performer, weekly-trend, target-achievement, commission
    and ranking aggregates plus a KPI summary. Requires ``Window``
    (pyspark.sql.window) for the ranking — the original file never imported
    it, which raised ``NameError`` at runtime. Non-POST requests now get a
    405 instead of an implicit ``None`` return.
    """
    if request.method != 'POST':
        return JsonResponse({'error': 'POST required'}, status=405)
    data = json.loads(request.body)
    employee_id = data.get('employee_id')
    department = data.get('department')
    time_range = data.get('time_range', 30)
    end_date = datetime.now()
    start_date = end_date - timedelta(days=time_range)

    # NOTE(review): hardcoded credentials — move to settings/env.
    jdbc_url = "jdbc:mysql://localhost:3306/tourism_insurance"

    def _read_table(table):
        return (spark.read.format("jdbc")
                .option("url", jdbc_url)
                .option("dbtable", table)
                .option("user", "root")
                .option("password", "password")
                .load())

    performance_df = _read_table("employee_performance")
    sales_df = _read_table("policy_sales")
    employee_df = _read_table("employees")

    filtered_performance = performance_df.filter(
        (col("performance_date") >= start_date.strftime('%Y-%m-%d'))
        & (col("performance_date") <= end_date.strftime('%Y-%m-%d')))
    if employee_id:
        filtered_performance = filtered_performance.filter(
            col("employee_id") == employee_id)
    if department:
        filtered_performance = filtered_performance.join(
            employee_df, "employee_id").filter(col("department") == department)

    individual_performance = filtered_performance.groupBy("employee_id").agg(
        sum("policies_sold").alias("total_policies"),
        sum("premium_generated").alias("total_premium"),
        avg("customer_satisfaction").alias("avg_satisfaction"),
        count("performance_date").alias("active_days"))

    department_performance = (filtered_performance.join(employee_df, "employee_id")
        .groupBy("department")
        .agg(sum("policies_sold").alias("dept_policies"),
             sum("premium_generated").alias("dept_premium"),
             avg("customer_satisfaction").alias("dept_satisfaction"),
             countDistinct("employee_id").alias("active_employees")))

    top_performers = (individual_performance
        .orderBy(desc("total_premium")).limit(10)
        .join(employee_df.select("employee_id", "employee_name", "position"),
              "employee_id"))

    performance_trends = (filtered_performance
        .withColumn("week", weekofyear(col("performance_date")))
        .groupBy("week")
        .agg(sum("policies_sold").alias("weekly_policies"),
             sum("premium_generated").alias("weekly_premium"),
             avg("customer_satisfaction").alias("weekly_satisfaction")))

    target_achievement = (filtered_performance.join(employee_df, "employee_id")
        .withColumn("achievement_rate",
                    col("premium_generated") / col("monthly_target") * 100)
        .groupBy("employee_id", "employee_name")
        .agg(avg("achievement_rate").alias("avg_achievement_rate"))
        .filter(col("avg_achievement_rate").isNotNull()))

    commission_calculation = (individual_performance.join(employee_df, "employee_id")
        .withColumn("commission_amount", col("total_premium") * col("commission_rate"))
        .select("employee_id", "employee_name", "total_premium", "commission_amount"))

    # Global rank by total premium; single-partition window is intentional here.
    ranking_window = Window.orderBy(desc("total_premium"))
    performance_ranking = (individual_performance.join(employee_df, "employee_id")
        .select("employee_id", "employee_name", "department",
                "total_policies", "total_premium", "avg_satisfaction")
        .withColumn("rank", row_number().over(ranking_window)))

    individual_pd = individual_performance.toPandas()
    department_pd = department_performance.toPandas()
    top_performers_pd = top_performers.toPandas()
    trends_pd = performance_trends.toPandas()
    targets_pd = target_achievement.toPandas()
    commission_pd = commission_calculation.toPandas()
    ranking_pd = performance_ranking.toPandas()

    # Guard the empty result set: sums of empty/NULL columns and mean() of an
    # empty series yield NaN, which is not JSON-serializable by JsonResponse.
    if individual_pd.empty:
        kpi_summary = {
            'total_employees': 0,
            'total_policies_sold': 0,
            'total_premium_generated': 0.0,
            'average_satisfaction': 0.0
        }
    else:
        kpi_summary = {
            'total_employees': individual_pd.shape[0],
            'total_policies_sold': int(individual_pd['total_policies'].sum()),
            'total_premium_generated': float(individual_pd['total_premium'].sum()),
            'average_satisfaction': float(individual_pd['avg_satisfaction'].mean())
        }

    result = {
        'kpi_summary': kpi_summary,
        'individual_performance': individual_pd.to_dict('records'),
        'department_performance': department_pd.to_dict('records'),
        'top_performers': top_performers_pd.to_dict('records'),
        'performance_trends': trends_pd.to_dict('records'),
        'target_achievement': targets_pd.to_dict('records'),
        'commission_details': commission_pd.to_dict('records'),
        'performance_ranking': ranking_pd.to_dict('records')
    }
    return JsonResponse(result)
六、部分文档展示
七、END
💕💕文末获取源码联系计算机编程果茶熊