分层
- ODS层 :原始数据层:101张表:AVRO
- DWD层:明细数据层:101张表:ORC
- DWS层:维度数据层:维度表
- DWB层:轻度汇总层:Join + 构建基础指标
呼叫中心事实指标
需求
- 基于基础的时间、受理方式、来电类型等事实维度统计工单数量、电话数量、回访数量、投诉数量等
指标构建
eos_dict_type:字典状态类别表
eos_dict_entry:字典状态明细表
-- eos_dict_type:字典状态类别表,记录所有需要使用字典标记的表
/*DICTTYPEID |DICTTYPENAME|RANK|PARENTID|SEQNO |dt |
--------------+------------+----+--------+----------------+--------+
BUSS_CALL_TYPE|来电类型 | 1| |.BUSS_CALL_TYPE.|20210101|*/
select * from ONE_MAKE_DWD.eos_dict_type where dicttypename = '来电类型';
/*DICTTYPEID |DICTTYPENAME|RANK|PARENTID|SEQNO |dt |
----------------+------------+----+--------+------------------+--------+
BUSS_PROCESS_WAY|来电受理单--处理方式 | 1| |.BUSS_PROCESS_WAY.|20210101|*/
select * from ONE_MAKE_DWD.eos_dict_type where dicttypename = '来电受理单--处理方式';
-- eos_dict_entry:字典状态明细表,记录所有具体的状态或者类别信息
/*DICTTYPEID |DICTID|DICTNAME|STATUS|SORTNO|RANK|PARENTID|SEQNO|FILTER1|FILTER2|dt |
--------------+------+--------+------+------+----+--------+-----+-------+-------+--------+
BUSS_CALL_TYPE|5 |电话支持 | 1| 5| 1| |.5. | | |20210101|
BUSS_CALL_TYPE|1 |安装调试 | 1| 1| 1| |.1. | | |20210101|*/
select * from ONE_MAKE_DWD.eos_dict_entry where dicttypeid = 'BUSS_CALL_TYPE';
/*DICTTYPEID |DICTID|DICTNAME|STATUS|SORTNO|RANK|PARENTID|SEQNO|FILTER1|FILTER2|dt |
----------------+------+--------+------+------+----+--------+-----+-------+-------+--------+
BUSS_PROCESS_WAY|1 |自己处理 | 1| 1| 1| |.1. | | |20210101|
BUSS_PROCESS_WAY|5 |转派工 | 1| 2| 1| |.5. | | |20210101|*/
select * from ONE_MAKE_DWD.eos_dict_entry where dicttypeid = 'BUSS_PROCESS_WAY';
构建数据字典表:tmp_dict
-- 构建数据字典表【每次都要两张表关联,比较麻烦,合并为一张表】
create table if not exists ONE_MAKE_DWD.tmp_dict
stored as orc
as
SELECT
dt.DICTTYPEID -- 类型ID
,dt.DICTTYPENAME -- 类型名称
,de.DICTID -- 字典ID
,de.DICTNAME -- 字典名称
,dt.dt -- 日期
FROM
(select * from ONE_MAKE_DWD.eos_dict_type) AS dt
JOIN
(select * from ONE_MAKE_DWD.eos_dict_entry) AS de
ON dt.DICTTYPEID = de.DICTTYPEID
AND dt.dt = '20210101'
AND de.dt = '20210101'
ORDER BY de.DICTID
SELECT * FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypeid = 'BUSS_CALL_TYPE'
SELECT * FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypename = '来电类型'
SELECT * FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypeid = 'BUSS_PROCESS_WAY'
SELECT * FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypename = '来电受理单--处理方式'
实现SQL
SELECT
ID -- 来电受理单id
,CODE -- 受理编号
,date_format(FROM_UNIXTIME(call.call_time / 1000), 'yyyyMMdd') AS call_date -- 来电日期(日期id)
,hour(FROM_UNIXTIME(call.call_time / 1000)) AS call_hour -- 来电时间(小时)
,FLOOR(
to_unix_timestamp(FROM_UNIXTIME(call.call_time / 1000), 'yyyy-MM-dd HH:mm:ss') -
to_unix_timestamp(FROM_UNIXTIME(call.PROCESS_TIME / 1000), 'yyyy-MM-dd HH:mm:ss')
) AS `受理时长(秒)` -- 受理时长(单位:秒)
,call.CALL_TYPE -- 来电类型id
,call_dict.DICTNAME -- 来电类型名称
,call.PROCESS_WAY -- 受理方式id
,process_dict.DICTNAME -- 处理方式名称
,CALL_OILSTATION_ID -- 来电油站id
,ACCEPT_USERID -- 一线受理员id
,1 -- 单据数量(指标列,默认1)
,CASE WHEN CALL.PROCESS_WAY=5 THEN 1 ELSE 0 END -- 派工数量。根据受理方式判断,5:自己处理;其他:产生派工
,CASE WHEN workorder.status=-1 THEN 1 ELSE 0 END -- 派工单作废数量。根据状态判断,-1:作废;其他:不作废
,CASE WHEN workorder.status=-2 THEN 1 ELSE 0 END -- 派工单退单数量。根据状态判断,-2:退单;其他:不退单
,CASE WHEN call.CALL_TYPE=5 THEN 1 ELSE 0 END -- 电话支持数量。根据来单类型判断,5:电话支持;其他:非电话支持
,CASE WHEN call.CALL_TYPE IN (1,2,3,4) THEN 1 ELSE 0 END -- 现场安装、维修、改造、巡检数量。根据来单类型判断,(1,2,3,4):分别对应现场安装、维修、改造、巡检
,CASE WHEN call.CALL_TYPE=7 THEN 1 ELSE 0 END -- 客户回访数量。根据来单类型判断,7:客户回访;其他:非客户回访
,CASE WHEN call.CALL_TYPE=8 THEN 1 ELSE 0 END -- 投诉数量。根据来单类型判断,8:投诉;其他:非投诉
,CASE WHEN call.CALL_TYPE=9 THEN 1 ELSE 0 END -- 其他业务数量。根据来单类型判断,9:其他业务单据;
FROM ONE_MAKE_DWD.CISS_SERVICE_CALLACCEPT AS call
JOIN
-- 字典信息表:得到 来电类型名称
(SELECT DICTID,DICTNAME FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypeid = 'BUSS_CALL_TYPE') AS call_dict
ON call.CALL_TYPE = call_dict.DICTID
JOIN
-- 字典信息表:得到 来电受理单--处理方式
(SELECT DICTID,DICTNAME FROM ONE_MAKE_DWD.tmp_dict WHERE dicttypeid = 'BUSS_PROCESS_WAY') AS process_dict
ON call.PROCESS_WAY = process_dict.DICTID
JOIN
-- 工单信息表:得到工单状态:-2:退单。-1:作废
(select * from ONE_MAKE_DWD.ciss_service_workorder) AS workorder
ON CALL.id = workorder.callaccept_id
AND workorder.dt = '20210101'
油站事实指标
需求
- 基于油站信息及设备数据构建油站主题事实的油站个数、停用个数、新增个数、设备个数等
-
油站数量:1个油站就是一条数据,这个值默认就为1
-
已停用油站数量:停用状态,判断油站的状态是什么状态
-
有效油站数量:使用状态,判断油站的状态是什么状态
-
当日新增油站:判断之前有没有这个油站
-
历史记录表:oil_history:记录了当前所有油站的信息
- id、name
-
今日新数据:oil_current:记录了今天所有油站的信息
- id、name
-
left join
oil_current a left join oil_history b on a.id = b.id where b.id is null
-
-
当日停用油站:判断当日状态
-
油站设备数量:得到这个油站的所有设备信息,按照油站id分组统计设备个数
构建
ciss_base_oilstation:油站信息表
-- 获取当前的油站是否是一个新增油站
SELECT
oil.id
,CASE when oil.id = oil_his.id THEN 0 else 1 end AS current_new_os_num
FROM
-- 今日油站数据表
ONE_MAKE_DWD.CISS_BASE_OILSTATION AS oil
LEFT JOIN
-- 历史油站数据表
ONE_MAKE_DWD.CISS_BASE_OILSTATION_HISTORY AS oil_his
ON oil.id = oil_his.id
WHERE oil.dt = '20210101'
ciss_base_oilstation_history:油站历史记录表
-- 统计停用油站数量
SELECT
oil.id,
COUNT(oil.id) AS current_invalid_os_num
FROM ONE_MAKE_DWD.CISS_BASE_OILSTATION oil
WHERE oil.dt = '20210101'
AND oil.status = 2
GROUP BY oil.id
ciss_base_device_detail:油站设备信息表
-- 设备信息表中按照油站id分组聚合设备id:每个油站的设备个数
SELECT
oilstation_id
,COUNT(1) AS device_num
FROM one_make_dwd.ciss_base_device_detail dev
group by dev.oilstation_id
执行SQL
SELECT
CBO.id AS os_id -- 油站id
,name AS os_name -- 油站名称
,code AS os_code -- 油站编码
,province AS province_id -- 油站省份
,city AS city_id -- 油站城市
,region AS region_id -- 油站区域(县)
,status AS status_id -- 油站状态
,customer_classify AS cstm_type_id -- 客户分类id
,CASE WHEN status = 1 THEN 1 ELSE 0 END AS valid_os_num -- 有效油站数量:1-有效,0-无效
,CASE WHEN status = 2 THEN 1 ELSE 0 END AS invalid_os_num -- 停用油站数量:1-停用,0-启用
,1 AS os_num -- 油站数量
-- 缺当日新增油站、当日停用油站数量、油站设备数量
,current_new_os_num -- 当日新增油站。1:新增、0:老油站
,CASE when current_invalid_os_num IS NULL
then 0
else current_invalid_os_num
end AS current_invalid_os_num -- 当日停用油站。0:无停用油站、current_new_os_num:停用油站数量
,dev.device_num -- 油站设备数量
FROM
--油站信息表
ONE_MAKE_DWD.CISS_BASE_OILSTATION CBO
LEFT JOIN
-- 关联历史油站表,判断是否为新增油站
(SELECT
oil.id
,CASE when oil.id = oil_his.id THEN 0 else 1 end AS current_new_os_num
FROM ONE_MAKE_DWD.CISS_BASE_OILSTATION AS oil
LEFT JOIN ONE_MAKE_DWD.CISS_BASE_OILSTATION_HISTORY AS oil_his
ON oil.id = oil_his.id
WHERE oil.dt = '20210101') AS oilnewhis
ON CBO.id = oilnewhis.id
LEFT JOIN
-- 关联停用油站数据,统计今日停用油站个数
(SELECT
oil.id,
COUNT(oil.id) AS current_invalid_os_num
FROM ONE_MAKE_DWD.CISS_BASE_OILSTATION oil
WHERE oil.dt = '20210101'
AND oil.status = 2
GROUP BY oil.id) AS invalidos
ON CBO.id = invalidos.id
LEFT JOIN
-- 关联油站设备信息表,统计油站设备个数
(SELECT
oilstation_id
,COUNT(1) AS device_num
FROM one_make_dwd.ciss_base_device_detail dev
group by dev.oilstation_id) dev
ON CBO.id = dev.oilstation_id
客户回访事实指标
需求
- 基于客户回访数据统计工单满意数量、不满意数量、返修数量等指标
指标构建
ciss_service_return_visit:回访信息表
SELECT
id -- 回访id
,code -- 回访编号
,WORKORDER_ID -- 工单id
,CREATE_USERID -- 回访人员id
,SERVICE_CONTENT -- 服务内容
,RESPONSE_SPEED -- 服务响应速度
,REPAIR_LEVEL -- 服务维修水平
,IS_REPAIR -- 是否返修
FROM ONE_MAKE_DWD.CISS_SERVICE_RETURN_VISIT visit ;
ciss_service_workorder:服务工单信息表
SELECT
ID -- 派工单id
,SERVICE_USERID -- 服务工程师id
,OIL_STATION_ID -- 油站id
,SERVICE_STATION_ID -- 服务网点id
FROM ONE_MAKE_DWD.CISS_SERVICE_WORKORDER swo ;
其他指标
SELECT
WORKORDER_ID -- 工单id
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 1
AND visit.RESPONSE_SPEED = 1
AND visit.REPAIR_LEVEL =1
THEN 1
ELSE 0
END) AS satisfied_num -- 满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 0 THEN 1
WHEN visit.RESPONSE_SPEED = 0 THEN 1
WHEN visit.REPAIR_LEVEL =0 THEN 1
WHEN visit.YAWP_PROBLEM_TYPE = 0 THEN 1
ELSE 0
END) AS unsatisfied_num -- 不满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 1
THEN 1
ELSE 0
END) AS srv_atu_num -- 服务态度满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 0
THEN 1
ELSE 0
END) AS srv_bad_atu_num -- 服务态度不满意数量
,SUM(CASE WHEN visit.REPAIR_LEVEL = 1
THEN 1
ELSE 0
END) AS srv_rpr_prof_num -- 服务水平满意数量
,SUM(CASE WHEN visit.REPAIR_LEVEL = 0
THEN 1
ELSE 0
END) AS srv_rpr_unprof_num -- 服务水平不满意数量
,SUM(CASE WHEN visit.RESPONSE_SPEED = 1
THEN 1
ELSE 0
END) AS srv_high_res_num -- 服务响应速度满意数量
,SUM(CASE WHEN visit.RESPONSE_SPEED = 0
THEN 1
ELSE 0
END) AS srv_low_res_num -- 服务响应速度不满意数量
,SUM(CASE WHEN visit.IS_REPAIR = 1
THEN 1
ELSE 0
END) AS rtn_rpr_num -- 返修数量
FROM
ONE_MAKE_DWD.CISS_SERVICE_RETURN_VISIT AS visit
执行sql
SELECT
visit.id -- 回访id
,visit.code -- 回访编号
,visit.WORKORDER_ID -- 工单id
,visit.CREATE_USERID -- 回访人员id
,swo.SERVICE_USERID -- 服务工程师id
,swo.OIL_STATION_ID -- 油站id
,swo.SERVICE_STATION_ID -- 服务网点id
,satisfied_num -- 满意数量
,unsatisfied_num -- 不满意数量
,srv_atu_num -- 服务态度满意数量
,srv_bad_atu_num -- 服务态度不满意数量
,srv_rpr_prof_num -- 服务水平满意数量
,srv_rpr_unprof_num -- 服务水平不满意数量
,srv_high_res_num -- 服务响应速度满意数量
,srv_low_res_num -- 服务响应速度不满意数量
,rtn_rpr_num -- 返修数量
FROM
-- 回访信息表
ONE_MAKE_DWD.CISS_SERVICE_RETURN_VISIT AS visit
LEFT JOIN
-- 工单信息表
ONE_MAKE_DWD.CISS_SERVICE_WORKORDER AS swo
ON visit.WORKORDER_ID = swo.id
LEFT JOIN
(SELECT
WORKORDER_ID -- 工单id
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 1
AND visit.RESPONSE_SPEED = 1
AND visit.REPAIR_LEVEL =1
THEN 1
ELSE 0
END) AS satisfied_num -- 满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 0 THEN 1
WHEN visit.RESPONSE_SPEED = 0 THEN 1
WHEN visit.REPAIR_LEVEL =0 THEN 1
WHEN visit.YAWP_PROBLEM_TYPE = 0 THEN 1
ELSE 0
END) AS unsatisfied_num -- 不满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 1
THEN 1
ELSE 0
END) AS srv_atu_num -- 服务态度满意数量
,SUM(CASE WHEN visit.SERVICE_ATTITUDE = 0
THEN 1
ELSE 0
END) AS srv_bad_atu_num -- 服务态度不满意数量
,SUM(CASE WHEN visit.REPAIR_LEVEL = 1
THEN 1
ELSE 0
END) AS srv_rpr_prof_num -- 服务水平满意数量
,SUM(CASE WHEN visit.REPAIR_LEVEL = 0
THEN 1
ELSE 0
END) AS srv_rpr_unprof_num -- 服务水平不满意数量
,SUM(CASE WHEN visit.RESPONSE_SPEED = 1
THEN 1
ELSE 0
END) AS srv_high_res_num -- 服务响应速度满意数量
,SUM(CASE WHEN visit.RESPONSE_SPEED = 0
THEN 1
ELSE 0
END) AS srv_low_res_num -- 服务响应速度不满意数量
,SUM(CASE WHEN visit.IS_REPAIR = 1
THEN 1
ELSE 0
END) AS rtn_rpr_num -- 返修数量
FROM
ONE_MAKE_DWD.CISS_SERVICE_RETURN_VISIT AS visit
LEFT JOIN
ONE_MAKE_DWD.CISS_SERVICE_WORKORDER AS swo
ON visit.WORKORDER_ID = swo.id
WHERE visit.dt = '20210101'
GROUP BY visit.WORKORDER_ID) AS vstswo
ON visit.workorder_id = vstswo.workorder_id
where visit.dt = '20210101'
费用报销事实指标
需求
- 基于差率报销信息统计交通费用、住宿费用、油费金额等报销费用指标
指标构建
ciss_service_expense_report:费用信息表
-- *ciss_service_expense_report**:费用信息表
select
id, --报销单id
submoney5, --报销金额
create_user_id, --创建人id
create_org_id --创建部门id
from ONE_MAKE_DWD.ciss_service_expense_report AS exp;
ciss_base_servicestation:服务网点信息表
select
id, --服务网点id
org_id --部门id
from ONE_MAKE_DWD.ciss_base_servicestation AS ss;
ciss_service_exp_report_dtl:费用明细表
select
exp_report_id --报销单id
,item_id --费用项目id
,submoney5 --项目报销实际金额
from ONE_MAKE_DWD.ciss_service_exp_report_dtl AS EXP_DTL
tmp_dict:数据字典表
select
dictid --项目id
,dictname --项目名称
from ONE_MAKE_DWD.tmp_dict
where dicttypename = '费用报销项目' AS DICT
构建数据字典表
create table if not exists ONE_MAKE_DWD.tmp_dict
stored as orc
as
SELECT
dt.DICTTYPEID -- 类型ID
,dt.DICTTYPENAME -- 类型名称
,de.DICTID -- 字典ID
,de.DICTNAME -- 字典名称
,dt.dt -- 日期
FROM
(select * from ONE_MAKE_DWD.eos_dict_type) AS dt
JOIN
(select * from ONE_MAKE_DWD.eos_dict_entry) AS de
ON dt.DICTTYPEID = de.DICTTYPEID
AND dt.dt = '20210101'
AND de.dt = '20210101'
ORDER BY de.DICTID
执行sql
SELECT
EXP.id AS exp_id -- 报销单id
,SS.id AS ss_id -- 服务网点id
,EXP.create_user_id AS srv_user_id -- 创建人id
,EXP.submoney5 AS actual_exp_money -- 实际报销金额
,EXP_DTL.item_id AS exp_item -- 费用项目id
,DICT.dictname AS dict_name -- 费用项目名称
,EXP_DTL.submoney5 AS exp_item_money -- 费用项目金额
FROM
-- 费用明细表
(select
id -- 报销单id
,submoney5 -- 报销金额
,create_user_id -- 创建人id
,create_org_id -- 创建部门id
from ONE_MAKE_DWD.ciss_service_expense_report
WHERE status = 9 -- 只取制证会计已审核状态
AND dt = '20210101') AS EXP
LEFT JOIN
(select
id --服务网点id
,org_id --部门id
from ONE_MAKE_DWD.ciss_base_servicestation) AS SS
ON EXP.create_org_id = SS.org_id
LEFT JOIN
(select
exp_report_id --报销单id
,item_id --费用项目id
,submoney5 --项目报销实际金额
from ONE_MAKE_DWD.ciss_service_exp_report_dtl) AS EXP_DTL
ON EXP.id = EXP_DTL.exp_report_id
LEFT JOIN
(select
dictid --项目id
,dictname --项目名称
from ONE_MAKE_DWD.tmp_dict
where dicttypename = '费用报销项目') AS DICT
ON EXP_DTL.item_id = DICT.dictid
补充知识
日期函数
hive 与 spark 的差异
-- hive、spark执行同一sql
SELECT
DATE_FORMAT(timestamp(1493627804000),'yyyyMMdd') as call_date,
HOUR (timestamp(1493627804000)) as hour
-- hive 执行结果,时间戳转换成功
call_date|hour|
---------+----+
20170501 | 8|
-- spark 执行结果,时间戳转换失败
call_date|HOUR|
---------+----+
493010307| 20|
Hive 和 Spark SQL 在处理日期和时间戳时的函数和语法有所不同。
在 Hive 中,date_format 可以直接处理时间戳,而在 Spark SQL 中,需要使用 FROM_UNIXTIME 将毫秒时间戳转换为标准日期时间格式。
SELECT
date_format(FROM_UNIXTIME(1493627804000 / 1000), 'yyyyMMdd') AS call_date
,hour(FROM_UNIXTIME(1493627804000 / 1000)) AS call_hour
-- spark执行结果,时间戳转换成功
call_date|call_hour|
---------+---------+
20170501 | 8|
CASE
-- CASE:用于开始一个条件表达式
-- WHEN:指定条件。可以有多个 WHEN 子句,SQL 会依次检查每个条件
-- THEN:如果对应的 WHEN 条件为真,后面跟随的值将被返回
-- ELSE:可选部分。如果所有的 WHEN 条件都不满足,则返回 ELSE 后的值
-- END:用于结束 CASE 表达式
-- 当 call.CALL_TYPE 的值为 5 ,条件为真,返回 1 ,否则返回 0
CASE WHEN call.CALL_TYPE=5 THEN 1 ELSE 0 END