历史拉链表用途:数据量大的表,更新频率低,完整的保存业务过程
demo1:不更新历史数据每天只新增新的数据
7.2号业务数据库:
drop table if exists tmp.luoxiaoxin2;
create table if not exists tmp.luoxiaoxin2 as
select 1 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 1 as order_id,'发货' as order_status,'2021-07-02 00:00:00' as create_time;
数仓历史拉链表:drop table if exists tmp.luoxiaoxin3;
create table if not exists tmp.luoxiaoxin3 as
select order_id
,order_status
,create_time as stat_time
,lead(create_time,1,'9999-99-99 00:00:00') over(partition by order_id order by create_time ) as end_time
from tmp.luoxiaoxin2;
7.3号业务数据库drop table if exists tmp.luoxiaoxin2;
create table if not exists tmp.luoxiaoxin2 as
select 1 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 1 as order_id,'发货' as order_status,'2021-07-02 00:00:00' as create_time
union all
select 1 as order_id,'收货' as order_status,'2021-07-03 00:00:00' as create_time
union all
select 2 as order_id,'发货' as order_status,'2021-07-03 01:00:00' as create_time
union all
select 2 as order_id,'收货' as order_status,'2021-07-03 05:00:00' as create_time
union all
select 3 as order_id,'下单' as order_status,'2021-07-03 08:00:00' as create_time
union all
select 3 as order_id,'发货' as order_status,'2021-07-03 09:00:00' as create_time
union all
select 3 as order_id,'收货' as order_status,'2021-07-03 10:00:00' as create_time;
7.3号数仓历史拉链表更新
create table if not exists tmp.luoxiaoxin4 as
select order_id
,order_status
,stat_time
,lead(stat_time,1,'9999-99-99 00:00:00') over(partition by order_id order by stat_time) as end_time
from(select order_id
,order_status
,stat_time
from tmp.luoxiaoxin3
union all
select order_id
,order_status
,create_time as stat_time
from tmp.luoxiaoxin2
where date(create_time)='2021-07-03'
) tmp
demo2:既新增历史新数据也更新历史数据
7.1号业务数据库:drop table if exists tmp.luoxiaoxin5;
create table if not exists tmp.luoxiaoxin5 as
select 1 as order_id,'下单' as order_status
,'2021-07-01 00:00:00' as create_time
,'2021-07-01 00:00:00' as change_time
union all
select 2 as order_id,
'下单' as order_status
,'2021-07-01 00:00:00' as create_time
,'2021-07-01 00:00:00' as change_time;
数仓历史拉链表:drop table if exists tmp.luoxiaoxin6;
create table if not exists tmp.luoxiaoxin6 asselect order_id ,order_status ,create_time ,change_time ,date(create_time) as stat_date ,'9999-99-99 00:00:00' as end_date ,1 as is_validfrom tmp.luoxiaoxin5;
7.2号业务数据库drop table if exists tmp.luoxiaoxin7;
create table if not exists tmp.luoxiaoxin7 as
select 1 as order_id,'发货' as order_status,'2021-07-01 00:00:00' as create_time,'2021-07-02 10:00:00' as change_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time,'2021-07-01 00:00:00' as change_time
union all
select 3 as order_id,'下单' as order_status,'2021-07-02 10:00:00' as create_time,'2021-07-02 10:00:00' as change_time
union all
select 4 as order_id,'收货' as order_status,'2021-07-02 00:00:00' as create_time,'2021-07-02 11:00:00' as change_time;
7.2号数仓历史拉链表更新
昨日历史拉链表 luoxiaoxin6
今日业务数据库 luoxiaoxin7
今日变化的数据:
SELECT * from tmp.luoxiaoxin7 where date(change_time)>='2021-07-02'
今日新增的数据:
SELECT * from tmp.luoxiaoxin7 where date(create_time)>='2021-07-02'
create table if not exists tmp.luoxiaoxin8 as
--更新历史拉链表定档状态
select t1.order_id
,t1.order_status
,t1.create_time
,t1.change_time
,t1.stat_date
,if(t2.change_time is null,t1.end_date,t2.change_time)
,if(t2.change_time is null,1,0) as is_validfrom tmp.luoxiaoxin6 t1
left join(select t2.* from
tmp.luoxiaoxin7 t2
where date(change_time)>='2021-07-02'
) t2
on t1.order_id=t2.order_id
union all --7.2号发生过改变的数据
select order_id
,order_status
,create_time
,change_time
,date(create_time) as stat_date
,'9999-99-99 00:00:00' as end_date
,1 as is_valid
from tmp.luoxiaoxin7
where date(change_time)>='2021-07-02'