面试:历史拉链表

455 阅读3分钟

历史拉链表用途:数据量大的表,更新频率低,完整的保存业务过程

demo1:不更新历史数据每天只新增新的数据

7.2号业务数据库:
drop table if exists tmp.luoxiaoxin2;
create table if not exists tmp.luoxiaoxin2 as
select 1 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 1 as order_id,'发货' as order_status,'2021-07-02 00:00:00' as create_time;

图片

数仓历史拉链表:drop table if exists tmp.luoxiaoxin3;
create table if not exists tmp.luoxiaoxin3 as
select order_id  
       ,order_status  
       ,create_time as stat_time  
       ,lead(create_time,1,'9999-99-99 00:00:00') over(partition by order_id order by create_time ) as end_time  
from tmp.luoxiaoxin2;

图片

7.3号业务数据库drop table if exists tmp.luoxiaoxin2;
create table if not exists tmp.luoxiaoxin2 as
select 1 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time
union all
select 1 as order_id,'发货' as order_status,'2021-07-02 00:00:00' as create_time
union all
select 1 as order_id,'收货' as order_status,'2021-07-03 00:00:00' as create_time
union all
select 2 as order_id,'发货' as order_status,'2021-07-03 01:00:00' as create_time
union all
select 2 as order_id,'收货' as order_status,'2021-07-03 05:00:00' as create_time
union all
select 3 as order_id,'下单' as order_status,'2021-07-03 08:00:00' as create_time
union all
select 3 as order_id,'发货' as order_status,'2021-07-03 09:00:00' as create_time
union all
select 3 as order_id,'收货' as order_status,'2021-07-03 10:00:00' as create_time;

图片

7.3号数仓历史拉链表更新
create table if not exists tmp.luoxiaoxin4 as
select order_id  
       ,order_status 
       ,stat_time  
       ,lead(stat_time,1,'9999-99-99 00:00:00') over(partition by order_id order by stat_time) as end_time
from(select order_id    
           ,order_status    
           ,stat_time  
    from tmp.luoxiaoxin3  
    union all  
    select order_id     
           ,order_status    
           ,create_time as stat_time  
from tmp.luoxiaoxin2 
where date(create_time)='2021-07-03'
) tmp

图片

demo2:既新增历史新数据也更新历史数据

7.1号业务数据库:drop table if exists tmp.luoxiaoxin5;
create table if not exists tmp.luoxiaoxin5 as
select 1 as order_id,'下单' as order_status
       ,'2021-07-01 00:00:00' as create_time
       ,'2021-07-01 00:00:00' as change_time
union all
select 2 as order_id,
       '下单' as order_status
       ,'2021-07-01 00:00:00' as create_time
       ,'2021-07-01 00:00:00' as change_time;

图片

数仓历史拉链表:drop table if exists tmp.luoxiaoxin6;
create table if not exists tmp.luoxiaoxin6 asselect order_id  ,order_status  ,create_time  ,change_time  ,date(create_time) as stat_date  ,'9999-99-99 00:00:00' as end_date  ,1 as is_validfrom tmp.luoxiaoxin5;

图片

7.2号业务数据库drop table if exists tmp.luoxiaoxin7;
create table if not exists tmp.luoxiaoxin7 as
select 1 as order_id,'发货' as order_status,'2021-07-01 00:00:00' as create_time,'2021-07-02 10:00:00' as change_time
union all
select 2 as order_id,'下单' as order_status,'2021-07-01 00:00:00' as create_time,'2021-07-01 00:00:00' as change_time
union all
select 3 as order_id,'下单' as order_status,'2021-07-02 10:00:00' as create_time,'2021-07-02 10:00:00' as change_time
union all
select 4 as order_id,'收货' as order_status,'2021-07-02 00:00:00' as create_time,'2021-07-02 11:00:00' as change_time;

图片

7.2号数仓历史拉链表更新
昨日历史拉链表 luoxiaoxin6
今日业务数据库 luoxiaoxin7
今日变化的数据:
SELECT * from tmp.luoxiaoxin7 where date(change_time)>='2021-07-02'
今日新增的数据:
SELECT * from tmp.luoxiaoxin7 where date(create_time)>='2021-07-02'
create table if not exists tmp.luoxiaoxin8 as
--更新历史拉链表定档状态
select t1.order_id  
       ,t1.order_status  
       ,t1.create_time  
       ,t1.change_time  
       ,t1.stat_date  
       ,if(t2.change_time is null,t1.end_date,t2.change_time)  
       ,if(t2.change_time is null,1,0) as is_validfrom tmp.luoxiaoxin6 t1
left join(select t2.* from 
          tmp.luoxiaoxin7 t2 
          where date(change_time)>='2021-07-02'
          ) t2
on t1.order_id=t2.order_id
union all --7.2号发生过改变的数据
select order_id  
       ,order_status  
       ,create_time  
       ,change_time  
       ,date(create_time) as stat_date  
       ,'9999-99-99 00:00:00' as end_date  
       ,1 as is_valid 
from tmp.luoxiaoxin7 
where date(change_time)>='2021-07-02'

图片