本文已参与「开源摘星计划」,欢迎正在阅读的你加入。活动链接:github.com/weopenproje…
kylin维度表(lookup table)更新后,是否需要重新构建所有历史cube?
前提:维度表字段为 Derived 类型
猜想
不需要。
kylin在创建模型时,可以指定事实表(fact table)以及维度表(lookup table),当维度表数据有更新时,最新日期的cube更新之后,维度表数据理论上也会更新为最新的,那么,历史数据是否需要重新构建是一个值得思考的问题。理论上,维度表在更新一次之后,不需要重新构建所有的历史数据。
以下下为验证步骤。
参与测试验证kylin版本
2.6.5
测试步骤
1.创建事实表和维度表
--创建测试事实表
create table if not exists tmp.fact_test
(
tel string comment '电话号码' ,
metric int comment '度量值-无实际意义'
) comment '测试kylin维度更新-事实表'
partitioned by (dt string comment '日期yyyyMMdd')
row format delimited fields terminated by '\t'
stored as orc tblproperties('orc.compress'='SNAPPY');
--创建测试维度表
create table if not exists tmp.dim_test
(
tel string comment '电话号码' ,
id_card string comment '身份证号码' ,
user_name string comment '姓名'
) comment '测试kylin维度更新-维度表'
row format delimited fields terminated by '\t'
stored as orc tblproperties('orc.compress'='SNAPPY');
2.先往事实表插入20200101的测试数据,维度表插入测试维度数据
--添加测试事实表数据 20220101
insert overwrite table tmp.fact_test partition (dt = '20220101')
select * from
(select '13662052023',cast(ceiling(rand() * 100) as int) union all
select '15297767438',cast(ceiling(rand() * 100) as int) union all
select '13448166906',cast(ceiling(rand() * 100) as int) union all
select '18761301136',cast(ceiling(rand() * 100) as int) union all
select '15002957977',cast(ceiling(rand() * 100) as int) union all
select '13479003370',cast(ceiling(rand() * 100) as int) union all
select '15290025770',cast(ceiling(rand() * 100) as int) union all
select '15893995861',cast(ceiling(rand() * 100) as int) union all
select '13779034035',cast(ceiling(rand() * 100) as int) union all
select '18499348035',cast(ceiling(rand() * 100) as int) union all
select '13517368998',cast(ceiling(rand() * 100) as int) union all
select '15779531492',cast(ceiling(rand() * 100) as int) union all
select '18803558821',cast(ceiling(rand() * 100) as int) union all
select '15958352586',cast(ceiling(rand() * 100) as int) union all
select '15882414878',cast(ceiling(rand() * 100) as int) union all
select '18341489428',cast(ceiling(rand() * 100) as int) union all
select '18394366640',cast(ceiling(rand() * 100) as int) union all
select '15232521687',cast(ceiling(rand() * 100) as int) union all
select '13824261587',cast(ceiling(rand() * 100) as int) union all
select '15834685512',cast(ceiling(rand() * 100) as int) union all
select '18332271773',cast(ceiling(rand() * 100) as int) union all
select '18877691441',cast(ceiling(rand() * 100) as int) union all
select '18208413183',cast(ceiling(rand() * 100) as int) union all
select '13956254975',cast(ceiling(rand() * 100) as int) union all
select '14743548078',cast(ceiling(rand() * 100) as int) union all
select '13857838595',cast(ceiling(rand() * 100) as int) union all
select '15917583611',cast(ceiling(rand() * 100) as int) union all
select '18456354103',cast(ceiling(rand() * 100) as int) union all
select '17891947651',cast(ceiling(rand() * 100) as int) union all
select '18422036142',cast(ceiling(rand() * 100) as int) union all
select '18760137082',cast(ceiling(rand() * 100) as int) union all
select '13956546444',cast(ceiling(rand() * 100) as int) union all
select '16507557714',cast(ceiling(rand() * 100) as int) union all
select '16571288424',cast(ceiling(rand() * 100) as int) union all
select '15754232708',cast(ceiling(rand() * 100) as int) union all
select '18418334823',cast(ceiling(rand() * 100) as int) union all
select '18852948674',cast(ceiling(rand() * 100) as int) union all
select '13773697394',cast(ceiling(rand() * 100) as int) union all
select '17866756967',cast(ceiling(rand() * 100) as int) union all
select '16559463240',cast(ceiling(rand() * 100) as int) union all
select '13978035899',cast(ceiling(rand() * 100) as int) union all
select '18889712632',cast(ceiling(rand() * 100) as int) union all
select '18291029052',cast(ceiling(rand() * 100) as int) union all
select '13402816661',cast(ceiling(rand() * 100) as int) union all
select '18261114028',cast(ceiling(rand() * 100) as int) union all
select '13971785792',cast(ceiling(rand() * 100) as int) union all
select '15272922457',cast(ceiling(rand() * 100) as int) union all
select '19893587120',cast(ceiling(rand() * 100) as int) union all
select '13886432258',cast(ceiling(rand() * 100) as int) union all
select '19879865504',cast(ceiling(rand() * 100) as int) union all
select '15006487377',cast(ceiling(rand() * 100) as int) union all
select '18438674581',cast(ceiling(rand() * 100) as int) union all
select '15049728065',cast(ceiling(rand() * 100) as int) union all
select '16561423528',cast(ceiling(rand() * 100) as int) union all
select '17241666629',cast(ceiling(rand() * 100) as int) union all
select '15840954741',cast(ceiling(rand() * 100) as int) union all
select '13405779964',cast(ceiling(rand() * 100) as int) union all
select '16509557151',cast(ceiling(rand() * 100) as int) union all
select '18496471497',cast(ceiling(rand() * 100) as int) union all
select '13470786775',cast(ceiling(rand() * 100) as int) union all
select '15091609760',cast(ceiling(rand() * 100) as int) union all
select '18409525671',cast(ceiling(rand() * 100) as int) union all
select '13998936553',cast(ceiling(rand() * 100) as int) union all
select '15212871052',cast(ceiling(rand() * 100) as int) union all
select '15294864565',cast(ceiling(rand() * 100) as int) union all
select '16532664287',cast(ceiling(rand() * 100) as int) union all
select '13530919083',cast(ceiling(rand() * 100) as int) union all
select '18234556314',cast(ceiling(rand() * 100) as int) union all
select '13474733470',cast(ceiling(rand() * 100) as int) union all
select '18296458453',cast(ceiling(rand() * 100) as int) union all
select '18873817751',cast(ceiling(rand() * 100) as int) union all
select '18818192628',cast(ceiling(rand() * 100) as int) union all
select '14729982237',cast(ceiling(rand() * 100) as int) union all
select '15149008859',cast(ceiling(rand() * 100) as int) union all
select '15052185012',cast(ceiling(rand() * 100) as int) union all
select '13734432749',cast(ceiling(rand() * 100) as int) union all
select '15288811679',cast(ceiling(rand() * 100) as int) union all
select '18856687180',cast(ceiling(rand() * 100) as int) union all
select '13614296678',cast(ceiling(rand() * 100) as int) union all
select '13488977885',cast(ceiling(rand() * 100) as int) union all
select '13974628089',cast(ceiling(rand() * 100) as int) union all
select '14730949474',cast(ceiling(rand() * 100) as int) union all
select '18832303823',cast(ceiling(rand() * 100) as int) union all
select '16564065353',cast(ceiling(rand() * 100) as int) union all
select '15862051776',cast(ceiling(rand() * 100) as int) union all
select '18335101787',cast(ceiling(rand() * 100) as int) union all
select '13814847515',cast(ceiling(rand() * 100) as int) union all
select '13534089467',cast(ceiling(rand() * 100) as int) union all
select '13400136794',cast(ceiling(rand() * 100) as int) union all
select '18313175544',cast(ceiling(rand() * 100) as int) union all
select '19810499467',cast(ceiling(rand() * 100) as int) union all
select '13667354115',cast(ceiling(rand() * 100) as int) union all
select '19801403242',cast(ceiling(rand() * 100) as int) union all
select '15298676065',cast(ceiling(rand() * 100) as int) union all
select '13408291585',cast(ceiling(rand() * 100) as int) union all
select '15163545011',cast(ceiling(rand() * 100) as int) union all
select '13618944074',cast(ceiling(rand() * 100) as int) union all
select '14727919796',cast(ceiling(rand() * 100) as int) union all
select '13886277789',cast(ceiling(rand() * 100) as int) union all
select '16502037132',cast(ceiling(rand() * 100) as int))t ;
--添加测试维表数据
insert overwrite table tmp.dim_test
select * from(
select '13662052023', '510000197308182618' , '王五' union all
select '15297767438', '510000197308182618' , '王五' union all
select '13448166906', '510000197308182618' , '王五' union all
select '18761301136', '510000197308182618' , '王五' union all
select '15002957977', '510000197308182618' , '王五' union all
select '13479003370', '510000197308182618' , '王五' union all
select '15290025770', '510000197308182618' , '王五' union all
select '15893995861', '510000197308182618' , '王五' union all
select '13779034035', '510000197308182618' , '王五' union all
select '18499348035', '510000197308182618' , '王五' union all
select '13517368998', '510000197308182618' , '王五' union all
select '15779531492', '510000197308182618' , '王五' union all
select '18803558821', '510000197308182618' , '王五' union all
select '15958352586', '510000197308182618' , '王五' union all
select '15882414878', '510000197308182618' , '王五' union all
select '18341489428', '510000197308182618' , '王五' union all
select '18394366640', '510000197308182618' , '王五' union all
select '15232521687', '510000197308182618' , '王五' union all
select '13824261587', '510000197308182618' , '王五' union all
select '15834685512', '510000197308182618' , '王五' union all
select '18332271773', '510000197308182618' , '王五' union all
select '18877691441', '510000197308182618' , '王五' union all
select '18208413183', '510000197308182618' , '王五' union all
select '13956254975', '510000197308182618' , '王五' union all
select '14743548078', '510000197308182618' , '王五' union all
select '13857838595', '510000197308182618' , '王五' union all
select '15917583611', '510000197308182618' , '王五' union all
select '18456354103', '510000197308182618' , '王五' union all
select '17891947651', '510000197308182618' , '王五' union all
select '18422036142', '510000197308182618' , '王五' union all
select '18760137082', '510000197308182618' , '王五' union all
select '13956546444', '510000197308182618' , '王五' union all
select '16507557714', '510000197308182618' , '王五' union all
select '16571288424', '510000197308182617' , '李四' union all
select '15754232708', '510000197308182617' , '李四' union all
select '18418334823', '510000197308182617' , '李四' union all
select '18852948674', '510000197308182617' , '李四' union all
select '13773697394', '510000197308182617' , '李四' union all
select '17866756967', '510000197308182617' , '李四' union all
select '16559463240', '510000197308182617' , '李四' union all
select '13978035899', '510000197308182617' , '李四' union all
select '18889712632', '510000197308182617' , '李四' union all
select '18291029052', '510000197308182617' , '李四' union all
select '13402816661', '510000197308182617' , '李四' union all
select '18261114028', '510000197308182617' , '李四' union all
select '13971785792', '510000197308182617' , '李四' union all
select '15272922457', '510000197308182617' , '李四' union all
select '19893587120', '510000197308182617' , '李四' union all
select '13886432258', '510000197308182617' , '李四' union all
select '19879865504', '510000197308182617' , '李四' union all
select '15006487377', '510000197308182617' , '李四' union all
select '18438674581', '510000197308182617' , '李四' union all
select '15049728065', '510000197308182617' , '李四' union all
select '16561423528', '510000197308182617' , '李四' union all
select '17241666629', '510000197308182617' , '李四' union all
select '15840954741', '510000197308182617' , '李四' union all
select '13405779964', '510000197308182617' , '李四' union all
select '16509557151', '510000197308182617' , '李四' union all
select '18496471497', '510000197308182617' , '李四' union all
select '13470786775', '510000197308182617' , '李四' union all
select '15091609760', '510000197308182617' , '李四' union all
select '18409525671', '510000197308182617' , '李四' union all
select '13998936553', '510000197308182617' , '李四' union all
select '15212871052', '510000197308182617' , '李四' union all
select '15294864565', '510000197308182617' , '李四' union all
select '16532664287', '510000197308182617' , '李四' union all
select '13530919083', '510000197308182617' , '李四' union all
select '18234556314', '510000197308182617' , '李四' union all
select '13474733470', '510000197308182617' , '李四' union all
select '18296458453', '510000197308182617' , '李四' union all
select '18873817751', '510000197308182617' , '李四' union all
select '18818192628', '510000197308182617' , '李四' union all
select '14729982237', '510000197308182616' , '张三' union all
select '15149008859', '510000197308182616' , '张三' union all
select '15052185012', '510000197308182616' , '张三' union all
select '13734432749', '510000197308182616' , '张三' union all
select '15288811679', '510000197308182616' , '张三' union all
select '18856687180', '510000197308182616' , '张三' union all
select '13614296678', '510000197308182616' , '张三' union all
select '13488977885', '510000197308182616' , '张三' union all
select '13974628089', '510000197308182616' , '张三' union all
select '14730949474', '510000197308182616' , '张三' union all
select '18832303823', '510000197308182616' , '张三' union all
select '16564065353', '510000197308182616' , '张三' union all
select '15862051776', '510000197308182616' , '张三' union all
select '18335101787', '510000197308182616' , '张三' union all
select '13814847515', '510000197308182616' , '张三' union all
select '13534089467', '510000197308182616' , '张三' union all
select '13400136794', '510000197308182616' , '张三' union all
select '18313175544', '510000197308182616' , '张三' union all
select '19810499467', '510000197308182616' , '张三' union all
select '13667354115', '510000197308182616' , '张三' union all
select '19801403242', '510000197308182616' , '张三' union all
select '15298676065', '510000197308182616' , '张三' union all
select '13408291585', '510000197308182616' , '张三' union all
select '15163545011', '510000197308182616' , '张三' union all
select '13618944074', '510000197308182616' , '张三' union all
select '14727919796', '510000197308182616' , '张三' union all
select '13886277789', '510000197308182616' , '张三' union all
select '16502037132', '510000197308182616' , '张三' )t;
3.kylin设计模型、CUBE
略
4.构建20220101的CUBE
略
5.查询20220101的结果
--查询结果
select b.id_card,b.user_name ,sum(a.metric) as metric
from tmp.fact_test a
left join tmp.dim_test b
on a.tel = b.tel
where a.dt='20220101'
group by b.id_card,b.user_name
hive查询结果
kylin查询结果
4.事实表插入20220102的测试数据,修改测试维度数据
(张三三没变,李四四有一个变成了王五五,王五五有几个变成了王六六,2020102的数据都对应给了无名名)
--添加测试事实表数据 20200102
insert overwrite table tmp.fact_test partition (dt = '20220102')
select * from(
select '13662052000',cast(ceiling(rand() * 100) as int) union all
select '15297767400',cast(ceiling(rand() * 100) as int) union all
select '13448166900',cast(ceiling(rand() * 100) as int) union all
select '18761301100',cast(ceiling(rand() * 100) as int) union all
select '15002957900',cast(ceiling(rand() * 100) as int) union all
select '13479003300',cast(ceiling(rand() * 100) as int) union all
select '15290025700',cast(ceiling(rand() * 100) as int) union all
select '15893995800',cast(ceiling(rand() * 100) as int))t;
--更新维表 张三三没变,李四四有一个变成了王五五,王五五有几个变成了王六六,2022102的数据都对应给了无名名
insert overwrite table tmp.dim_test
select * from(
select '13662052023', '520000197308182618' , '王五五' union all
select '15297767438', '520000197308182618' , '王五五' union all
select '13448166906', '520000197308182618' , '王五五' union all
select '18761301136', '520000197308182618' , '王五五' union all
select '15002957977', '520000197308182618' , '王五五' union all
select '13479003370', '520000197308182618' , '王五五' union all
select '15290025770', '520000197308182618' , '王五五' union all
select '15893995861', '520000197308182618' , '王五五' union all
select '13779034035', '520000197308182618' , '王五五' union all
select '18499348035', '520000197308182618' , '王五五' union all
select '13517368998', '520000197308182618' , '王五五' union all
select '15779531492', '520000197308182618' , '王五五' union all
select '18803558821', '520000197308182618' , '王五五' union all
select '15958352586', '520000197308182618' , '王五五' union all
select '15882414878', '520000197308182618' , '王五五' union all
select '18341489428', '520000197308182618' , '王五五' union all
select '18394366640', '520000197308182618' , '王五五' union all
select '15232521687', '520000197308182618' , '王五五' union all
select '13824261587', '520000197308182618' , '王五五' union all
select '15834685512', '520000197308182618' , '王五五' union all
select '18332271773', '520000197308182618' , '王五五' union all
select '18877691441', '520000197308182618' , '王五五' union all
select '18208413183', '520000197308182618' , '王五五' union all
select '13956254975', '520000197308182618' , '王五五' union all
select '14743548078', '520000197308182618' , '王五五' union all
select '13857838595', '520000197308182618' , '王五五' union all
select '15917583611', '520000197308182618' , '王五五' union all
select '18456354103', '520000197308182618' , '王五五' union all
select '17891947651', '520000197308182618' , '王五五' union all
select '18422036142', '520000197308182619' , '王六六' union all
select '18760137082', '520000197308182619' , '王六六' union all
select '13956546444', '520000197308182619' , '王六六' union all
select '16507557714', '520000197308182619' , '王六六' union all
select '16571288424', '520000197308182618' , '王五五' union all
select '15754232708', '520000197308182617' , '李四四' union all
select '18418334823', '520000197308182617' , '李四四' union all
select '18852948674', '520000197308182617' , '李四四' union all
select '13773697394', '520000197308182617' , '李四四' union all
select '17866756967', '520000197308182617' , '李四四' union all
select '16559463240', '520000197308182617' , '李四四' union all
select '13978035899', '520000197308182617' , '李四四' union all
select '18889712632', '520000197308182617' , '李四四' union all
select '18291029052', '520000197308182617' , '李四四' union all
select '13402816661', '520000197308182617' , '李四四' union all
select '18261114028', '520000197308182617' , '李四四' union all
select '13971785792', '520000197308182617' , '李四四' union all
select '15272922457', '520000197308182617' , '李四四' union all
select '19893587120', '520000197308182617' , '李四四' union all
select '13886432258', '520000197308182617' , '李四四' union all
select '19879865504', '520000197308182617' , '李四四' union all
select '15006487377', '520000197308182617' , '李四四' union all
select '18438674581', '520000197308182617' , '李四四' union all
select '15049728065', '520000197308182617' , '李四四' union all
select '16561423528', '520000197308182617' , '李四四' union all
select '17241666629', '520000197308182617' , '李四四' union all
select '15840954741', '520000197308182617' , '李四四' union all
select '13405779964', '520000197308182617' , '李四四' union all
select '16509557151', '520000197308182617' , '李四四' union all
select '18496471497', '520000197308182617' , '李四四' union all
select '13470786775', '520000197308182617' , '李四四' union all
select '15091609760', '520000197308182617' , '李四四' union all
select '18409525671', '520000197308182617' , '李四四' union all
select '13998936553', '520000197308182617' , '李四四' union all
select '15212871052', '520000197308182617' , '李四四' union all
select '15294864565', '520000197308182617' , '李四四' union all
select '16532664287', '520000197308182617' , '李四四' union all
select '13530919083', '520000197308182617' , '李四四' union all
select '18234556314', '520000197308182617' , '李四四' union all
select '13474733470', '520000197308182617' , '李四四' union all
select '18296458453', '520000197308182617' , '李四四' union all
select '18873817751', '520000197308182617' , '李四四' union all
select '18818192628', '520000197308182617' , '李四四' union all
select '14729982237', '520000197308182616' , '张三三' union all
select '15149008859', '520000197308182616' , '张三三' union all
select '15052185012', '520000197308182616' , '张三三' union all
select '13734432749', '520000197308182616' , '张三三' union all
select '15288811679', '520000197308182616' , '张三三' union all
select '18856687180', '520000197308182616' , '张三三' union all
select '13614296678', '520000197308182616' , '张三三' union all
select '13488977885', '520000197308182616' , '张三三' union all
select '13974628089', '520000197308182616' , '张三三' union all
select '14730949474', '520000197308182616' , '张三三' union all
select '18832303823', '520000197308182616' , '张三三' union all
select '16564065353', '520000197308182616' , '张三三' union all
select '15862051776', '520000197308182616' , '张三三' union all
select '18335101787', '520000197308182616' , '张三三' union all
select '13814847515', '520000197308182616' , '张三三' union all
select '13534089467', '520000197308182616' , '张三三' union all
select '13400136794', '520000197308182616' , '张三三' union all
select '18313175544', '520000197308182616' , '张三三' union all
select '19810499467', '520000197308182616' , '张三三' union all
select '13667354115', '520000197308182616' , '张三三' union all
select '19801403242', '520000197308182616' , '张三三' union all
select '15298676065', '520000197308182616' , '张三三' union all
select '13408291585', '520000197308182616' , '张三三' union all
select '15163545011', '520000197308182616' , '张三三' union all
select '13618944074', '520000197308182616' , '张三三' union all
select '14727919796', '520000197308182616' , '张三三' union all
select '13886277789', '520000197308182616' , '张三三' union all
select '16502037132', '520000197308182616' , '张三三' union all
select '13662052000', '610000197308182616' , '无名名' union all
select '15297767400', '610000197308182616' , '无名名' union all
select '13448166900', '610000197308182616' , '无名名' union all
select '18761301100', '610000197308182616' , '无名名' union all
select '15002957900', '610000197308182616' , '无名名' union all
select '13479003300', '610000197308182616' , '无名名' union all
select '15290025700', '610000197308182616' , '无名名' union all
select '15893995800', '610000197308182616' , '无名名')t;
7.构建20220102的CUBE
略
8.查询20220101的结果,并与第5步的结果进行比较
select b.id_card,b.user_name ,sum(a.metric) as metric,count(1) as ct
from tmp.fact_test a
left join tmp.dim_test b
on a.tel = b.tel
where a.dt='20220101'
group by b.id_card,b.user_name
hive查询结果
kylin查询结果
查询结果中的身份证号码,姓名,指标结果与第5步比较都发生了变化。而且身份证号码和姓名都来自于新的维表。
结论
维表变化之后,历史数据无需全部重新构建(只要有一个日期分区使用了新的维表构建即可),查询结果会以新的维表进行计算。