1.概述
使用clickhouse的聚合函数,可以实现数据的有限更新,从而减少使用optimize table带来的性能开销。
2.测试数据表
CREATE TABLE temp.iop_user_profile_list on cluster ck_cluster
(
`uid` Int64 COMMENT '用户唯一标识',
`reg_time` DateTime COMMENT '注册时间',
`nickname` String COMMENT '昵称',
`phone` String COMMENT '手机号码',
`part` Int64
)
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/temp/tables/{shard}/iop_user_profile_list', '{replica}')
partition by part
order by uid
SETTINGS index_granularity = 8192;
CREATE TABLE temp.iop_user_profile_list_all on cluster ck_cluster
as temp.iop_user_profile_list
ENGINE = Distributed('ck_cluster', 'temp', 'iop_user_profile_list', intHash64(uid));
insert into temp.iop_user_profile_list_all(uid, reg_time, nickname, phone, part)
select 123, now(), '小明', '123', 1;
insert into temp.iop_user_profile_list_all(uid, reg_time, nickname, phone, part)
select 123, now(), '小明', '1234', 2;
insert into temp.iop_user_profile_list_all(uid, reg_time, nickname, phone, part)
select 124, now(), '小明', '1234', 2;
3.使用聚合函数
CREATE TABLE temp.iop_user_profile_test on cluster ck_cluster
(
`uid` Int64 COMMENT '用户唯一标识',
`reg_time` AggregateFunction(argMaxIf, DateTime, DateTime, UInt8) COMMENT '注册时间',
`nickname` AggregateFunction(argMaxIf, String, DateTime, UInt8) COMMENT '昵称',
`phone` AggregateFunction(argMaxIf, String, DateTime, UInt8) COMMENT '手机号码'
)
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/temp/tables/{shard}/iop_user_profile_test', '{replica}')
ORDER BY uid
SETTINGS index_granularity = 8192;
CREATE TABLE temp.iop_user_profile_test_all on cluster ck_cluster
as temp.iop_user_profile_test
ENGINE = Distributed('ck_cluster', 'temp', 'iop_user_profile_test', intHash64(uid));
从测试表写入数据:
写入也要用聚合函数来实现,选取最大版本保留,数据写入和顺序没有关系,If函数可以用来判断空值然后不写入,写入即可查询,不用optimize table操作。
insert into temp.iop_user_profile_test_all(uid, nickname, reg_time, phone)
SELECT
uid,
argMaxIfState(nickname, reg_time, nickname!='') nickname,
argMaxIfState(reg_time, reg_time, 1=1) reg,
argMaxIfState(phone,reg_time,phone!='') phone
FROM temp.iop_user_profile_list_all
group by uid;
4.查询结果
select uid,argMaxIfMerge(nickname),
argMaxIfMerge(reg_time),
argMaxIfMerge(phone)
from temp.iop_user_profile_test_all
group by uid;
uid=123,数据已经更新为最新版本的了