1.groupBitmap
用于存储UInt64数值,且自动去重,这对于我们存储uid非常的适合。
2.使用
通过配置宽表,我们可以得出一些基本的画像规则,用来制定一些常用的标签属性,如性别,地理位置,星座等。
--字符型标签
select uid,
case
when birthday in ('1970-01-01', '1990-03-21', '2000-01-01') then '未知'
when birthday >= toDate(concat(toString(toYear(birthday)) as yy, '-01-20')) and
birthday <= toDate(concat(yy, '-02-18')) then '水瓶座'
when birthday >= toDate(concat(yy, '-02-19')) and birthday <= toDate(concat(yy, '-03-20')) then '双鱼座'
when birthday >= toDate(concat(yy, '-03-21')) and birthday <= toDate(concat(yy, '-04-19')) then '白羊座'
when birthday >= toDate(concat(yy, '-04-20')) and birthday <= toDate(concat(yy, '-05-20')) then '金牛座'
when birthday >= toDate(concat(yy, '-05-21')) and birthday <= toDate(concat(yy, '-06-21')) then '双子座'
when birthday >= toDate(concat(yy, '-06-22')) and birthday <= toDate(concat(yy, '-07-22')) then '巨蟹座'
when birthday >= toDate(concat(yy, '-07-23')) and birthday <= toDate(concat(yy, '-08-22')) then '狮子座'
when birthday >= toDate(concat(yy, '-08-23')) and birthday <= toDate(concat(yy, '-09-22')) then '处女座'
when birthday >= toDate(concat(yy, '-09-23')) and birthday <= toDate(concat(yy, '-10-23')) then '天秤座'
when birthday >= toDate(concat(yy, '-10-24')) and birthday <= toDate(concat(yy, '-11-22')) then '天蝎座'
when birthday >= toDate(concat(yy, '-11-23')) and birthday <= toDate(concat(yy, '-12-21')) then '射手座'
when (birthday >= toDate(concat(yy, '-12-22')) and birthday <= toDate(concat(yy, '-12-31'))) or
(birthday >= toDate(concat(yy, '-01-01')) and birthday <= toDate(concat(yy, '-01-19'))) then '摩羯座'
else '未知' end as constellation
from default.user_wide_all;
新建画像的存储表
create table user_label_string on_cluster ck_cluster
(
label_name String,
label_value String,
bitmap_uid AggregateFunction(groupBitmap, UInt64)
)
engine = ReplicatedAggregatingMergeTree('/clickhouse/dsp_iop/tables/{shard}/user_label_string', '{replica}')
PARTITION BY label_name
ORDER BY (label_name, label_value)
SETTINGS index_granularity = 128;
create table user_label_string_all
(
label_name String,
label_value String,
bitmap_uid AggregateFunction(groupBitmap, UInt64)
)
engine = Distributed('ck_cluster', 'dsp_iop', 'user_label_string', rand());
写入数据:
insert into user_label_string(bitmap_uid, label_name ,label_value)
SELECT groupBitmapState(toUInt64(uid)) as bm,
'constellation',
constellation
from (
select uid,
case
when birthday in ('1970-01-01', '1990-03-21', '2000-01-01') then '未知'
when birthday >= toDate(concat(toString(toYear(birthday)) as yy, '-01-20')) and
birthday <= toDate(concat(yy, '-02-18')) then '水瓶座'
when birthday >= toDate(concat(yy, '-02-19')) and birthday <= toDate(concat(yy, '-03-20')) then '双鱼座'
when birthday >= toDate(concat(yy, '-03-21')) and birthday <= toDate(concat(yy, '-04-19')) then '白羊座'
when birthday >= toDate(concat(yy, '-04-20')) and birthday <= toDate(concat(yy, '-05-20')) then '金牛座'
when birthday >= toDate(concat(yy, '-05-21')) and birthday <= toDate(concat(yy, '-06-21')) then '双子座'
when birthday >= toDate(concat(yy, '-06-22')) and birthday <= toDate(concat(yy, '-07-22')) then '巨蟹座'
when birthday >= toDate(concat(yy, '-07-23')) and birthday <= toDate(concat(yy, '-08-22')) then '狮子座'
when birthday >= toDate(concat(yy, '-08-23')) and birthday <= toDate(concat(yy, '-09-22')) then '处女座'
when birthday >= toDate(concat(yy, '-09-23')) and birthday <= toDate(concat(yy, '-10-23')) then '天秤座'
when birthday >= toDate(concat(yy, '-10-24')) and birthday <= toDate(concat(yy, '-11-22')) then '天蝎座'
when birthday >= toDate(concat(yy, '-11-23')) and birthday <= toDate(concat(yy, '-12-21')) then '射手座'
when (birthday >= toDate(concat(yy, '-12-22')) and birthday <= toDate(concat(yy, '-12-31'))) or
(birthday >= toDate(concat(yy, '-01-01')) and birthday <= toDate(concat(yy, '-01-19'))) then '摩羯座'
else '未知' end as constellation
from default.dwd_user_wide_all
) group by constellation;
3.查询
SELECT
label_name,
label_value,
bitmapCardinality(bitmap_uid) AS bm
FROM dsp_iop.user_label_string_all
WHERE label_name = 'constellation'
Query id: dfc31066-c7dd-419a-ba5d-110339c2785d
┌─label_name────┬─label_value─┬───────bm─┐
│ constellation │ 双子座 │ 796268 │
│ constellation │ 双鱼座 │ 725469 │
│ constellation │ 处女座 │ 877766 │
│ constellation │ 天秤座 │ 828114 │
│ constellation │ 天蝎座 │ 931031 │
│ constellation │ 射手座 │ 724913 │
│ constellation │ 巨蟹座 │ 831805 │
│ constellation │ 摩羯座 │ 1464983 │
│ constellation │ 未知 │ 3401269 │
│ constellation │ 水瓶座 │ 760884 │
│ constellation │ 狮子座 │ 733547 │
│ constellation │ 白羊座 │ 628192 │
│ constellation │ 金牛座 │ 690814 │
└───────────────┴─────────────┴──────────┘