Hive cube / rollup / grouping sets/GROUPING__ID用法

1,009 阅读2分钟

表结构如下:

CREATE EXTERNAL TABLE `tmp.shop_detail_tb`(
`id` string COMMENT '',
`name` string COMMENT '',
`type` string COMMENT '',
`amt` string COMMENT '',
`month` string COMMENT ''
)
PARTITIONED BY (
`dt` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';

总结:

group by a,b grouping sets((a,b),(a),(b),()) 等价于 group by a,b union all group by a union all group by b union all sum()无group by

group by a,b with cube 等价于 group by a,b union all group by a union all group by b union all sum()无group by 【cube 2个字段 ,是 union all group by 4次;cube 3个字段 ,是 union all group by 8次;指数级的】

group by a,b with rollup 等价于 group by a,b union all group by a union all sum()无group by --【无union all group by b】【rollup 2个字段 ,是 union all group by 3次;rollup 3个字段 ,是 union all group by 4次; 线性的】

cube 是 grouping sets的特例;

rollup是cube的特例【rollup只group by 第一个字段开头的字段组合】【rullup函数是cube的子集,以最左侧维度为主,按照顺序依次进行聚合.】

原表:

select * from tmp.shop_detail_tb where dt = '2021-01-30' order by id ; -- res5

1 grouping sets 用法:

select

name,

type,

sum(amt)

from tmp.shop_detail_tb

where dt = '2021-01-30' group by name,type

grouping sets((name),(type)) ; -- res3

2 GROUPING__ID 用法:

select

name,

type,

sum(amt),

GROUPING__ID

from tmp.shop_detail_tb

where dt = '2021-01-30' group by name,type

grouping sets((name),(type))

order by GROUPING__ID ; -- 两个_ ,res4

3 cube用法 第一次: NULL是总计

select

grouping__id,

name,

type,

sum(amt)

from tmp.shop_detail_tb

where dt = '2021-01-30' group by name,type WITH CUBE; -- res11

3 cube用法 第二次:

select

grouping__id as id,

COALESCE(name,'总计') as name,

COALESCE(type,'总计') as type,

sum(amt)

from tmp.shop_detail_tb

where dt = '2021-01-30' group by name,type WITH CUBE

order by id ; -- res10

4 rollup用法:

select

name,

type,

sum(amt)

from tmp.shop_detail_tb

where dt = '2021-01-30' group by name,type WITH ROLLUP; -- res9

以下用法是错误的:

select 
if(grouping(name)=1, 'ALL', name) name, -- Hive2.0 没有grouping 用法
if(grouping(type)=1, 'ALL', type) type,
      sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type  
grouping sets((name),(type)) ; --  FAILED: SemanticException Line 0:-1 Invalid function 'grouping'
 
 
 
select 
      name,
      type,
      sum(amt),
      grouping_id -- 是两个_
    from tmp.shop_detail_tb
    where dt = '2021-01-30' group by name,type  
    grouping sets((name),(type)) ; -- FAILED: SemanticException [Error 10025]: Line 5:6 Expression not in GROUP BY key 'grouping_id'
 
 
 select 
      name,
      type,
      sum(amt),
      GROUPING__ID
    from tmp.shop_detail_tb
    where dt = '2021-01-30'
    grouping sets((name),(type)) 
    group by name,type   -- 先写group by ,后写grouping sets
    order by GROUPING__ID ; -- FAILED: ParseException line 8:4 missing EOF at 'grouping' near ''2021-01-30''
 
 
 
select 
    if(grouping(name)=1, 'ALL', name) name,
    if(grouping(type)=1, 'ALL', type) type,
    count(1)
    from tmp.shop_detail_tb
where dt = '2021-01-30' 
group by
    cube(name, type) --- cube无此用法, FAILED: SemanticException Line 0:-1 Invalid function 'cube'
 

Hive 2.0.0