表结构如下:
CREATE EXTERNAL TABLE `tmp.shop_detail_tb`(
`id` string COMMENT '',
`name` string COMMENT '',
`type` string COMMENT '',
`amt` string COMMENT '',
`month` string COMMENT ''
)
PARTITIONED BY (
`dt` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
总结:
group by a,b grouping sets((a,b),(a),(b),()) 等价于 group by a,b union all group by a union all group by b union all sum()无group by
group by a,b with cube 等价于 group by a,b union all group by a union all group by b union all sum()无group by 【cube 2个字段 ,是 union all group by 4次;cube 3个字段 ,是 union all group by 8次;指数级的】
group by a,b with rollup 等价于 group by a,b union all group by a union all sum()无group by --【无union all group by b】【rollup 2个字段 ,是 union all group by 3次;rollup 3个字段 ,是 union all group by 4次; 线性的】
cube 是 grouping sets的特例;
rollup是cube的特例【rollup只group by 第一个字段开头的字段组合】【rullup函数是cube的子集,以最左侧维度为主,按照顺序依次进行聚合.】
原表:
select * from tmp.shop_detail_tb where dt = '2021-01-30' order by id ; -- res5
1 grouping sets 用法:
select
name,
type,
sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type
grouping sets((name),(type)) ; -- res3
2 GROUPING__ID 用法:
select
name,
type,
sum(amt),
GROUPING__ID
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type
grouping sets((name),(type))
order by GROUPING__ID ; -- 两个_ ,res4
3 cube用法 第一次: NULL是总计
select
grouping__id,
name,
type,
sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type WITH CUBE; -- res11
3 cube用法 第二次:
select
grouping__id as id,
COALESCE(name,'总计') as name,
COALESCE(type,'总计') as type,
sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type WITH CUBE
order by id ; -- res10
4 rollup用法:
select
name,
type,
sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type WITH ROLLUP; -- res9
以下用法是错误的:
select
if(grouping(name)=1, 'ALL', name) name, -- Hive2.0 没有grouping 用法
if(grouping(type)=1, 'ALL', type) type,
sum(amt)
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type
grouping sets((name),(type)) ; -- FAILED: SemanticException Line 0:-1 Invalid function 'grouping'
select
name,
type,
sum(amt),
grouping_id -- 是两个_
from tmp.shop_detail_tb
where dt = '2021-01-30' group by name,type
grouping sets((name),(type)) ; -- FAILED: SemanticException [Error 10025]: Line 5:6 Expression not in GROUP BY key 'grouping_id'
select
name,
type,
sum(amt),
GROUPING__ID
from tmp.shop_detail_tb
where dt = '2021-01-30'
grouping sets((name),(type))
group by name,type -- 先写group by ,后写grouping sets
order by GROUPING__ID ; -- FAILED: ParseException line 8:4 missing EOF at 'grouping' near ''2021-01-30''
select
if(grouping(name)=1, 'ALL', name) name,
if(grouping(type)=1, 'ALL', type) type,
count(1)
from tmp.shop_detail_tb
where dt = '2021-01-30'
group by
cube(name, type) --- cube无此用法, FAILED: SemanticException Line 0:-1 Invalid function 'cube'
Hive 2.0.0