编写一个HiveSQL实现查找所有至少连续三次出现的数字的三种方法

49 阅读2分钟

编写一个SQL实现查找所有至少连续三次出现的数字的三种方法

题干
已知t1.txt存储了以下数据
20230801,1
20230802,1
20230803,1
20230804,2
20230805,1
20230806,2
20230807,2
20230808,3
20230809,3
20230810,3
20230811,3
20230812,4
需求
编写一个SQL实现查找所有至少连续三次出现的数字
number
1
3
答案
-- 建表
create table if not exists t1(
    id string,
    number int
)
    row format delimited
    fields terminated by ",";
    
-- 加载数据
load data inpath "/input/d1.txt" into table t1;

-- 查看数据
select * from t1;

-- 需求:编写一个SQL实现查找所有至少连续三次出现的数字
-- 思路1: 使用(max、min)窗口函数,查找窗口内的最大值和最小值,如果最大值等于最小值,所以窗口内的数相等
with tmp1 as (
    select id,number,
    max(number) over(partition by null order by id rows between 1 preceding and 1 following) as max_num,
    min(number) over(partition by null order by id rows between 1 preceding and 1 following) as min_num
    from t1
    )
    
select distinct number
from  tmp1
where max_num = min_num;

-- 思路2: 使用(lag)窗口函数,算出number的上一行,和上上行,如果三行相等则连续出现三次
with tmp2 as (
    select id,
           number,
           lag(number) over(partition by null order by id ) as f1_num
    from t1
    ),  -- 求出number的上一行
    
tmp3 as (
    select id,
           number,
           f1_num,
           lag(f1_num) over(partition by null order by id ) as f2_num
    from tmp2
    ) -- 求出number的上一行的上一行
    
select distinct number
from  tmp3
where number = f1_num and number = f2_num; -- 三行相等,则number连续出现三次

-- 思路3: 就是让把当前行和后两行的相同列的数据放置在同一行中,然后进行过滤

-- 隐式内连接方式
select distinct (a.number) from test_data4 a, test_data4 b, test_data4 c 
where a.id = b.id - 1 and b.id = c.id - 1
and a.number=b.number and b.number = c.number;

-- 显示内连接方式
select distinct(a.number) as continuenum from test_data4 a
join test_data4 b on a.id = b.id - 1
join test_data4 c on b.id = c.id - 1