建表
--切换数据库
use db_function;
--建表
create table tb_login(
userid string,
logintime string
) row format delimited fields terminated by '\t';
创建数据:vim /export/data/login.log
A 2021-03-22
B 2021-03-22
C 2021-03-22
A 2021-03-23
C 2021-03-23
A 2021-03-24
B 2021-03-24
加载数据
load data local inpath '/export/data/login.log' into table tb_login;
查询数据
select * from tb_login;
统计连续两天登录
select
userid,
logintime,
--本次登陆日期的第二天
date_add(logintime,1) as nextday,
--按照用户id分区,按照登陆日期排序,取下一次登陆时间,取不到就为0
lead(logintime,1,0) over (partition by userid order by logintime) as nextlogin
from tb_login;
with t1 as (
select
userid,
logintime,
--本次登陆日期的第二天
date_add(logintime,1) as nextday,
--按照用户id分区,按照登陆日期排序,取下一次登陆时间,取不到就为0
lead(logintime,1,0) over (partition by userid order by logintime) as nextlogin
from tb_login )
select distinct userid from t1 where nextday = nextlogin;
统计连续N天登录
select
userid,
logintime,
--本次登陆日期的第N天
date_add(logintime,N-1) as nextday,
--按照用户id分区,按照登陆日期排序,取下下一次登陆时间,取不到就为0
lead(logintime,N-1,0) over (partition by userid order by logintime) as nextlogin
from tb_login;
在这张表中,只要nextlogin和nextday相等则表示符合连续N天登录,再进行用户id去重即可得到连续N天登录的用户id,
所以将上面的表用with封装成临时表t1,再对临时表进行筛选即可得出答案。
with t1 as (
select
userid,
logintime,
--本次登陆日期的第三天
date_add(logintime,2) as nextday,
--按照用户id分区,按照登陆日期排序,取下下一次登陆时间,取不到就为0
lead(logintime,2,0) over (partition by userid order by logintime) as nextlogin
from tb_login )
select distinct userid from t1 where nextday = nextlogin;