经典sql练习题
1、数据和建表语句
shop,month,电子类销售额,服装类销售额,食品类销售额
shop,mth, dz, fz, sp
a,2019-01,3000,5000,2000
a,2019-02,6000,4000,3000
a,2019-03,3000,4000,2000
a,2019-04,3500,5500,4000
a,2019-05,3800,5200,6000
a,2019-06,3900,3000,4000
a,2019-07,2000,2500,2800
a,2019-08,1000,4000,2600
b,2019-01,3000,5000,2000
b,2019-02,6000,4000,3000
b,2019-03,3000,4000,2000
b,2019-04,3500,5500,4000
b,2019-05,3800,5200,6000
b,2019-06,3600,3000,4000
b,2019-07,2000,2500,2800
b,2019-08,1000,4000,2600
c,2019-01,3000,5000,2000
c,2019-02,1000,4000,3000
c,2019-03,3000,4000,2000
c,2019-04,3500,5500,4000
c,2019-05,3800,5200,6000
c,2019-06,3900,3000,4000
c,2019-07,2000,2500,2800
c,2019-08,1000,4000,2600
-- 建表
CREATE TABLE DEMO_SALE(
shop STRING,
mth STRING,
dz double,
fz double,
sp double
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
;
-- 加载数据
load data local inpath '/root/sale.txt' into table DEMO_SALE;
1、求所有‘月销售额’>12000的销售记录
select * from ods.DEMO_SALE where dz+fz+sp>12000;
+-------+----------+---------+---------+---------+--+
| shop | mth | dz | fz | sp |
+-------+----------+---------+---------+---------+--+
| a | 2019-02 | 6000.0 | 4000.0 | 3000.0 |
| a | 2019-04 | 3500.0 | 5500.0 | 4000.0 |
| a | 2019-05 | 3800.0 | 5200.0 | 6000.0 |
| b | 2019-02 | 6000.0 | 4000.0 | 3000.0 |
| b | 2019-04 | 3500.0 | 5500.0 | 4000.0 |
| b | 2019-05 | 3800.0 | 5200.0 | 6000.0 |
| c | 2019-04 | 3500.0 | 5500.0 | 4000.0 |
| c | 2019-05 | 3800.0 | 5200.0 | 6000.0 |
+-------+----------+---------+---------+---------+--+
2、求每个店铺的个品类全年销售总额
select shop,sum(dz) as dz_total,sum(fz) as fz_total,sum(sp) as sp_total from ods.DEMO_SALE group by shop;
+-------+-----------+-----------+-----------+--+
| shop | dz_total | fz_total | sp_total |
+-------+-----------+-----------+-----------+--+
| a | 26200.0 | 33200.0 | 26400.0 |
| b | 25900.0 | 33200.0 | 26400.0 |
| c | 21200.0 | 33200.0 | 26400.0 |
+-------+-----------+-----------+-----------+--+
3、求每个店铺的各品类各季度销售额
select shop,jd,
sum(dz) as dz_jd_total,
sum(fz) as fz_jd_total,
sum(sp) as sp_jd_total
from
(
select
shop,
mth,
case when mth between '2019-01' and '2019-03' then '1季度'
when mth between '2019-04' and '2019-06' then '2季度'
when mth between '2019-07' and '2019-09' then '3季度'
else '4季度' end as jd,
dz,
fz,
sp
from
ods.DEMO_SALE
) tt group by shop,jd
+-------+------+--------------+--------------+--------------+--+
| shop | jd | dz_jd_total | fz_jd_total | sp_jd_total |
+-------+------+--------------+--------------+--------------+--+
| a | 1季度 | 12000.0 | 13000.0 | 7000.0 |
| a | 2季度 | 11200.0 | 13700.0 | 14000.0 |
| a | 3季度 | 3000.0 | 6500.0 | 5400.0 |
| b | 1季度 | 12000.0 | 13000.0 | 7000.0 |
| b | 2季度 | 10900.0 | 13700.0 | 14000.0 |
| b | 3季度 | 3000.0 | 6500.0 | 5400.0 |
| c | 1季度 | 7000.0 | 13000.0 | 7000.0 |
| c | 2季度 | 11200.0 | 13700.0 | 14000.0 |
| c | 3季度 | 3000.0 | 6500.0 | 5400.0 |
+-------+------+--------------+--------------+--------------+--+
4、求每个店铺的每月销售额
SELECT
shop,mth,(dz+fz+sp) as ze
FROM ods.DEMO_SALE
+-------+----------+----------+--+
| shop | mth | ze |
+-------+----------+----------+--+
| a | 2019-01 | 10000.0 |
| a | 2019-02 | 13000.0 |
| a | 2019-03 | 9000.0 |
| a | 2019-04 | 13000.0 |
| a | 2019-05 | 15000.0 |
| a | 2019-06 | 10900.0 |
| a | 2019-07 | 7300.0 |
| a | 2019-08 | 7600.0 |
| b | 2019-01 | 10000.0 |
| b | 2019-02 | 13000.0 |
| b | 2019-03 | 9000.0 |
| b | 2019-04 | 13000.0 |
| b | 2019-05 | 15000.0 |
| b | 2019-06 | 10600.0 |
| b | 2019-07 | 7300.0 |
| b | 2019-08 | 7600.0 |
| c | 2019-01 | 10000.0 |
| c | 2019-02 | 8000.0 |
| c | 2019-03 | 9000.0 |
| c | 2019-04 | 13000.0 |
| c | 2019-05 | 15000.0 |
| c | 2019-06 | 10900.0 |
| c | 2019-07 | 7300.0 |
| c | 2019-08 | 7600.0 |
+-------+----------+----------+--+
5、求每个店铺的年销售总额
1、第一种方法。注意 如果列的别名是汉语 需要使用 飘号 `
SELECT
shop as `店铺`,sum(dz+fz+sp) as `全年总额`
FROM ods.DEMO_SALE
GROUP BY shop
2、
SELECT
shop as `店铺`,sum(dz)+sum(fz)+sum(sp) as `全年总额`
FROM ods.DEMO_SALE
GROUP BY shop
+-----+----------+--+
| 店铺 | 全年总额 |
+-----+----------+--+
| a | 85800.0 |
| b | 85500.0 |
| c | 80800.0 |
+-----+----------+--+
6、求每个店铺每个月份中、销售额最大的品类销售额及其月份
greatest
SELECT
shop,mth,greatest(dz,fz,sp) as zde
FROM ods.DEMO_SALE
+-------+----------+-------+--+
| shop | mth | zde |
+-------+----------+-------+--+
| a | 2019-01 | 5000 |
| a | 2019-02 | 6000 |
| a | 2019-03 | 4000 |
| a | 2019-04 | 5500 |
| a | 2019-05 | 6000 |
| a | 2019-06 | 4000 |
| a | 2019-07 | 2800 |
| a | 2019-08 | 4000 |
| b | 2019-01 | 5000 |
| b | 2019-02 | 6000 |
| b | 2019-03 | 4000 |
| b | 2019-04 | 5500 |
| b | 2019-05 | 6000 |
| b | 2019-06 | 4000 |
| b | 2019-07 | 2800 |
| b | 2019-08 | 4000 |
| c | 2019-01 | 5000 |
| c | 2019-02 | 4000 |
| c | 2019-03 | 4000 |
| c | 2019-04 | 5500 |
| c | 2019-05 | 6000 |
| c | 2019-06 | 4000 |
| c | 2019-07 | 2800 |
| c | 2019-08 | 4000 |
+-------+----------+-------+--+
7、求每个店铺每个月份中,销售额最大的品类、及其销售额、及其月份
SELECT
shop,mth,
case
when dz>=fz and dz>=sp then '电子'
when fz>=dz and fz>=sp then '服装'
when sp>=dz and sp>=fz then '食品'
end as `最大品类`,
greatest(cast(dz as int),cast(fz as int),cast(sp as int)) as `最大额`
FROM ods.DEMO_SALE
+-------+----------+-------+-------+--+
| shop | mth | 最大品类 | 最大额 |
+-------+----------+-------+-------+--+
| a | 2019-01 | 服装 | 5000 |
| a | 2019-02 | 电子 | 6000 |
| a | 2019-03 | 服装 | 4000 |
| a | 2019-04 | 服装 | 5500 |
| a | 2019-05 | 食品 | 6000 |
| a | 2019-06 | 食品 | 4000 |
| a | 2019-07 | 食品 | 2800 |
| a | 2019-08 | 服装 | 4000 |
| b | 2019-01 | 服装 | 5000 |
| b | 2019-02 | 电子 | 6000 |
| b | 2019-03 | 服装 | 4000 |
| b | 2019-04 | 服装 | 5500 |
| b | 2019-05 | 食品 | 6000 |
| b | 2019-06 | 食品 | 4000 |
| b | 2019-07 | 食品 | 2800 |
| b | 2019-08 | 服装 | 4000 |
| c | 2019-01 | 服装 | 5000 |
| c | 2019-02 | 服装 | 4000 |
| c | 2019-03 | 服装 | 4000 |
| c | 2019-04 | 服装 | 5500 |
| c | 2019-05 | 食品 | 6000 |
| c | 2019-06 | 食品 | 4000 |
| c | 2019-07 | 食品 | 2800 |
| c | 2019-08 | 服装 | 4000 |
+-------+----------+-------+-------+--+
插播数据分析函数。
数据
cookie1,2015-04-10,1
cookie1,2015-04-11,5
cookie1,2015-04-12,7
cookie1,2015-04-13,3
cookie1,2015-04-14,2
cookie1,2015-04-15,4
cookie1,2015-04-16,4
create database if not exists cookie;
use cookie;
drop table if exists cookie1;
create table cookie1(cookieid string, createtime string, pv int) row format delimited fields terminated by ',';
load data local inpath "/home/dw/codersu/cookie1.txt" into table cookie1;
select * from cookie1;
数据分析函数 sum()
select
cookieid,
createtime,
pv,
sum(pv) over (partition by cookieid order by createtime rows between unbounded preceding and current row) as pv1,
sum(pv) over (partition by cookieid order by createtime) as pv2,
sum(pv) over (partition by cookieid) as pv3,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and current row) as pv4,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and 1 following) as pv5,
sum(pv) over (partition by cookieid order by createtime rows between current row and unbounded following) as pv6
from cookie1;
+-----------+-------------+-----+------+------+------+------+------+------+--+
| cookieid | createtime | pv | pv1 | pv2 | pv3 | pv4 | pv5 | pv6 |
+-----------+-------------+-----+------+------+------+------+------+------+--+
| cookie1 | 2015-04-16 | 4 | 26 | 26 | 26 | 13 | 13 | 4 |
| cookie1 | 2015-04-15 | 4 | 22 | 22 | 26 | 16 | 20 | 8 |
| cookie1 | 2015-04-14 | 2 | 18 | 18 | 26 | 17 | 21 | 10 |
| cookie1 | 2015-04-13 | 3 | 16 | 16 | 26 | 16 | 18 | 13 |
| cookie1 | 2015-04-12 | 7 | 13 | 13 | 26 | 13 | 16 | 20 |
| cookie1 | 2015-04-11 | 5 | 6 | 6 | 26 | 6 | 13 | 25 |
| cookie1 | 2015-04-10 | 1 | 1 | 1 | 26 | 1 | 6 | 26 |
+-----------+-------------+-----+------+------+------+------+------+------+--+
pv1: 分组内从起点到当前行的pv累积,如,11号的pv1=10号的pv+11号的pv, 12号=10号+11号+12号
pv2: 同pv1
pv3: 分组内(cookie1)所有的pv累加
pv4: 分组内当前行+往前3行,如,11号=10号+11号, 12号=10号+11号+12号, 13号=10号+11号+12号+13号, 14号=11号+12号+13号+14号
pv5: 分组内当前行+往前3行+往后1行,如,14号=11号+12号+13号+14号+15号=5+7+3+2+4=21
pv6: 分组内当前行+往后所有行,如,13号=13号+14号+15号+16号=3+2+4+4=13,14号=14号+15号+16号=2+4+4=10
如果不指定ROWS BETWEEN,默认为从起点到当前行;
如果不指定ORDER BY,则将分组内所有值累加;
关键是理解ROWS BETWEEN含义,也叫做WINDOW子句:
PRECEDING:往前
FOLLOWING:往后
CURRENT ROW:当前行
UNBOUNDED:起点,
UNBOUNDED PRECEDING 表示从前面的起点,
UNBOUNDED FOLLOWING:表示到后面的终点
–其他AVG,MIN,MAX,和SUM用法一样。
引用blog地址:https://www.cnblogs.com/ZackSun/p/9713435.html
8.求"电子类"商品如下报表:
店铺,月份,月额,累计到月额
a,2019-01,3000,3000
a,2019-02,6000,9000
.....
SELECT
shop,
mth,
dz,
sum(dz) over(partition by shop order by mth rows between unbounded preceding and current row)
-- rows between x preceding and current row
-- rows between unbounded preceding and current row
-- rows between unbounded preceding and unbounded following
-- rows between x preceding and y following
FROM ods.DEMO_SALE
+-------+----------+---------+---------------+--+
| shop | mth | dz | sum_window_0 |
+-------+----------+---------+---------------+--+
| a | 2019-01 | 3000.0 | 3000.0 |
| a | 2019-02 | 6000.0 | 9000.0 |
| a | 2019-03 | 3000.0 | 12000.0 |
| a | 2019-04 | 3500.0 | 15500.0 |
| a | 2019-05 | 3800.0 | 19300.0 |
| a | 2019-06 | 3900.0 | 23200.0 |
| a | 2019-07 | 2000.0 | 25200.0 |
| a | 2019-08 | 1000.0 | 26200.0 |
| b | 2019-01 | 3000.0 | 3000.0 |
| b | 2019-02 | 6000.0 | 9000.0 |
| b | 2019-03 | 3000.0 | 12000.0 |
| b | 2019-04 | 3500.0 | 15500.0 |
| b | 2019-05 | 3800.0 | 19300.0 |
| b | 2019-06 | 3600.0 | 22900.0 |
| b | 2019-07 | 2000.0 | 24900.0 |
| b | 2019-08 | 1000.0 | 25900.0 |
| c | 2019-01 | 3000.0 | 3000.0 |
| c | 2019-02 | 1000.0 | 4000.0 |
| c | 2019-03 | 3000.0 | 7000.0 |
| c | 2019-04 | 3500.0 | 10500.0 |
| c | 2019-05 | 3800.0 | 14300.0 |
| c | 2019-06 | 3900.0 | 18200.0 |
| c | 2019-07 | 2000.0 | 20200.0 |
| c | 2019-08 | 1000.0 | 21200.0 |
+-------+----------+---------+---------------+--+
变形
店铺,月份,月额,累计到月额 ,全年总额
a,2019-01,3000,3000, ...
a,2019-02,6000,9000, ...
SELECT
shop,
mth,
dz,
sum(dz) over(partition by shop order by mth rows between unbounded preceding and current row) as `月累计额`,
sum(dz) over(partition by shop order by mth rows between unbounded preceding and unbounded following) as `年总额`
FROM ods.DEMO_SALE
+-------+----------+---------+----------+----------+--+
| shop | mth | dz | 月累计额 | 年总额 |
+-------+----------+---------+----------+----------+--+
| a | 2019-01 | 3000.0 | 3000.0 | 26200.0 |
| a | 2019-02 | 6000.0 | 9000.0 | 26200.0 |
| a | 2019-03 | 3000.0 | 12000.0 | 26200.0 |
| a | 2019-04 | 3500.0 | 15500.0 | 26200.0 |
| a | 2019-05 | 3800.0 | 19300.0 | 26200.0 |
| a | 2019-06 | 3900.0 | 23200.0 | 26200.0 |
| a | 2019-07 | 2000.0 | 25200.0 | 26200.0 |
| a | 2019-08 | 1000.0 | 26200.0 | 26200.0 |
| b | 2019-01 | 3000.0 | 3000.0 | 25900.0 |
| b | 2019-02 | 6000.0 | 9000.0 | 25900.0 |
| b | 2019-03 | 3000.0 | 12000.0 | 25900.0 |
| b | 2019-04 | 3500.0 | 15500.0 | 25900.0 |
| b | 2019-05 | 3800.0 | 19300.0 | 25900.0 |
| b | 2019-06 | 3600.0 | 22900.0 | 25900.0 |
| b | 2019-07 | 2000.0 | 24900.0 | 25900.0 |
| b | 2019-08 | 1000.0 | 25900.0 | 25900.0 |
| c | 2019-01 | 3000.0 | 3000.0 | 21200.0 |
| c | 2019-02 | 1000.0 | 4000.0 | 21200.0 |
| c | 2019-03 | 3000.0 | 7000.0 | 21200.0 |
| c | 2019-04 | 3500.0 | 10500.0 | 21200.0 |
| c | 2019-05 | 3800.0 | 14300.0 | 21200.0 |
| c | 2019-06 | 3900.0 | 18200.0 | 21200.0 |
| c | 2019-07 | 2000.0 | 20200.0 | 21200.0 |
| c | 2019-08 | 1000.0 | 21200.0 | 21200.0 |
+-------+----------+---------+----------+----------+--+
9、求每个店铺的月销售总额和月累计销售总额
+-------+----------+-----------------+-------------+
| shop | mth | 月销售总额 | 月累计总额 |
+-------+----------+-----------------+-------------+
| a | 2019-01 | 10000.0 | 10000.0 |
| a | 2019-02 | 13000.0 | 23000.0 |
| a | 2019-03 | 9000.0 | 32000.0 |
SELECT
shop,
mth,
dz+fz+sp as `月销售总额`,
sum(dz+fz+sp) over(partition by shop order by mth rows between unbounded preceding and current row) as `月累计总额`
FROM ods.DEMO_SALE
+-------+----------+----------+----------+--+
| shop | mth | 月销售总额 | 月累计总额 |
+-------+----------+----------+----------+--+
| a | 2019-01 | 10000.0 | 10000.0 |
| a | 2019-02 | 13000.0 | 23000.0 |
| a | 2019-03 | 9000.0 | 32000.0 |
| a | 2019-04 | 13000.0 | 45000.0 |
| a | 2019-05 | 15000.0 | 60000.0 |
| a | 2019-06 | 10900.0 | 70900.0 |
| a | 2019-07 | 7300.0 | 78200.0 |
| a | 2019-08 | 7600.0 | 85800.0 |
| b | 2019-01 | 10000.0 | 10000.0 |
| b | 2019-02 | 13000.0 | 23000.0 |
| b | 2019-03 | 9000.0 | 32000.0 |
| b | 2019-04 | 13000.0 | 45000.0 |
| b | 2019-05 | 15000.0 | 60000.0 |
| b | 2019-06 | 10600.0 | 70600.0 |
| b | 2019-07 | 7300.0 | 77900.0 |
| b | 2019-08 | 7600.0 | 85500.0 |
| c | 2019-01 | 10000.0 | 10000.0 |
| c | 2019-02 | 8000.0 | 18000.0 |
| c | 2019-03 | 9000.0 | 27000.0 |
| c | 2019-04 | 13000.0 | 40000.0 |
| c | 2019-05 | 15000.0 | 55000.0 |
| c | 2019-06 | 10900.0 | 65900.0 |
| c | 2019-07 | 7300.0 | 73200.0 |
| c | 2019-08 | 7600.0 | 80800.0 |
+-------+----------+----------+----------+--+
10、求每个品类的月销售总额和每个品类的累计月销售总额
+-------+----------+---------+--------------------+----------+---------------+----------+---------------+
| shop | mth | dz | 电子月累计 | fz | 服装月累计 | sp | 食品月累计 |
+-------+----------+---------+--------------------+----------+---------------+----------+---------------+
| a | 2019-01 | 3000.0 | 3000.0 | 5000.0 | 5000.0 | 2000.0 | 2000.0 |
| a | 2019-02 | 6000.0 | 9000.0 | 4000.0 | 9000.0 | 3000.0 | 5000.0 |
| a | 2019-03 | 3000.0 | 12000.0 | 4000.0 | 13000.0 | 2000.0 | 7000.0 |
| a | 2019-04 | 3500.0 | 15500.0 | 5500.0 | 18500.0 | 4000.0 | 11000.0 |
| a | 2019-05 | 3800.0 | 19300.0 | 5200.0 | 23700.0 | 6000.0 | 17000.0 |
SELECT
shop,
mth,
dz,
sum(dz) over(partition by shop order by mth rows between unbounded preceding and current row) as `电子月累计`,
fz,
sum(fz) over(partition by shop order by mth rows between unbounded preceding and current row) as `服装月累计`,
sp,
sum(sp) over(partition by shop order by mth rows between unbounded preceding and current row) as `食品月累计`
FROM ods.DEMO_SALE
+-------+----------+---------+----------+---------+----------+---------+----------+--+
| shop | mth | dz | 电子月累计 | fz | 服装月累计 | sp | 食品月累计 |
+-------+----------+---------+----------+---------+----------+---------+----------+--+
| a | 2019-01 | 3000.0 | 3000.0 | 5000.0 | 5000.0 | 2000.0 | 2000.0 |
| a | 2019-02 | 6000.0 | 9000.0 | 4000.0 | 9000.0 | 3000.0 | 5000.0 |
| a | 2019-03 | 3000.0 | 12000.0 | 4000.0 | 13000.0 | 2000.0 | 7000.0 |
| a | 2019-04 | 3500.0 | 15500.0 | 5500.0 | 18500.0 | 4000.0 | 11000.0 |
| a | 2019-05 | 3800.0 | 19300.0 | 5200.0 | 23700.0 | 6000.0 | 17000.0 |
| a | 2019-06 | 3900.0 | 23200.0 | 3000.0 | 26700.0 | 4000.0 | 21000.0 |
| a | 2019-07 | 2000.0 | 25200.0 | 2500.0 | 29200.0 | 2800.0 | 23800.0 |
| a | 2019-08 | 1000.0 | 26200.0 | 4000.0 | 33200.0 | 2600.0 | 26400.0 |
| b | 2019-01 | 3000.0 | 3000.0 | 5000.0 | 5000.0 | 2000.0 | 2000.0 |
| b | 2019-02 | 6000.0 | 9000.0 | 4000.0 | 9000.0 | 3000.0 | 5000.0 |
| b | 2019-03 | 3000.0 | 12000.0 | 4000.0 | 13000.0 | 2000.0 | 7000.0 |
| b | 2019-04 | 3500.0 | 15500.0 | 5500.0 | 18500.0 | 4000.0 | 11000.0 |
| b | 2019-05 | 3800.0 | 19300.0 | 5200.0 | 23700.0 | 6000.0 | 17000.0 |
| b | 2019-06 | 3600.0 | 22900.0 | 3000.0 | 26700.0 | 4000.0 | 21000.0 |
| b | 2019-07 | 2000.0 | 24900.0 | 2500.0 | 29200.0 | 2800.0 | 23800.0 |
| b | 2019-08 | 1000.0 | 25900.0 | 4000.0 | 33200.0 | 2600.0 | 26400.0 |
| c | 2019-01 | 3000.0 | 3000.0 | 5000.0 | 5000.0 | 2000.0 | 2000.0 |
| c | 2019-02 | 1000.0 | 4000.0 | 4000.0 | 9000.0 | 3000.0 | 5000.0 |
| c | 2019-03 | 3000.0 | 7000.0 | 4000.0 | 13000.0 | 2000.0 | 7000.0 |
| c | 2019-04 | 3500.0 | 10500.0 | 5500.0 | 18500.0 | 4000.0 | 11000.0 |
| c | 2019-05 | 3800.0 | 14300.0 | 5200.0 | 23700.0 | 6000.0 | 17000.0 |
| c | 2019-06 | 3900.0 | 18200.0 | 3000.0 | 26700.0 | 4000.0 | 21000.0 |
| c | 2019-07 | 2000.0 | 20200.0 | 2500.0 | 29200.0 | 2800.0 | 23800.0 |
| c | 2019-08 | 1000.0 | 21200.0 | 4000.0 | 33200.0 | 2600.0 | 26400.0 |
+-------+----------+---------+----------+---------+----------+---------+----------+--+
11、那些店铺电子类产品的销售有过连续4月增长
需要的函数介绍 上面引用的blog也有介绍这个函数
LEAD(col,n,DEFAULT) 用于统计窗口内往下第n行值
第一个参数为列名,
第二个参数为往下第n行(可选,默认为1),
第三个参数为默认值(当往下第n行为NULL时候,取默认值,如不指定,则为NULL)
1、在一行中,取出下一行的电子类产品的销售额。
SELECT
shop,mth,dz,lead(dz,1,null) over(partition by shop order by mth) as next_dz
FROM ods.DEMO_SALE
+-------+----------+---------+----------+--+
| shop | mth | dz | next_dz |
+-------+----------+---------+----------+--+
| a | 2019-01 | 3000.0 | 6000.0 |
| a | 2019-02 | 6000.0 | 3000.0 |
| a | 2019-03 | 3000.0 | 3500.0 |
| a | 2019-04 | 3500.0 | 3800.0 |
| a | 2019-05 | 3800.0 | 3900.0 |
| a | 2019-06 | 3900.0 | 2000.0 |
| a | 2019-07 | 2000.0 | 1000.0 |
| a | 2019-08 | 1000.0 | NULL |
| b | 2019-01 | 3000.0 | 6000.0 |
| b | 2019-02 | 6000.0 | 3000.0 |
| b | 2019-03 | 3000.0 | 3500.0 |
| b | 2019-04 | 3500.0 | 3800.0 |
| b | 2019-05 | 3800.0 | 3600.0 |
| b | 2019-06 | 3600.0 | 2000.0 |
| b | 2019-07 | 2000.0 | 1000.0 |
| b | 2019-08 | 1000.0 | NULL |
| c | 2019-01 | 3000.0 | 1000.0 |
| c | 2019-02 | 1000.0 | 3000.0 |
| c | 2019-03 | 3000.0 | 3500.0 |
| c | 2019-04 | 3500.0 | 3800.0 |
| c | 2019-05 | 3800.0 | 3900.0 |
| c | 2019-06 | 3900.0 | 2000.0 |
| c | 2019-07 | 2000.0 | 1000.0 |
| c | 2019-08 | 1000.0 | NULL |
+-------+----------+---------+----------+--+
2、找出有增长的月份,并标记连续的序号
with as tmp as (
select
shop,mth,row_number() over(partition by shop order by mth) as rn
from
(
SELECT
shop,mth,dz,lead(dz,1,null) over(partition by shop order by mth) as next_dz
FROM ods.DEMO_SALE
) o1
where dz<next_dz
);
+-------+----------+-----+--+
| shop | mth | rn |
+-------+----------+-----+--+
| a | 2019-01 | 1 |
| a | 2019-03 | 2 |
| a | 2019-04 | 3 |
| a | 2019-05 | 4 |
| b | 2019-01 | 1 |
| b | 2019-03 | 2 |
| b | 2019-04 | 3 |
| c | 2019-02 | 1 |
| c | 2019-03 | 2 |
| c | 2019-04 | 3 |
| c | 2019-05 | 4 |
+-------+----------+-----+--+
3、使用月份减去 排列序号。 如果连这增长,月份减去排列学号 是一个相同的值。 例如上面的例子
+-------+----------+-----+--+
| shop | mth | rn | mth - rn
+-------+----------+-----+--+
| a | 2019-01 | 1 | 0
| a | 2019-03 | 2 | 1
| a | 2019-04 | 3 | 1
| a | 2019-05 | 4 | 1
假设再有一列 为 a 2019-08 5
+-------+----------+-----+--+
| shop | mth | rn | mth - rn
+-------+----------+-----+--+
| a | 2019-01 | 1 | 0
| a | 2019-03 | 2 | 1
| a | 2019-04 | 3 | 1
| a | 2019-05 | 4 | 1
| a | 2019-08 | 5 | 3
此时月份减去 排列序号 为 3 8-5 = 3 则证明 中间肯定没有连续增长。 其实也就是根据这个技巧来判断的。
求解 月份 - 排列序列号
create temporary table default.tmp_monty_sub_xh
select
shop,
cast(substr(mth,6) as int)-rn as zz
from default.temp
+-------+------+--+
| shop | _c1 |
+-------+------+--+
| a | 0 | => a,1 a连续一个月增长
| a | 1 |
| a | 1 | => a,4 a连续4个月增长
| a | 1 |
| b | 0 | => b,1
| b | 1 | => b,2
| b | 1 |
| c | 1 | => c,4
| c | 1 |
| c | 1 |
| c | 1 |
+-------+------+--+
4、按照店铺加差值的组合,留下个数>=3的店铺
select
distinct shop --一个店铺存在多次连着4个月增长.所以需要distinct
from
(
select
shop,
count(1) as cnts
from default.tmp_monty_sub_xh
group by shop,zz
)
where cnts>=3
group by shop
3和4可以合成一步来操作
select
distinct shop --一个店铺存在多次连着4个月增长.所以需要distinct
from
from
(
select
shop,
count(1) as cnts
from tmp
group by shop,cast(substr(mth,6) as int)-rn
)
where cnts>=3
group by shop