CASE003-窗口函数综合练习

317 阅读5分钟

练习一

  • SQL类型:Hive
  • 测试数据
CREATE TABLE business (
  name STRING COMMENT '顾客姓名',
  order_date STRING COMMENT '购买日期',
  cost INT COMMENT '消费金额'
) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

INSERT INTO business VALUES
('JACK', '2017-01-01', 10),
('TONY', '2017-01-02', 15),
('JACK', '2017-02-03', 23),
('TONY', '2017-01-04', 29),
('JACK', '2017-01-05', 46),
('JACK', '2017-04-06', 42),
('TONY', '2017-01-07', 50),
('JACK', '2017-01-08', 55),
('MART', '2017-04-08', 62),
('MART', '2017-04-09', 68),
('NEIL', '2017-05-10', 12),
('MART', '2017-04-11', 75),
('NEIL', '2017-06-12', 80),
('MART', '2017-04-13', 94);

SELECT * FROM business;

+----------------+----------------------+----------------+
| business.name  | business.order_date  | business.cost  |
+----------------+----------------------+----------------+
| JACK           | 2017-01-01           | 10             |
| TONY           | 2017-01-02           | 15             |
| JACK           | 2017-02-03           | 23             |
| TONY           | 2017-01-04           | 29             |
| JACK           | 2017-01-05           | 46             |
| JACK           | 2017-04-06           | 42             |
| TONY           | 2017-01-07           | 50             |
| JACK           | 2017-01-08           | 55             |
| MART           | 2017-04-08           | 62             |
| MART           | 2017-04-09           | 68             |
| NEIL           | 2017-05-10           | 12             |
| MART           | 2017-04-11           | 75             |
| NEIL           | 2017-06-12           | 80             |
| MART           | 2017-04-13           | 94             |
+----------------+----------------------+----------------+
  • 需求:
  1. 查询在2017年4月份消费过的顾客及总人数
-- COUNT(name) OVER():设置COUNT()作用的数据范围为符合条件的全部数据
-- GROUP BY name:相当于对name去重
SELECT name, COUNT(name) OVER() AS cost_count
FROM business
WHERE SUBSTR(order_date, 1, 7) = '2017-04'
GROUP BY name;

+-------+-------------+
| name  | cost_count  |
+-------+-------------+
| MART  | 2           |
| JACK  | 2           |
+-------+-------------+
  1. 查询顾客的购买明细、每个顾客的月购买总额、所有顾客的月购买总额
SELECT 
    name, 
    order_date, 
    cost,
    SUM(cost) OVER(PARTITION BY name, SUBSTR(order_date, 1, 7)) AS name_month_cost,
    SUM(cost) OVER(PARTITION BY SUBSTR(order_date, 1, 7)) AS month_cost
FROM business;

+-------+-------------+-------+------------------+-------------+
| name  | order_date  | cost  | name_month_cost  | month_cost  |
+-------+-------------+-------+------------------+-------------+
| JACK  | 2017-01-05  | 46    | 111              | 205         |
| TONY  | 2017-01-02  | 15    | 94               | 205         |
| TONY  | 2017-01-04  | 29    | 94               | 205         |
| TONY  | 2017-01-07  | 50    | 94               | 205         |
| JACK  | 2017-01-01  | 10    | 111              | 205         |
| JACK  | 2017-01-08  | 55    | 111              | 205         |
| JACK  | 2017-02-03  | 23    | 23               | 23          |
| MART  | 2017-04-09  | 68    | 299              | 341         |
| MART  | 2017-04-11  | 75    | 299              | 341         |
| MART  | 2017-04-13  | 94    | 299              | 341         |
| JACK  | 2017-04-06  | 42    | 42               | 341         |
| MART  | 2017-04-08  | 62    | 299              | 341         |
| NEIL  | 2017-05-10  | 12    | 12               | 12          |
| NEIL  | 2017-06-12  | 80    | 80               | 80          |
+-------+-------------+-------+------------------+-------------+
  1. 将每个顾客的消费金额按照日期进行累加
SELECT 
    name, 
    order_date, 
    cost,
    SUM(cost) OVER(PARTITION BY name ORDER BY order_date) AS sum_cost
FROM business;

+-------+-------------+-------+-----------+
| name  | order_date  | cost  | sum_cost  |
+-------+-------------+-------+-----------+
| JACK  | 2017-01-01  | 10    | 10        |
| JACK  | 2017-01-05  | 46    | 56        |
| JACK  | 2017-01-08  | 55    | 111       |
| JACK  | 2017-02-03  | 23    | 134       |
| JACK  | 2017-04-06  | 42    | 176       |
| MART  | 2017-04-08  | 62    | 62        |
| MART  | 2017-04-09  | 68    | 130       |
| MART  | 2017-04-11  | 75    | 205       |
| MART  | 2017-04-13  | 94    | 299       |
| NEIL  | 2017-05-10  | 12    | 12        |
| NEIL  | 2017-06-12  | 80    | 92        |
| TONY  | 2017-01-02  | 15    | 15        |
| TONY  | 2017-01-04  | 29    | 44        |
| TONY  | 2017-01-07  | 50    | 94        |
+-------+-------------+-------+-----------+
  1. 查询每个顾客的上次消费日期
SELECT 
    name, 
    order_date, 
    cost,
    LAG(order_date) OVER(PARTITION BY name ORDER BY order_date) AS last_order_date
FROM business;

+-------+-------------+-------+------------------+
| name  | order_date  | cost  | last_order_date  |
+-------+-------------+-------+------------------+
| JACK  | 2017-01-01  | 10    | NULL             |
| JACK  | 2017-01-05  | 46    | 2017-01-01       |
| JACK  | 2017-01-08  | 55    | 2017-01-05       |
| JACK  | 2017-02-03  | 23    | 2017-01-08       |
| JACK  | 2017-04-06  | 42    | 2017-02-03       |
| MART  | 2017-04-08  | 62    | NULL             |
| MART  | 2017-04-09  | 68    | 2017-04-08       |
| MART  | 2017-04-11  | 75    | 2017-04-09       |
| MART  | 2017-04-13  | 94    | 2017-04-11       |
| NEIL  | 2017-05-10  | 12    | NULL             |
| NEIL  | 2017-06-12  | 80    | 2017-05-10       |
| TONY  | 2017-01-02  | 15    | NULL             |
| TONY  | 2017-01-04  | 29    | 2017-01-02       |
| TONY  | 2017-01-07  | 50    | 2017-01-04       |
+-------+-------------+-------+------------------+
  1. 查询日期最早的前20%的明细记录
SELECT * FROM (
    SELECT 
        name, 
        order_date, 
        cost,
        NTILE(5) OVER(ORDER BY order_date) AS sorted
    FROM business) AS t
WHERE sorted = 1;

+---------+---------------+---------+-----------+
| t.name  | t.order_date  | t.cost  | t.sorted  |
+---------+---------------+---------+-----------+
| JACK    | 2017-01-01    | 10      | 1         |
| TONY    | 2017-01-02    | 15      | 1         |
| TONY    | 2017-01-04    | 29      | 1         |
+---------+---------------+---------+-----------+

练习二

  • SQL类型:Hive
  • 测试数据
CREATE TABLE score_w (
    name STRING,
    subject STRING,
    score INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

INSERT INTO score_w VALUES
('悟空', '语文', 87),
('悟空', '数学', 95),
('悟空', '英语', 68),
('大海', '语文', 94),
('大海', '数学', 56),
('大海', '英语', 84),
('宋宋', '语文', 64),
('宋宋', '数学', 86),
('宋宋', '英语', 84),
('小婷', '语文', 65),
('小婷', '数学', 85),
('小婷', '英语', 78);

SELECT * FROM score_w;

+---------------+------------------+----------------+
| score_w.name  | score_w.subject  | score_w.score  |
+---------------+------------------+----------------+
| 悟空           | 语文              | 87             |
| 悟空           | 数学              | 95             |
| 悟空           | 英语              | 68             |
| 大海           | 语文              | 94             |
| 大海           | 数学              | 56             |
| 大海           | 英语              | 84             |
| 宋宋           | 语文              | 64             |
| 宋宋           | 数学              | 86             |
| 宋宋           | 英语              | 84             |
| 小婷           | 语文              | 65             |
| 小婷           | 数学              | 85             |
| 小婷           | 英语              | 78             |
+---------------+------------------+----------------+
  • 需求:计算每门学科成绩排名
SELECT 
    name,
    subject,
    score,
    ROW_NUMBER() OVER(PARTITION BY subject ORDER BY score DESC) AS row_number,
    RANK() OVER(PARTITION BY subject ORDER BY score DESC) AS rank,
    DENSE_RANK() OVER(PARTITION BY subject ORDER BY score DESC) AS dense_rank
FROM score_w;

+-------+----------+--------+-------------+-------+-------------+
| name  | subject  | score  | row_number  | rank  | dense_rank  |
+-------+----------+--------+-------------+-------+-------------+
| 悟空   | 数学      | 95     | 1           | 1     | 1           |
| 宋宋   | 数学      | 86     | 2           | 2     | 2           |
| 小婷   | 数学      | 85     | 3           | 3     | 3           |
| 大海   | 数学      | 56     | 4           | 4     | 4           |
| 宋宋   | 英语      | 84     | 1           | 1     | 1           |
| 大海   | 英语      | 84     | 2           | 1     | 1           |
| 小婷   | 英语      | 78     | 3           | 3     | 2           |
| 悟空   | 英语      | 68     | 4           | 4     | 3           |
| 大海   | 语文      | 94     | 1           | 1     | 1           |
| 悟空   | 语文      | 87     | 2           | 2     | 2           |
| 小婷   | 语文      | 65     | 3           | 3     | 3           |
| 宋宋   | 语文      | 64     | 4           | 4     | 4           |
+-------+----------+--------+-------------+-------+-------------+