CASE002-行列转换

127 阅读3分钟

案例一:行转列

  • 准备学生成绩表

SET hive.exec.mode.local.auto=true;

CREATE TABLE stu_sc (
  sid INT,
  subject STRING,
  score INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';

INSERT INTO stu_sc VALUES 
(1, '语文', 43), 
(1, '数学', 55),
(2, '语文', 77),
(2, '数学', 88),
(3, '语文', 98),
(3, '数学', 65);

SELECT * FROM stu_sc;
+-------------+-----------------+---------------+
| stu_sc.sid  | stu_sc.subject  | stu_sc.score  |
+-------------+-----------------+---------------+
| 1           | 语文             | 43            |
| 1           | 数学             | 55            |
| 2           | 语文             | 77            |
| 2           | 数学             | 88            |
| 3           | 语文             | 98            |
| 3           | 数学             | 65            |
+-------------+-----------------+---------------+
  • 要求:统计数学成绩比语文成绩好的学生ID

  • 方法一:自连接

SELECT s1.sid
FROM stu_sc AS s1 JOIN stu_sc AS s2 ON s1.sid = s2.sid
AND s1.subject = '语文'
AND s2.subject = '数学'
AND s2.score > s1.score;
  • 方法二:行列转换

将表转换为以下样式:

sidchinese_scoremath_score
14355
27788
39865
-- step1

SELECT 
    sid,
    CASE subject WHEN '语文' THEN score END AS chinese_score,
    CASE subject WHEN '数学' THEN score END AS math_score
FROM stu_sc;

+------+----------------+-------------+
| sid  | chinese_score  | math_score  |
+------+----------------+-------------+
| 1    | 43             | NULL        |
| 1    | NULL           | 55          |
| 2    | 77             | NULL        |
| 2    | NULL           | 88          |
| 3    | 98             | NULL        |
| 3    | NULL           | 65          |
+------+----------------+-------------+

-- step2:这里用SUM()、AVG()、MAX()、MIN()都可以

SELECT sid, MAX(chinese_score) AS chinese_score, MAX(math_score) AS math_score
FROM (
    SELECT 
        sid,
        CASE subject WHEN '语文' THEN score END AS chinese_score,
        CASE subject WHEN '数学' THEN score END AS math_score
    FROM stu_sc
) AS t
GROUP BY sid;

+------+----------------+-------------+
| sid  | chinese_score  | math_score  |
+------+----------------+-------------+
| 1    | 43             | 55          |
| 2    | 77             | 88          |
| 3    | 98             | 65          |
+------+----------------+-------------+

-- step3

SELECT sid FROM (
    SELECT sid, MAX(chinese_score) AS chinese_score, MAX(math_score) AS math_score
    FROM (
        SELECT 
            sid,
            CASE subject WHEN '语文' THEN score END AS chinese_score,
            CASE subject WHEN '数学' THEN score END AS math_score
        FROM stu_sc
    ) AS t1
    GROUP BY sid) AS t2
WHERE math_score > chinese_score;

+------+
| sid  |
+------+
| 1    |
| 2    |
+------+

案例二:行转列

  • 准备测试表
SET hive.exec.mode.local.auto=true;

CREATE TABLE sales (
  year_num INT COMMENT '年份',
  quarter_num INT COMMENT '季度',
  sales_volume INT COMMENT '销售量'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';

INSERT INTO sales VALUES 
(2021, 1, 100), 
(2021, 2, 200),
(2021, 3, 300),
(2021, 4, 400),
(2022, 1, 900),
(2022, 2, 800),
(2022, 3, 700),
(2022, 4, 600);

SELECT * FROM sales;

+-----------------+--------------------+---------------------+
| sales.year_num  | sales.quarter_num  | sales.sales_volume  |
+-----------------+--------------------+---------------------+
| 2021            | 1                  | 100                 |
| 2021            | 2                  | 200                 |
| 2021            | 3                  | 300                 |
| 2021            | 4                  | 400                 |
| 2022            | 1                  | 900                 |
| 2022            | 2                  | 800                 |
| 2022            | 3                  | 700                 |
| 2022            | 4                  | 600                 |
+-----------------+--------------------+---------------------+
  • 要求,输出如下数据
year_numsales_volume_q1sales_volume_q2sales_volume_q3sales_volume_q4
2021100200300400
2022900800700600
  • 实现
SELECT 
    year_num,
    MAX(sales_volume_q1) AS sales_volume_q1,
    MAX(sales_volume_q2) AS sales_volume_q2,
    MAX(sales_volume_q3) AS sales_volume_q3,
    MAX(sales_volume_q4) AS sales_volume_q4
FROM (
    SELECT 
        year_num, 
        CASE WHEN quarter_num = 1 THEN sales_volume END AS sales_volume_q1, 
        CASE WHEN quarter_num = 2 THEN sales_volume END AS sales_volume_q2, 
        CASE WHEN quarter_num = 3 THEN sales_volume END AS sales_volume_q3, 
        CASE WHEN quarter_num = 4 THEN sales_volume END AS sales_volume_q4 
    FROM sales
) AS t
GROUP BY year_num;

案例三:列转行

  • 准备测试表
SET hive.exec.mode.local.auto=true;

CREATE TABLE score (
  sid INT COMMENT '学生ID',
  chinese_score INT COMMENT '语文成绩',
  math_score INT COMMENT '数学成绩',
  english_score INT COMMENT '英语成绩'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';

INSERT INTO score VALUES 
(1, 58, 34, 76), 
(2, 87, 45, 45),
(3, 34, 76, 89);

SELECT * FROM score;

+------------+----------------------+-------------------+----------------------+
| score.sid  | score.chinese_score  | score.math_score  | score.english_score  |
+------------+----------------------+-------------------+----------------------+
| 1          | 58                   | 34                | 76                   |
| 2          | 87                   | 45                | 45                   |
| 3          | 34                   | 76                | 89                   |
+------------+----------------------+-------------------+----------------------+
  • 要求,输出如下数据
sidcoursescore
1Chinese58
1Math34
1English76
2Chinese87
2Math45
2English45
3Chinese34
3Math76
3English89
  • 实现
SELECT sid, 'Chinese' AS course, chinese_score AS score FROM score
UNION
SELECT sid, 'Math' AS course, math_score AS score FROM score
UNION
SELECT sid, 'English' AS course, english_score AS score FROM score
ORDER by sid, course;

案例四:行转列

准备测试表

SET hive.exec.mode.local.auto=true;

CREATE TABLE person_info (
  name STRING COMMENT '姓名',
  constellation STRING COMMENT '星座',
  blood_type STRING COMMENT '血型'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';

INSERT INTO person_info VALUES 
('孙悟空', '白羊座', 'A'),
('大海', '射手座', 'A'),
('宋宋', '白羊座', 'B'),
('猪八戒', '白羊座', 'A'),
('凤姐', '射手座', 'A'),
('老马', '白羊座', 'B');

SELECT * FROM person_info;

+-------------------+----------------------------+-------------------------+
| person_info.name  | person_info.constellation  | person_info.blood_type  |
+-------------------+----------------------------+-------------------------+
| 悟空               | 白羊座                      | A                       |
| 大海               | 射手座                      | A                       |
| 宋宋               | 白羊座                      | B                       |
| 八戒               | 白羊座                      | A                       |
| 凤姐               | 射手座                      | A                       |
| 老马               | 白羊座                      | B                       |
+-------------------+----------------------------+-------------------------+
  • 要求,输出如下数据
constellation_blood_typeperson
射手座,A大海|凤姐
射手座,A悟空|八戒
白羊座,B宋宋|老马
  • 实现
SELECT constellation_blood_type, CONCAT_WS('|', COLLECT_SET(name)) AS person
FROM (
    SELECT name, CONCAT_WS(',', constellation, blood_type) AS constellation_blood_type
    FROM person_info
) AS t
GROUP BY constellation_blood_type;