案例一:行转列
- 准备学生成绩表
SET hive.exec.mode.local.auto=true;
CREATE TABLE stu_sc (
sid INT,
subject STRING,
score INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
INSERT INTO stu_sc VALUES
(1, '语文', 43),
(1, '数学', 55),
(2, '语文', 77),
(2, '数学', 88),
(3, '语文', 98),
(3, '数学', 65);
SELECT * FROM stu_sc;
+-------------+-----------------+---------------+
| stu_sc.sid | stu_sc.subject | stu_sc.score |
+-------------+-----------------+---------------+
| 1 | 语文 | 43 |
| 1 | 数学 | 55 |
| 2 | 语文 | 77 |
| 2 | 数学 | 88 |
| 3 | 语文 | 98 |
| 3 | 数学 | 65 |
+-------------+-----------------+---------------+
-
要求:统计数学成绩比语文成绩好的学生ID
-
方法一:自连接
SELECT s1.sid
FROM stu_sc AS s1 JOIN stu_sc AS s2 ON s1.sid = s2.sid
AND s1.subject = '语文'
AND s2.subject = '数学'
AND s2.score > s1.score;
- 方法二:行列转换
将表转换为以下样式:
| sid | chinese_score | math_score |
|---|---|---|
| 1 | 43 | 55 |
| 2 | 77 | 88 |
| 3 | 98 | 65 |
-- step1
SELECT
sid,
CASE subject WHEN '语文' THEN score END AS chinese_score,
CASE subject WHEN '数学' THEN score END AS math_score
FROM stu_sc;
+------+----------------+-------------+
| sid | chinese_score | math_score |
+------+----------------+-------------+
| 1 | 43 | NULL |
| 1 | NULL | 55 |
| 2 | 77 | NULL |
| 2 | NULL | 88 |
| 3 | 98 | NULL |
| 3 | NULL | 65 |
+------+----------------+-------------+
-- step2:这里用SUM()、AVG()、MAX()、MIN()都可以
SELECT sid, MAX(chinese_score) AS chinese_score, MAX(math_score) AS math_score
FROM (
SELECT
sid,
CASE subject WHEN '语文' THEN score END AS chinese_score,
CASE subject WHEN '数学' THEN score END AS math_score
FROM stu_sc
) AS t
GROUP BY sid;
+------+----------------+-------------+
| sid | chinese_score | math_score |
+------+----------------+-------------+
| 1 | 43 | 55 |
| 2 | 77 | 88 |
| 3 | 98 | 65 |
+------+----------------+-------------+
-- step3
SELECT sid FROM (
SELECT sid, MAX(chinese_score) AS chinese_score, MAX(math_score) AS math_score
FROM (
SELECT
sid,
CASE subject WHEN '语文' THEN score END AS chinese_score,
CASE subject WHEN '数学' THEN score END AS math_score
FROM stu_sc
) AS t1
GROUP BY sid) AS t2
WHERE math_score > chinese_score;
+------+
| sid |
+------+
| 1 |
| 2 |
+------+
案例二:行转列
- 准备测试表
SET hive.exec.mode.local.auto=true;
CREATE TABLE sales (
year_num INT COMMENT '年份',
quarter_num INT COMMENT '季度',
sales_volume INT COMMENT '销售量'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
INSERT INTO sales VALUES
(2021, 1, 100),
(2021, 2, 200),
(2021, 3, 300),
(2021, 4, 400),
(2022, 1, 900),
(2022, 2, 800),
(2022, 3, 700),
(2022, 4, 600);
SELECT * FROM sales;
+-----------------+--------------------+---------------------+
| sales.year_num | sales.quarter_num | sales.sales_volume |
+-----------------+--------------------+---------------------+
| 2021 | 1 | 100 |
| 2021 | 2 | 200 |
| 2021 | 3 | 300 |
| 2021 | 4 | 400 |
| 2022 | 1 | 900 |
| 2022 | 2 | 800 |
| 2022 | 3 | 700 |
| 2022 | 4 | 600 |
+-----------------+--------------------+---------------------+
- 要求,输出如下数据
| year_num | sales_volume_q1 | sales_volume_q2 | sales_volume_q3 | sales_volume_q4 |
|---|---|---|---|---|
| 2021 | 100 | 200 | 300 | 400 |
| 2022 | 900 | 800 | 700 | 600 |
- 实现
SELECT
year_num,
MAX(sales_volume_q1) AS sales_volume_q1,
MAX(sales_volume_q2) AS sales_volume_q2,
MAX(sales_volume_q3) AS sales_volume_q3,
MAX(sales_volume_q4) AS sales_volume_q4
FROM (
SELECT
year_num,
CASE WHEN quarter_num = 1 THEN sales_volume END AS sales_volume_q1,
CASE WHEN quarter_num = 2 THEN sales_volume END AS sales_volume_q2,
CASE WHEN quarter_num = 3 THEN sales_volume END AS sales_volume_q3,
CASE WHEN quarter_num = 4 THEN sales_volume END AS sales_volume_q4
FROM sales
) AS t
GROUP BY year_num;
案例三:列转行
- 准备测试表
SET hive.exec.mode.local.auto=true;
CREATE TABLE score (
sid INT COMMENT '学生ID',
chinese_score INT COMMENT '语文成绩',
math_score INT COMMENT '数学成绩',
english_score INT COMMENT '英语成绩'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
INSERT INTO score VALUES
(1, 58, 34, 76),
(2, 87, 45, 45),
(3, 34, 76, 89);
SELECT * FROM score;
+------------+----------------------+-------------------+----------------------+
| score.sid | score.chinese_score | score.math_score | score.english_score |
+------------+----------------------+-------------------+----------------------+
| 1 | 58 | 34 | 76 |
| 2 | 87 | 45 | 45 |
| 3 | 34 | 76 | 89 |
+------------+----------------------+-------------------+----------------------+
- 要求,输出如下数据
| sid | course | score |
|---|---|---|
| 1 | Chinese | 58 |
| 1 | Math | 34 |
| 1 | English | 76 |
| 2 | Chinese | 87 |
| 2 | Math | 45 |
| 2 | English | 45 |
| 3 | Chinese | 34 |
| 3 | Math | 76 |
| 3 | English | 89 |
- 实现
SELECT sid, 'Chinese' AS course, chinese_score AS score FROM score
UNION
SELECT sid, 'Math' AS course, math_score AS score FROM score
UNION
SELECT sid, 'English' AS course, english_score AS score FROM score
ORDER by sid, course;
案例四:行转列
准备测试表
SET hive.exec.mode.local.auto=true;
CREATE TABLE person_info (
name STRING COMMENT '姓名',
constellation STRING COMMENT '星座',
blood_type STRING COMMENT '血型'
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ',';
INSERT INTO person_info VALUES
('孙悟空', '白羊座', 'A'),
('大海', '射手座', 'A'),
('宋宋', '白羊座', 'B'),
('猪八戒', '白羊座', 'A'),
('凤姐', '射手座', 'A'),
('老马', '白羊座', 'B');
SELECT * FROM person_info;
+-------------------+----------------------------+-------------------------+
| person_info.name | person_info.constellation | person_info.blood_type |
+-------------------+----------------------------+-------------------------+
| 悟空 | 白羊座 | A |
| 大海 | 射手座 | A |
| 宋宋 | 白羊座 | B |
| 八戒 | 白羊座 | A |
| 凤姐 | 射手座 | A |
| 老马 | 白羊座 | B |
+-------------------+----------------------------+-------------------------+
- 要求,输出如下数据
| constellation_blood_type | person |
|---|---|
| 射手座,A | 大海|凤姐 |
| 射手座,A | 悟空|八戒 |
| 白羊座,B | 宋宋|老马 |
- 实现
SELECT constellation_blood_type, CONCAT_WS('|', COLLECT_SET(name)) AS person
FROM (
SELECT name, CONCAT_WS(',', constellation, blood_type) AS constellation_blood_type
FROM person_info
) AS t
GROUP BY constellation_blood_type;