本文通过简单易懂的业务场景,旨在提高大家的 SQL 水平。把文中所有的 SQL 全理解了,工作中遇到的各种 SQL 场景和面试中遇到的各种 SQL 问题都能灵活应对。文中所有的 SQL 都支持 Hive 语法,学会了 Hive 的 SQL,那么 MySQL 的 SQL 题也都可以 cover 住,下面的题目难度从简单到困难的都有。笔者在之前练习 SQL 过程中,发现了网上一些比较好的 SQL 题目,但是苦于好多博客提供的 SQL 可读性、规范性、执行效率并不高,因此才有了今天这篇博文,后续如果发现有可优化的的 SQL,笔者会持续更新本博文。
数据表介绍
学生表
create table study.student (
student_id string -- 学生编号
,name string -- 学生姓名
,birthday string -- 学生生日
,sex string -- 学生性别
)
stored as parquet
tblproperties("orc.compress"="snappy");
教师表
create table study.teacher (
teacher_id string -- 教师编号
,name string -- 教师姓名
)
stored as parquet
tblproperties("orc.compress"="snappy");
课程表
create table study.course (
course_id string -- 课程编号
,name string -- 课程名
,teacher_id string -- 课程对应的教师编号
)
stored as parquet
tblproperties("orc.compress"="snappy");
成绩表
create table study.score (
student_id string -- 学生编号
,course_id string -- 课程编号
,score int -- 对应的成绩
)
stored as parquet
tblproperties("orc.compress"="snappy");
答题时请注意:
- 这里认为课程可能是选修课,学生不一定学了所有课程
- 成绩表里的课程都应该在课程表里存在,而且每门课程都应该有对应的教师,且在教师表里可以找到
- 成绩表里的学生也应该在学生表里存在
- 下面的导入数据也是随机写的,读者也可以自己随机制造生成数据导入
导入数据
学生表
insert overwrite table study.student VALUES
('01' , '赵雷' , '1990-01-01' , '男'),
('02' , '钱电' , '1990-12-21' , '男'),
('03' , '孙风' , '1990-12-20' , '男'),
('04' , '李云' , '1990-12-06' , '男'),
('05' , '周梅' , '1991-12-01' , '女'),
('06' , '吴兰' , '1992-01-01' , '女'),
('07' , '郑竹' , '1989-01-01' , '女'),
('09' , '张三' , '2017-12-20' , '女'),
('10' , '李四' , '2017-12-25' , '女'),
('11' , '李四' , '2012-06-06' , '女'),
('12' , '赵六' , '2013-06-13' , '女'),
('13' , '孙七' , '2014-06-01' , '女');
教师表
insert overwrite table study.teacher VALUES
('01' , '张三'),
('02' , '李四'),
('03' , '王五');
课程表
insert overwrite table study.course VALUES
('01' , '语文' , '02'),
('02' , '数学' , '01'),
('03' , '英语' , '03');
成绩表
insert overwrite table study.score VALUES
('01' , '01' , 80),
('01' , '02' , 90),
('01' , '03' , 99),
('02' , '01' , 70),
('02' , '02' , 60),
('02' , '03' , 80),
('03' , '01' , 80),
('03' , '02' , 80),
('03' , '03' , 80),
('04' , '01' , 50),
('04' , '02' , 30),
('04' , '03' , 20),
('05' , '01' , 76),
('05' , '02' , 87),
('06' , '01' , 31),
('06' , '03' , 34),
('07' , '02' , 89),
('07' , '03' , 98);
练习题
-
查询所有教师的全部信息(教师编号和姓名)
-
输出所有学生中男生的全部信息
-
查询所有学生中男生的全部信息,按照生日排降序
-
查询所有学生的全部信息,先按照性别排序,再按照生日排降序
-
求出学生总数
-
查询学生中男生、女生人数
-
参加考试的学生中,查出每个学生的学生编号、选了几门课
-
检索至少选修三门课程的学生学号
-
查询存在不及格的课程编号
-
输出所有课程的课程编号、课程名、对应的教师姓名
-
求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
-
求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
-
查询同时学习" 01 "课程和" 02 "课程的学生编号及01和02课程分数
-
查询" 01 "课程比" 02 "课程成绩高的学生编号及01和02课程分数
-
查询" 01 "课程比" 02 "课程成绩高的学生姓名及01和02课程分数
-
查询选择了 "01"课程但没选择 "02"课程的学生姓名
-
查询学过 '张三' 老师课程的所有同学姓名、生日、性别
-
查询同时学习 "01"、"02"课程学生的学生编号以及"01"和"02"课程成绩
-
查询学习 "01"课程但没有学习 "02"课程学生的学生编号以及"01"课程成绩
-
查询学习 "02"课程但没有学习 "01"课程学生的学生编号以及"02"课程成绩
-
查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
-
查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
-
检索 "01" 课程分数小于 60 分的学生信息及"01" 课程分数,按照分数排降序
-
查询两门及其以上不及格课程的同学的姓名及其平均成绩
-
查询没有学全所有课程的同学的编号 (包含无选课的同学)
-
查询 1990 年出生的学生名单
-
查询名字中含有「风」字的学生信息
-
查询「李」姓老师的数量
-
查询至少有两门课与学号为" 01 "的学生所学相同的学生id
-
查询选修了全部课程的学生id的姓名和姓名
-
查询和" 01 "号的同学学习的课程完全相同的其他同学的学生id
-
统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
-
查询学生的总成绩,并进行排名
-
查询各科成绩前三名的记录
-
查询出只选修两门课程的学生学号和姓名
-
查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
-
查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
-
查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
-
查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
-
查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
-
统计每门课程的学生选修人数(超过 5 人的课程才统计)。
-
查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
答案(请大家先独立思考后,再参考答案)
-
查询所有教师的全部信息(教师编号和姓名)
select * from study.teacher ;
-
输出所有学生中男生的全部信息
select * from study.student where sex = '男' ;
-
查询所有学生中男生的全部信息,按照生日排降序
select * from study.student where sex = '男' order by birthday desc ;
-
查询所有学生的全部信息,先按照性别排序,再按照生日排降序
select * from study.student order by sex ,birthday desc ;
-
求出学生总数
select count(*) from study.student ;
-
查询学生中男生、女生人数
select sex ,count(*) from study.student group by sex
-
参加考试的学生中,查出每个学生的学生编号、选了几门课
select student_id ,count(*) as course_count from study.score group by student_id ;
-
检索至少选修三门课程的学生学号
select student_id ,count(*) as course_num from score group by student_id having course_num >= 3 ;
-
查询存在不及格的课程编号
-- 写法一 select distinct course_id from score where score < 60 -- 写法二 select course_id from score where score< 60 group by course_id
-
输出所有课程的课程编号、课程名、对应的教师姓名
select course_id ,a.name as course_name ,b.name as teacher_name from ( select course_id ,name ,teacher_id from study.course ) a join ( select teacher_id ,name from study.teacher ) b on a.teacher_id = b.teacher_id
-
求每门课程的学生人数及平均分,输出课程编号、对应学生人数、平均分
select course_id ,count(*) as student_count ,avg(score) as avg_score from score group by course_id
-
求每门课程的学生人数及平均分,输出课程名称、对应学生人数、平均分
select name ,student_count ,avg(score) as avg_score from ( select * from study.course ) a join ( select course_id ,count(*) as student_count from score group by course_id ) b on a.course_id = b.course_id
-
查询同时学习" 01 "课程和" 02 "课程的学生编号及01和02课程分数
select a.student_id ,a.score as score_01 ,b.score as score_02 from ( select * from score where course_id = '01' ) a join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id;
-
查询" 01 "课程比" 02 "课程成绩高的学生编号及01和02课程分数
select a.student_id ,a.score as score_01 ,b.score as score_02 from ( select * from score where course_id = '01' ) a join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where a.score > b.score
-
查询" 01 "课程比" 02 "课程成绩高的学生姓名及01和02课程分数
- 连续三道题都是有关联的,前两道题给这道题做铺垫,所以当我们拿到一个向这种比较复杂的需求时,可以进行拆分需求,先拆分成1、在做2,最后达到完整的需求
select name ,score_01 ,score_02 from ( select * from student ) a join ( select a.student_id ,a.score as score_01 ,b.score as score_02 from ( select * from score where course_id = '01' ) a join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where a.score > b.score ) b on a.student_id=b.student_id
-
查询选择了 "01"课程但没选择 "02"课程的学生姓名
- 首先我们先找出选择了 "01"课程但没选择 "02"课程的学生编号,再拿着学生编号关联学生表找到姓名即可
-- 找出选择了 "01"课程但没选择 "02"课程的学生编号 select a.student_id from ( select * from score where course_id = '01' ) a left outer join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where b.student_id is null
-- 拿着学生编号关联学生表找到姓名 select name from ( select a.student_id as student_id from ( select * from score where course_id = '01' ) a left outer join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where b.student_id is null ) a join ( select * from student ) b on a.student_id = b.student_id
-
查询学过 '张三' 老师课程的所有同学姓名、生日、性别
- 解题思路
- 找出 '张三' 老师的教师编号
- 找出 '张三' 老师所有教授的课程编号
- 找出这些课程对应的学生编号
- 根据学生编号找到对应的学生信息
- 根据上述四个步骤,依次书写 SQL
select name ,birthday ,sex from ( select student_id from ( select course_id from ( select teacher_id from teacher where name='张三' ) a join ( select course_id ,teacher_id from course ) b on a.teacher_id = b.teacher_id ) a join ( select student_id ,course_id from score ) b on a.course_id = b.course_id group by student_id ) a join ( select * from student ) b on a.student_id = b.student_id
- 解题思路
-
查询同时学习 "01"、"02"课程学生的学生编号以及"01"和"02"课程成绩
select a.student_id as student_id ,a.score as score_01 ,b.score as score_02 from ( select * from score where course_id = '01' ) a join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id;
-
查询学习 "01"课程但没有学习 "02"课程学生的学生编号以及"01"课程成绩
select a.student_id as student_id ,a.score as score_01 from ( select * from score where course_id = '01' ) a left outer join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where b.student_id is null
-
查询学习 "02"课程但没有学习 "01"课程学生的学生编号以及"02"课程成绩
select b.student_id as student_id ,b.score as score_02 from ( select * from score where course_id = '01' ) a right outer join ( select * from score where course_id = '02' ) b on a.student_id = b.student_id where a.student_id is null
-
查询选课的同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) a join ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_id order by course_num desc ,score_sum desc
-
查询考试平均分大于 60 分同学的学生姓名、选课总数、所有课程的成绩总和、课程平均分,按照科目数排降序、科目数相同按照分数排降序
select a.name as name ,course_num ,score_sum ,score_avg from ( select student_id ,name from student ) a join ( select student_id ,count(course_id) as course_num ,sum(score) as score_sum ,avg(score) as score_avg from score group by student_id ) b on a.student_id=b.student_id where score_avg > 60 order by course_num desc ,score_sum desc
-
检索 "01" 课程分数小于 60 分的学生信息及"01" 课程分数,按照分数排降序
select a.student_id as student_id ,name ,birthday ,sex ,score from ( select * from student ) a join ( select student_id ,score from score where course_id='01' and score < 60 ) b on a.student_id = b.student_id order by score desc
-
查询两门及其以上不及格课程的同学的姓名及其平均成绩
select name ,score_avg from ( select * from student ) a join ( select student_id ,score_avg from ( select student_id ,avg(score) as score_avg ,count(case when score < 60 then 1 end) as fail_count from score group by student_id ) a where fail_count > 1 ) b on a.student_id = b.student_id ;
-
查询没有学全所有课程的同学的编号 (包含无选课的同学)
-在成绩表中找出不满足课程总数的学生id -找出学生表中无成绩的学生id -二者之和
select b.student_id ( select count(*) as course_num from score ) a join ( select student_id ,count(*) as course_num from score ) b on a.course_num = b.course_num union ( select a.student_id from ( select * from student ) a left join ( select * from score ) b on a.student_id = b.student_id where course_id is null ) b
-
查询 1990 年出生的学生名单
select * from student where substr(birthday,1,4)='1990'
-
查询名字中含有「风」字的学生信息
select * from student where name like '%风%'
-
查询「李」姓老师的数量
select count(*) from teacher where name like '李%'
-
查询至少有两门课与学号为" 01 "的学生所学相同的学生id
select a.student_id from ( select b.student_id as student_id ,b.course_id as course_id from ( select course_id from score where student_id='01' ) a join ( select student_id ,course_id from score where student_id <> '01' ) b on a.course_id = b.course_id ) a group by a.student_id having count(course_id)>1;
-
查询选修了全部课程的学生id的姓名和姓名
select b.student_id as student_id ,b.name as name ,a.course_num as course_num from ( select student_id ,a.course_num as course_num from ( select student_id ,count(course_id) as course_num from course ) a join ( select student_id ,count(*) as course_num from score group by student_id ) b on a.course_num = b.course_num ) a join ( select * from student ) b on a.student_id = b.student_id
-
查询和" 01 "号的同学学习的课程完全相同的其他同学的学生id
- 先选出‘01’同学所学的课程id
- 然后选出学过这些课程id的学生id及其选过这些课程数的课程总数
- 匹配“01”同学学的课程总数(存在某同学学的课程超过“01”同学学的课程)
- 匹配成绩表中学生的课程总数
select student_id from ( select student_id ,count(course_id) as course_num from ( select student_id ,course_id from score where student_id <> '01' and course_id in ( select course_id from score where student_id = '01' ) ) a group by student_id ) a join ( select count(course_id) as course_num from score where student_id = '01' ) b on a.course_num = b.course_num join ( select student_id ,count(*) as num_course from score group by student_id ) c on a.course_num = c.course_num
-
统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[60-0] 及所占百分比
-根据课程号在成绩表中查询分数段的个数除以该学习该课程的总数
select a.course_id as course_id ,b.name as name ,round(sum(case when score > 85 then 1 else 0 end)/count(*),2) as '100-85' ,round(sum(case when score between 70 and 84 then 1 else 0 end)/count(*),2) as '85-70' ,round(sum(case when score between 60 and 69 then 1 else 0 end)/count(*),2) as '70-60' ,round(sum(case when score < 60 then 1 else 0 end)/count(*),2) as '60-0' from ( select course_id ,score from score ) a join ( select course_id ,name from course ) b on a.course_id = b.course_id group by a.course_id ,b.name
-
查询学生的总成绩,并进行排名
select student_id ,sum(score) score_sum ,row_number() over(order by sum(score) desc) as rank from score group by student_id
-
查询各科成绩前三名的记录
select * from( select * ,row_number() over(partition by course_id order by score desc) as rank from score ) a where rank <= 3
-
查询出只选修两门课程的学生学号和姓名
select a.student_id as student_id ,a.name as name from ( select student_id ,name from student ) a join ( select student_id from score group by student_id having count(course_id) = 2 ) b on a.student_id = b.student_id;
-
查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列
select course_id ,avg(score) as score_avg from score order by score_avg desc ,course_id asc
-
查询平均成绩大于等于 85 的所有学生的学号、姓名和平均成绩
select a.student_id as student_id ,a.name as name ,b.score_avg from ( select student_id ,name from student ) a join ( select student_id ,avg(score) as score_avg from score group by student_id having avg(score) >= 85 ) b on a.student_id = b.student_id
-
查询课程名称为「数学」,且分数低于 60 的学生姓名和分数
select name ,score from ( select student_id ,score from ( select course_id from course where name = '数学' ) a join ( select student_id ,course_id ,score from score where score < 60 ) b on a.course_id = b.course_id ) a join ( select student_id ,name from student ) b on a.student_id = b.student_id
-
查询课程编号为 01 且课程成绩在 80 分及以上的学生的学号和姓名
select a.student_id as student_id ,name from ( select student_id from score where course_id = '01' and score >= 80 ) a join ( select student_id ,name from student ) b on a.student_id = b.student_id
-
查询选修「张三」老师所授课程的学生中,成绩最高的学生编号、课程编号及其成绩
select student_id ,a.course_id as course_id ,score from ( select course_id from ( select course_id ,teacher_id from course ) a join ( select teacher_id from teacher where name = '张三' ) b on a.teacher_id = b.teacher_id ) a join ( select student_id ,course_id ,score from score ) b on a.course_id = b.course_id order by score desc limit 1 ;
-
统计每门课程的学生选修人数(超过 5 人的课程才统计)。
select a.course_id as course_id ,b.name ,a.num as num from ( select course_id ,count(*) as num from score group by course_id having count(*) >= 5 ) a join ( select course_id ,name from course ) b on a.course_id =b.course_id
-
查询该学生有不同课程但成绩相同的学生编号、课程编号、学生成绩
select a.student_id as student_id ,a.course_id as course_id ,a.score as score from ( select student_id ,course_id ,score from score ) a join ( select student_id ,course_id ,score from score ) b on a.student_id = b.student_id and a.score = b.score and a.course_id <> b.course_id group by student_id ,course_id ,score
引用: