练习:
一 将下列数据加载hive表。
字段:员工id,员工名字,工作岗位,部门经理,受雇日期,薪水,奖金,部门编号
7369,SMITH,CLERK,7902,1980-12-17,800,null,20
7499,ALLEN,SALESMAN,7698,1981-02-20,1600,300,30
7521,WARD,SALESMAN,7698,1981-02-22,1250,500,30
7566,JONES,MANAGER,7839,1981-04-02,2975,null,20
7654,MARTIN,SALESMAN,7698,1981-09-28,1250,1400,30
7698,BLAKE,MANAGER,7839,1981-05-01,2850,null,30
7782,CLARK,MANAGER,7839,1981-06-09,2450,null,10
7788,SCOTT,ANALYST,7566,1987-04-19,3000,null,20
7839,KING,PRESIDENT,null,1981-11-17,5000,null,10
7844,TURNER,SALESMAN,7698,1981-09-08,1500,0,30
7876,ADAMS,CLERK,7788,1987-05-23,1100,null,20
7900,JAMES,CLERK,7698,1981-12-03,950,null,30
7902,FORD,ANALYST,7566,1981-12-03,3000,null,20
7934,MILLER,CLERK,7782,1982-01-23,1300,null,10
将dept.txt数据插入表字段(DEPTNO、DNAME、LOC)
10,ACCOUNTING,shanghai
20,RESEARCH,DALLAS
30,SALES,CHICAGO
40,OPERATIONS,BOSTON
-- Create the employee table.
-- Columns, in the order of the comma-separated sample rows:
--   id, name, job, manager (the manager's employee id), employed_data (hire date
--   as yyyy-MM-dd text), salary, bouns (bonus), department (department number).
-- manager and bouns are declared as strings; the sample rows carry the literal
-- text "null" in those fields.
create table if not exists employee(
    id int,
    name string,
    job string,
    manager string,
    employed_data string,
    salary int,
    bouns string,
    department int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- Create the department table.
-- Columns, in the order of the comma-separated sample rows:
--   deptno (department number), dname (department name), loc (location).
create table if not exists dept(
    deptno int,
    dname string,
    loc string
)
row format delimited fields terminated by ',';
二:使用hive -e的方式完成下面需求:
1. 列出至少有一个员工的所有部门降序排列。
-- Departments that have at least one employee, largest head count first.
-- Grouping is by department (not job), and the threshold is >= 1 as the question asks.
select
    department,
    count(*) as num
from employee
group by department
having count(*) >= 1
order by num desc;
2. 列出薪金比“SMITH”多的所有员工。
-- Step 1: look up SMITH's salary on its own.
select salary
from employee
where name = 'SMITH';

-- Step 2 (scalar-subquery form): employees paid more than SMITH.
select *
from employee
where salary > (
    select salary
    from employee
    where name = 'SMITH'
);

-- Step 3 (join form): the same comparison without a scalar subquery.
-- Both sides carry the constant key cid = 1, so every employee row is paired
-- with SMITH's row before the salary filter is applied.
select emp.id, emp.name, emp.job, emp.salary
from (
    select id, name, job, salary, 1 as cid
    from employee
) emp
left join (
    select salary, 1 as cid
    from employee
    where name = 'SMITH'
) smith
    on emp.cid = smith.cid
where emp.salary > smith.salary;
3. 列出所有员工的姓名及其直接上级的姓名。
-- Every employee together with the name of their direct manager.
-- The left join keeps employees whose manager value matches no employee id
-- (for example a manager stored as the text 'null'); manager_name is NULL for them.
-- The cast makes the string manager column compare against the int id explicitly.
select
    e.name as employee_name,
    m.name as manager_name
from employee e
left join employee m
    on cast(e.manager as int) = m.id;
4. 列出受雇日期早于其直接上级的所有员工。
-- Employees hired earlier than their direct manager.
-- Self-join: each worker row is matched to the row of the employee named in its manager column.
select worker.*
from employee worker
inner join employee boss
    on worker.manager = boss.id
where worker.employed_data < boss.employed_data;
5. 列出部门名称和这些部门的员工信息,同时列出那些没有员工的部门。
-- Employee details per department; dept is the preserved side of the outer join,
-- so departments without employees still appear (with NULL employee columns).
select
    emp.*,
    d.dname
from dept d
left outer join employee emp
    on d.deptno = emp.department;
6. 列出所有“JAMES”(办事员)的姓名及其部门名称。
-- Name and department name of every employee called JAMES.
-- Only the two requested columns are returned instead of every column of both tables.
select
    e.name,
    d.dname
from employee e
inner join dept d
    on e.department = d.deptno
where e.name = 'JAMES';
7. 列出最低薪金大于1500的各种工作。
-- Rejected attempt, kept for reference (it filters individual rows rather than the per-job minimum):
-- select distinct job from employee where salary+if(bouns='null',0,bouns)>1500;
-- Jobs whose lowest pay exceeds 1500. Pay here is salary plus bonus, where a
-- bonus stored as the text 'null' counts as 0.
select
    job,
    min(salary + if(bouns = 'null', 0, bouns)) as sala_bouns
from employee
group by job
having min(salary + if(bouns = 'null', 0, bouns)) > 1500;
8. 列出在部门“SALES”(销售部)工作的员工的姓名,假定不知道销售部的部门编号
-- Names of the employees working in the SALES department; the department is
-- resolved by name through dept, so its number does not need to be known.
select e.name
from employee e
inner join dept d
    on e.department = d.deptno
where d.dname = 'SALES';
9. 列出薪金高于公司平均薪金的所有员工。
-- Company-wide average pay (salary plus bonus; a bonus stored as the text 'null' counts as 0).
select
    avg(salary + if(bouns = 'null', 0, bouns)) as avg_money,
    1 as cid
from employee;

-- Employees whose pay is above that average.
-- Both sides carry the constant key cid = 1, which attaches the single
-- average row to every employee row.
select emp.id, emp.name, emp.job, emp.manager, emp.salary, emp.bouns
from (
    select
        id, name, job, manager, employed_data, salary, bouns, department,
        salary + if(bouns = 'null', 0, bouns) as money,
        1 as cid
    from employee
) emp
left join (
    select
        avg(salary + if(bouns = 'null', 0, bouns)) as avg_money,
        1 as cid
    from employee
) company
    on emp.cid = company.cid
where emp.money > company.avg_money;
10.列出与“SCOTT”从事相同工作的所有员工。
-- Step 1: SCOTT's job.
select job
from employee
where name = 'SCOTT';

-- Step 2: every other employee holding the same job.
-- Wrong attempt, kept for reference (note the stray alias "a" after the subquery):
-- select * from employee where job = (select job from employee where name = 'SCOTT') a and name != 'SCOTT';
-- Working form: join directly on the job value found for SCOTT.
select e.id, e.name, e.job
from employee e
inner join (
    select job
    from employee
    where name = 'SCOTT'
) scott
    on e.job = scott.job
where e.name != 'SCOTT';
11.列出薪金等于部门30中员工的薪金的所有员工的姓名和薪金。
-- Attempt the author marked as wrong, kept for reference:
-- select * from employee where salary in (select salary from employee where department = 30);
-- Name and salary of every employee whose salary equals some salary paid in department 30.
-- A semi join returns each employee at most once, even when the same salary
-- occurs several times in department 30 (a plain join would repeat those rows).
select e.name, e.salary
from employee e
left semi join (
    select salary
    from employee
    where department = 30
) d30
    on e.salary = d30.salary;
12.列出薪金高于在部门30工作的所有员工的薪金的员工姓名和薪金。
-- Employees paid more than everyone in department 30, i.e. more than that
-- department's maximum salary. The constant key cid = 1 attaches the single
-- max-salary row to every employee row.
select emp.name, emp.salary
from (
    select name, salary, 1 as cid
    from employee
) as emp
inner join (
    select max(salary) as sal, 1 as cid
    from employee
    where department = 30
) as top30
    on emp.cid = top30.cid
where emp.salary > top30.sal;
13.列出在每个部门工作的员工数量、平均工资和平均服务期限。
-- Per department: head count, average salary, and average length of service
-- in days (difference between today and the hire date, rounded to one decimal).
-- The count is aliased emp_count rather than name, and the tenure column is named.
select
    department,
    count(name) as emp_count,
    avg(salary) as avg_sal,
    round(avg(datediff(current_timestamp, employed_data)), 1) as avg_service_days
from employee
group by department;
14.列出所有员工的姓名、部门名称和工资。
-- Name, department name and salary of each employee that has a matching department row.
select
    emp.name,
    dep.dname,
    emp.salary
from employee emp
inner join dept dep
    on emp.department = dep.deptno;
15.列出所有部门的详细信息和部门人数。
-- Full details of every department plus its head count.
-- dept is the preserved side of the outer join, so a department with no
-- employees is still listed, with a count of 0 instead of being dropped.
select
    d.deptno,
    d.dname,
    d.loc,
    coalesce(c.num, 0) as num
from dept d
left join (
    select department, count(*) as num
    from employee
    group by department
) c
    on d.deptno = c.department;
16.列出各种工作的最低工资。
-- Lowest salary for each job.
select
    job,
    min(salary) as money
from employee
group by job;
17.列出各个部门的 MANAGER(经理)的最低薪金。
-- Lowest MANAGER salary within each department.
select
    department,
    min(salary) as money
from employee
where job = 'MANAGER'
group by department;
18.列出所有员工的年工资,按年薪从低到高排序。
-- Annual salary of every employee, lowest first.
-- The yearly figure is derived from salary (not the department number) and
-- the result is sorted ascending, as the question requires.
select
    id,
    name,
    salary * 12 as annual_salary
from employee
order by annual_salary asc;
19. 列出每个部门薪水前两名最高的人员名称以及薪水
-- Window-function approach: row_number() numbers the employees inside each
-- department, highest salary first.
select
    department,
    name,
    salary,
    row_number() over (partition by department order by salary desc) as num
from employee;

-- Keep only the rows numbered 1 and 2 in each department.
select ranked.department, ranked.name, ranked.salary, ranked.rn
from (
    select
        department,
        name,
        salary,
        row_number() over (partition by department order by salary desc) as rn
    from employee
) as ranked
where ranked.rn <= 2;
20. 列出每个员工从受雇开始到2018-12-12 为止共受雇了多少天。
-- Days each employee had been employed as of 2018-12-12.
-- The end date is a plain yyyy-MM-dd literal; the previous '2018-12-12 24'
-- carried a trailing "24" that is not a valid time component.
select
    id,
    name,
    datediff('2018-12-12', employed_data) as days_employed
from employee;
练习:
一 将下列数据加载hive表。
字段:员工id,员工名字,工作岗位,部门经理,受雇日期,薪水,奖金,部门编号
7369,SMITH,CLERK,7902,1980-12-17,800,null,20
7499,ALLEN,SALESMAN,7698,1981-02-20,1600,300,30
7521,WARD,SALESMAN,7698,1981-02-22,1250,500,30
7566,JONES,MANAGER,7839,1981-04-02,2975,null,20
7654,MARTIN,SALESMAN,7698,1981-09-28,1250,1400,30
7698,BLAKE,MANAGER,7839,1981-05-01,2850,null,30
7782,CLARK,MANAGER,7839,1981-06-09,2450,null,10
7788,SCOTT,ANALYST,7566,1987-04-19,3000,null,20
7839,KING,PRESIDENT,null,1981-11-17,5000,null,10
7844,TURNER,SALESMAN,7698,1981-09-08,1500,0,30
7876,ADAMS,CLERK,7788,1987-05-23,1100,null,20
7900,JAMES,CLERK,7698,1981-12-03,950,null,30
7902,FORD,ANALYST,7566,1981-12-03,3000,null,20
7934,MILLER,CLERK,7782,1982-01-23,1300,null,10
将dept.txt数据插入表字段(DEPTNO、DNAME、LOC)
10,ACCOUNTING,shanghai
20,RESEARCH,DALLAS
30,SALES,CHICAGO
40,OPERATIONS,BOSTON
-- Create the employee table.
-- Columns, in the order of the comma-separated sample rows:
--   id, name, job, manager (the manager's employee id), employed_data (hire date
--   as yyyy-MM-dd text), salary, bouns (bonus), department (department number).
-- manager and bouns are declared as strings; the sample rows carry the literal
-- text "null" in those fields.
create table if not exists employee(
    id int,
    name string,
    job string,
    manager string,
    employed_data string,
    salary int,
    bouns string,
    department int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- Create the department table.
-- Columns, in the order of the comma-separated sample rows:
--   deptno (department number), dname (department name), loc (location).
create table if not exists dept(
    deptno int,
    dname string,
    loc string
)
row format delimited fields terminated by ',';
二:使用hive -e的方式完成下面需求:
1. 列出至少有一个员工的所有部门降序排列。
-- Departments that have at least one employee, largest head count first.
-- Grouping is by department (not job), and the threshold is >= 1 as the question asks.
select
    department,
    count(*) as num
from employee
group by department
having count(*) >= 1
order by num desc;
2. 列出薪金比“SMITH”多的所有员工。
-- Step 1: look up SMITH's salary on its own.
select salary
from employee
where name = 'SMITH';

-- Step 2 (scalar-subquery form): employees paid more than SMITH.
select *
from employee
where salary > (
    select salary
    from employee
    where name = 'SMITH'
);

-- Step 3 (join form): the same comparison without a scalar subquery.
-- Both sides carry the constant key cid = 1, so every employee row is paired
-- with SMITH's row before the salary filter is applied.
select emp.id, emp.name, emp.job, emp.salary
from (
    select id, name, job, salary, 1 as cid
    from employee
) emp
left join (
    select salary, 1 as cid
    from employee
    where name = 'SMITH'
) smith
    on emp.cid = smith.cid
where emp.salary > smith.salary;
3. 列出所有员工的姓名及其直接上级的姓名。
-- Every employee together with the name of their direct manager.
-- The left join keeps employees whose manager value matches no employee id
-- (for example a manager stored as the text 'null'); manager_name is NULL for them.
-- The cast makes the string manager column compare against the int id explicitly.
select
    e.name as employee_name,
    m.name as manager_name
from employee e
left join employee m
    on cast(e.manager as int) = m.id;
4. 列出受雇日期早于其直接上级的所有员工。
-- Employees hired earlier than their direct manager.
-- Self-join: each worker row is matched to the row of the employee named in its manager column.
select worker.*
from employee worker
inner join employee boss
    on worker.manager = boss.id
where worker.employed_data < boss.employed_data;
5. 列出部门名称和这些部门的员工信息,同时列出那些没有员工的部门。
-- Employee details per department; dept is the preserved side of the outer join,
-- so departments without employees still appear (with NULL employee columns).
select
    emp.*,
    d.dname
from dept d
left outer join employee emp
    on d.deptno = emp.department;
6. 列出所有“JAMES”(办事员)的姓名及其部门名称。
-- Name and department name of every employee called JAMES.
-- Only the two requested columns are returned instead of every column of both tables.
select
    e.name,
    d.dname
from employee e
inner join dept d
    on e.department = d.deptno
where e.name = 'JAMES';
7. 列出最低薪金大于1500的各种工作。
-- Rejected attempt, kept for reference (it filters individual rows rather than the per-job minimum):
-- select distinct job from employee where salary+if(bouns='null',0,bouns)>1500;
-- Jobs whose lowest pay exceeds 1500. Pay here is salary plus bonus, where a
-- bonus stored as the text 'null' counts as 0.
select
    job,
    min(salary + if(bouns = 'null', 0, bouns)) as sala_bouns
from employee
group by job
having min(salary + if(bouns = 'null', 0, bouns)) > 1500;
8. 列出在部门“SALES”(销售部)工作的员工的姓名,假定不知道销售部的部门编号
-- Names of the employees working in the SALES department; the department is
-- resolved by name through dept, so its number does not need to be known.
select e.name
from employee e
inner join dept d
    on e.department = d.deptno
where d.dname = 'SALES';
9. 列出薪金高于公司平均薪金的所有员工。
-- Company-wide average pay (salary plus bonus; a bonus stored as the text 'null' counts as 0).
select
    avg(salary + if(bouns = 'null', 0, bouns)) as avg_money,
    1 as cid
from employee;

-- Employees whose pay is above that average.
-- Both sides carry the constant key cid = 1, which attaches the single
-- average row to every employee row.
select emp.id, emp.name, emp.job, emp.manager, emp.salary, emp.bouns
from (
    select
        id, name, job, manager, employed_data, salary, bouns, department,
        salary + if(bouns = 'null', 0, bouns) as money,
        1 as cid
    from employee
) emp
left join (
    select
        avg(salary + if(bouns = 'null', 0, bouns)) as avg_money,
        1 as cid
    from employee
) company
    on emp.cid = company.cid
where emp.money > company.avg_money;
10.列出与“SCOTT”从事相同工作的所有员工。
-- Step 1: SCOTT's job.
select job
from employee
where name = 'SCOTT';

-- Step 2: every other employee holding the same job.
-- Wrong attempt, kept for reference (note the stray alias "a" after the subquery):
-- select * from employee where job = (select job from employee where name = 'SCOTT') a and name != 'SCOTT';
-- Working form: join directly on the job value found for SCOTT.
select e.id, e.name, e.job
from employee e
inner join (
    select job
    from employee
    where name = 'SCOTT'
) scott
    on e.job = scott.job
where e.name != 'SCOTT';
11.列出薪金等于部门30中员工的薪金的所有员工的姓名和薪金。
-- Attempt the author marked as wrong, kept for reference:
-- select * from employee where salary in (select salary from employee where department = 30);
-- Name and salary of every employee whose salary equals some salary paid in department 30.
-- A semi join returns each employee at most once, even when the same salary
-- occurs several times in department 30 (a plain join would repeat those rows).
select e.name, e.salary
from employee e
left semi join (
    select salary
    from employee
    where department = 30
) d30
    on e.salary = d30.salary;
12.列出薪金高于在部门30工作的所有员工的薪金的员工姓名和薪金。
-- Employees paid more than everyone in department 30, i.e. more than that
-- department's maximum salary. The constant key cid = 1 attaches the single
-- max-salary row to every employee row.
select emp.name, emp.salary
from (
    select name, salary, 1 as cid
    from employee
) as emp
inner join (
    select max(salary) as sal, 1 as cid
    from employee
    where department = 30
) as top30
    on emp.cid = top30.cid
where emp.salary > top30.sal;
13.列出在每个部门工作的员工数量、平均工资和平均服务期限。
-- Per department: head count, average salary, and average length of service
-- in days (difference between today and the hire date, rounded to one decimal).
-- The count is aliased emp_count rather than name, and the tenure column is named.
select
    department,
    count(name) as emp_count,
    avg(salary) as avg_sal,
    round(avg(datediff(current_timestamp, employed_data)), 1) as avg_service_days
from employee
group by department;
14.列出所有员工的姓名、部门名称和工资。
-- Name, department name and salary of each employee that has a matching department row.
select
    emp.name,
    dep.dname,
    emp.salary
from employee emp
inner join dept dep
    on emp.department = dep.deptno;
15.列出所有部门的详细信息和部门人数。
-- Full details of every department plus its head count.
-- dept is the preserved side of the outer join, so a department with no
-- employees is still listed, with a count of 0 instead of being dropped.
select
    d.deptno,
    d.dname,
    d.loc,
    coalesce(c.num, 0) as num
from dept d
left join (
    select department, count(*) as num
    from employee
    group by department
) c
    on d.deptno = c.department;
16.列出各种工作的最低工资。
-- Lowest salary for each job.
select
    job,
    min(salary) as money
from employee
group by job;
17.列出各个部门的 MANAGER(经理)的最低薪金。
-- Lowest MANAGER salary within each department.
select
    department,
    min(salary) as money
from employee
where job = 'MANAGER'
group by department;
18.列出所有员工的年工资,按年薪从低到高排序。
-- Annual salary of every employee, lowest first.
-- The yearly figure is derived from salary (not the department number) and
-- the result is sorted ascending, as the question requires.
select
    id,
    name,
    salary * 12 as annual_salary
from employee
order by annual_salary asc;
19. 列出每个部门薪水前两名最高的人员名称以及薪水
-- Window-function approach: row_number() numbers the employees inside each
-- department, highest salary first.
select
    department,
    name,
    salary,
    row_number() over (partition by department order by salary desc) as num
from employee;

-- Keep only the rows numbered 1 and 2 in each department.
select ranked.department, ranked.name, ranked.salary, ranked.rn
from (
    select
        department,
        name,
        salary,
        row_number() over (partition by department order by salary desc) as rn
    from employee
) as ranked
where ranked.rn <= 2;
20. 列出每个员工从受雇开始到2018-12-12 为止共受雇了多少天。
-- Days each employee had been employed as of 2018-12-12.
-- The end date is a plain yyyy-MM-dd literal; the previous '2018-12-12 24'
-- carried a trailing "24" that is not a valid time component.
select
    id,
    name,
    datediff('2018-12-12', employed_data) as days_employed
from employee;