1. 數據說明
(1) student表
hive> select * from student;
# 學生ID 學生姓名 性別 年齡 所在系
# sid sname sex age dept
95002 Aiden female 19 IS
95017 Jacob female 18 IS
95018 Ethan female 19 IS
95013 Matthew male 21 CS
95014 Nicholas female 19 CS
......
(2) course表
hive> select * from course;
# 課程ID 課程名稱
# cid cname
1 Chinese
2 Math
3 English
4 Physics
5 Chemistry
6 Biology
(3) sc表
hive> select * from sc;
# 學生ID 課程ID 成績
# sid cid score
95001 1 81
95001 2 85
95001 3 88
95001 4 70
95002 2 90
......
2. SQL查詢練習題目
(1) 基本練習
<1> 查詢全體學生的學號與姓名
-- List the ID and name of every student.
select
    sid,
    sname
from student;
<2> 查詢選修了課程的學生姓名
-- Names of the students who have at least one enrollment row in sc.
-- The semi join emits each matching student once, so no distinct is needed,
-- and two different students who share a name are not merged into one row.
select
    s.sname
from student s
left semi join sc
    on s.sid = sc.sid;
(2) hive的group by和集合函數
<1> 查詢學生的總人數
-- Total number of rows in the student table.
select
    count(*)
from student;
<2> 計算1號課程的學生平均成績
-- Average score of course 1; grouping by cid keeps the course ID in the output row.
select
    cid,
    avg(score)
from sc
where cid = '1'
group by cid;
<3> 查詢各科成績平均分
-- Average score of each course, labelled with the course name.
-- The subquery computes one average per course ID; the join attaches cname.
select
    c.cname,
    tmp.avg_score
from course c
inner join (
    select
        cid,
        avg(score) as avg_score
    from sc
    group by cid
) tmp
    on c.cid = tmp.cid;
/*
Chinese 83.66666666666667
Math 88.66666666666667
English 81.46153846153847
Physics 83.125
Chemistry 85.0
Biology 89.45454545454545
*/
<4> 查詢1號課程的最高分數
-- Highest score recorded for course 1.
select
    cid,
    max(score) as max_score
from sc
where cid = '1'
group by cid;
/*
1 98
*/
<5> 求各個課程號及相應的選課人數
-- Number of distinct students enrolled in each course.
select
    cid,
    count(distinct sid) as count_sid
from sc
group by cid;
/*
1 15
2 15
3 13
4 16
5 12
6 11
*/
<6> 查詢選修了超過3門課程的學生學號
-- Students enrolled in more than 3 distinct courses, with their course count.
-- The having clause repeats the aggregate rather than referring to the select alias.
select
    sid,
    count(distinct cid) as count_cid
from sc
group by sid
having count(distinct cid) > 3;
/*
95001 4
95002 4
95004 4
95005 4
95006 6
95007 4
95011 4
95012 4
95013 4
95015 4
95018 4
95019 5
95022 4
*/
(3) hive的order by/sort by/distribute by
<1> 查詢學生信息,結果按學號全局有序
-- All student columns, globally ordered by student ID.
-- Columns are listed explicitly so the output shape does not silently change
-- if the table definition changes.
select
    sid,
    sname,
    sex,
    age,
    dept
from student
order by sid;
<2> 查詢學生信息,結果區分性別按年齡有序
-- Run the query with 2 reducers (the default value of this setting is -1).
set mapred.reduce.tasks=2;
-- Student rows separated by sex and ordered by age within each sex.
-- distribute by only guarantees that rows with the same sex reach the same
-- reducer; a single reducer may still receive both sexes. Sorting by sex
-- before age keeps each reducer's output grouped by sex in that case.
select
    sid,
    sname,
    sex,
    age,
    dept
from student
distribute by sex
sort by sex, age;
/*
95009 Alexande female 18 MA
95017 Jacob female 18 IS
95008 Zachary female 18 CS
95014 Nicholas female 19 CS
95019 Jack female 19 IS
95018 Ethan female 19 IS
95002 Aiden female 19 IS
95007 Jaden female 19 MA
95012 Andrew female 20 CS
95003 Michael female 22 MA
95021 Connor male 17 MA
95005 Tyler male 18 MA
95011 Noah male 18 MA
95015 Jackson male 18 MA
95010 Caden male 19 CS
95004 Ryan male 19 IS
95022 Logan male 20 MA
95001 Caleb male 20 CS
95020 Joshua male 21 IS
95013 Matthew male 21 CS
95016 Brayden male 21 MA
95006 Dylan male 23 CS
*/
(4) join查詢
<1> 查詢每個學生及其選修課程的情況
-- Each student's name paired with the name of every course they are enrolled in.
-- sc links students to courses, so it is joined to both tables.
select
    s.sname,
    c.cname
from student s
inner join sc
    on s.sid = sc.sid
inner join course c
    on c.cid = sc.cid;
/*
Caleb Chinese
Caleb Math
Caleb English
Caleb Physics
Aiden Math
Aiden English
Aiden Physics
Aiden Chemistry
Michael Chinese
Michael English
......
*/
<2> 查詢學生的得分情況
-- Student name, course name and score for every enrollment row in sc.
select
    s.sname,
    c.cname,
    sc.score
from student s
inner join sc
    on s.sid = sc.sid
inner join course c
    on c.cid = sc.cid;
/*
Noah Chinese 81
Noah Math 91
Noah English 81
Noah Physics 86
Andrew Chinese 81
Andrew English 78
Andrew Physics 85
Andrew Biology 98
......
*/
<3> 查詢選修2號課程且成績超過90分的所有學生
-- Students whose score in course 2 is strictly greater than 90.
select
    s.sname,
    sc.cid,
    sc.score
from student s
inner join sc
    on s.sid = sc.sid
where sc.score > 90
    and sc.cid = '2';
/*
Ryan 2 92
Tyler 2 92
Caden 2 98
Noah 2 91
Nicholas 2 100
Brayden 2 99
Ethan 2 100
Joshua 2 99
Connor 2 93
*/
<4> 查詢所有學生的信息,如果在成績表中有成績,則輸出成績表中的課程號
-- Every student with the course IDs they are enrolled in, one row per enrollment.
-- The left join keeps students that have no row in sc; cid is NULL for them.
-- Student columns are listed explicitly instead of s.* so the output shape
-- does not silently change if the table definition changes.
select
    s.sid,
    s.sname,
    s.sex,
    s.age,
    s.dept,
    sc.cid
from student s
left join sc
    on s.sid = sc.sid;
/*
......
95015 Jackson male 18 MA 1
95015 Jackson male 18 MA 3
95015 Jackson male 18 MA 4
95015 Jackson male 18 MA 6
95016 Brayden male 21 MA 1
95016 Brayden male 21 MA 2
95016 Brayden male 21 MA 4
*/
(5) LEFT SEMI JOIN
查詢與"Jackson"在同一個系學習的學生
-- Students who study in the same department as 'Jackson' (Jackson included).
-- The subquery yields Jackson's department; the semi join keeps the students
-- whose dept matches it.
select
    student.sname,
    student.dept
from student
left semi join (
    select
        dept
    from student
    where sname = 'Jackson'
) jackson_dept
    on student.dept = jackson_dept.dept;
/*
Michael MA
Tyler MA
Jaden MA
Connor MA
Logan MA
Noah MA
Alexande MA
Jackson MA
Brayden MA
*/