配置hive時出現不能加載自己修改的hive-site.xml等配置文件的問題。發現它總是加載默認的配置文件。
解決:
hadoop的配置文件hadoop-env.sh中加上export HADOOP_CLASSPATH=$HIVE_HOME/conf:$HADOOP_CLASSPATH
還有一個問題:運行其他hadoop子項目時總會出現找不到類文件,必須把相關jar包拷貝到hadoop的lib目錄下,導致其lib目錄會越來越大。
至今不知道怎樣將其他jar包加入到classpath中,網上說用export HADOOP_CLASSPATH="",但是好像不行
hive --config /root/etc/hive(注:/root/etc/hive是指hive-site.xml的存放目錄)
HiveQL以分號結束。可以跨行。
在hive的shell上可以使用dfs命令執行HDFS文件操作。
dfs -ls /user/hive/warehouse;
hive語句必須以分號“;”結束。
不支持更新,索引和事務。
表名,列名不區分大小寫。
在hive的shell上可以使用dfs命令執行HDFS文件的操作。>>dfs -ls /user/hive/warehouse/;
查看和設置臨時變量:>>set fs.default.name[=hdfs://zhaoxiang:9000];
導入jar包: >>add jar hivejar.jar;
創建函數: >>create temporary function udfTest as 'com.cstore.udfExample';
【在pig中使用UDF,先用register語句注冊jar文件,之后可以通過完全的java類名調用,或者用define語句為UDFding 指定一個名稱:
register pigjar.jar;
define UPPER org.pigjar.string.UPPER();
B = foreach a generate UPPER($0); 】
可以在本地命令行運行hive的shell:
$ hive -e 'select * from userinfo' (執行hiveQL語句)
$ hive --config /hive-0.9.0/conf (重新載入新的配置文件)
$ hive --service hiveserver 50000(啟動服務)
-- citation table: citing patent -> cited patent, comma-delimited text file
-- (swap "textfile" for "sequencefile" to store as a SequenceFile instead)
create table cite(citing int, cited int) row format delimited fields terminated by ',' stored as textfile;
-- "local" loads from the local filesystem; omit it and the path is resolved on HDFS
load data local inpath 'cite75_99.txt' overwrite into table cite;
-- preview the first 10 rows of the citation table
select * from cite limit 10;
-- list all tables in the current database
show tables;
-- show column names and types of the cite table
describe cite;
-- count the rows; count(1) is equivalent to count(*) in standard SQL
select count(1) from cite;
-- per-patent citation counts
create table cite_count (cited int, count int);
-- populate: number of citing patents for each cited patent
insert overwrite table cite_count select cited , count(citing) from cite group by cited;
-- patents cited more than 10 times
select * from cite_count where count > 10 limit 10;
-- clean up
drop table cite_count;
-- page-view fact table: partitioned by date and country, bucketed by user
create table page_view(viewTime int, userid bigint,
    page_url string, referrer_url string,
    ip string comment 'ip address of user')
comment 'this is the page view table'
-- partition columns must not duplicate any regular table column
partitioned by (dt string, country string)
-- hash rows into 32 buckets by userid (enables sampling and bucketed joins)
clustered by (userid) into 32 buckets
row format delimited
    fields terminated by ','
    collection items terminated by '\002'
    map keys terminated by '\003'
    lines terminated by '\n'
stored as textfile;
-- sampling: read only bucket 1 of the 32 buckets the table was clustered into;
-- the "on" column must be the clustering column (userid) for bucket pruning to apply
select avg(viewTime) from page_view tablesample(bucket 1 out of 32 on userid);
-- external table over an existing HDFS directory; dropping an external table
-- removes only the metadata -- the underlying data files must be deleted manually
-- NOTE(review): reuses the name page_view from the earlier create table; only one
-- of the two definitions can exist at a time
create external table page_view(viewTime int, userid bigint,
    page_url string, referrer_url string,
    ip string comment 'ip address of user')
location 'path/to/existing/table/in/HDFS';
-- table alterations
alter table page_view rename to pv;
alter table pv add columns (newcol string);
alter table pv drop partition (dt='2009-09-01');
-- show tables whose names match a java regular expression
show tables 'page_.*';
-- load a local file into one specific partition, replacing its current contents
load data local inpath 'page_view.txt'
overwrite into table page_view
partition (dt='2009-09-01',country='US');
在hive的shell上執行unix命令:命令前加感嘆號(!),命令尾加分號(;).
hive> ! ls ;
hive> ! head hive_result;
-- running queries and capturing their results:
-- general form: insert overwrite table <table_name> <select-query>;
insert overwrite table query_result select * from page_view where country='US';
-- write query results to an HDFS directory; add "local" after "overwrite"
-- to write to a local directory instead
insert overwrite directory '/query_result' select * from page_view;
-- distinct users per country
select country , count(distinct userid) from page_view group by country;
-- subqueries may appear only in the from clause, and must be given an alias
select teacher, max(class_num) from
    (select teacher, count(classname) as class_num from classinfo group by teacher) subquery
group by teacher;
-- joins (hive supports equality join conditions only)
-- NOTE(review): the original select list was garbled ("pv., choice.");
-- reconstructed as all columns of both joined row sources -- confirm intent
select pv.*, u.*, f.friends from page_view pv
    join user u on (pv.userid=u.id)
    join friend_list f on (u.id=f.uid);
-- multi-table insert: scan userinfo once, populate two tables in one statement
create table mutil1 as select id, name from userinfo;
-- copy only the schema, no data
create table mutil2 like mutil1;
from userinfo
insert overwrite table mutil1 select id, name
insert overwrite table mutil2 select count(distinct id), name group by name;
-- create a view: number of classes taught per teacher
-- (alias the aggregate so the view column gets a usable name instead of _c1)
create view teacher_classnum as select teacher, count(classname) as class_num from classinfo group by teacher;