HBase's only built-in index is the rowkey: rows can be retrieved by rowkey alone. If we want to run combined, multi-condition queries over the columns of a column family, we need a secondary-index scheme.
Design idea
The essence of a secondary index is a mapping from each column value back to the rowkey that contains it.
As the figure above shows, to index the column F:C1 we only need to record a mapping from each value of F:C1 to its rowkey, e.g. C11 -> RK1; that mapping is the secondary index on F:C1. To answer a query like "find the F:C2 value of the row where F:C1 = C11" (the cyan part of Figure 1), we first look up C11 in the index to get RK1, then read F:C2 from the original table at row RK1.
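To make that read path concrete, here is a minimal sketch of the two-step lookup, written against the same old-style HBase client API as the IndexBuilder code below. The table names ("student" for the base table, "student-C1" for its index table, following the tableName-qualifier naming convention used later) and the value "C11" are hypothetical; the index schema (f1:id holding the original rowkey) matches what IndexBuilder writes.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class IndexLookupDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HConnection hConn = HConnectionManager.createConnection(conf);
        try {
            // Step 1: the index table ("student-C1", hypothetical) is keyed by
            // the column value, so a Get on "C11" returns the original rowkey.
            HTableInterface indexTable = hConn.getTable("student-C1");
            Result idx = indexTable.get(new Get(Bytes.toBytes("C11")));
            byte[] baseRowkey = idx.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
            indexTable.close();

            // Step 2: use the recovered rowkey (RK1 in the figure) to read any
            // other column, e.g. F:C2, from the base table.
            HTableInterface baseTable = hConn.getTable("student");
            Result row = baseTable.get(new Get(baseRowkey));
            byte[] c2 = row.getValue(Bytes.toBytes("F"), Bytes.toBytes("C2"));
            System.out.println(Bytes.toString(c2));
            baseTable.close();
        } finally {
            hConn.close();
        }
    }
}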
MapReduce approach
IndexBuilder: build the index with a MapReduce job.
Advantage: builds the index in parallel, as a batch.
Drawback: cannot keep the index up to date in real time; the job must be re-run after the data changes.
Example
Flow:
- To query the other columns of the row that contains a given column value, we create a new index table.
- The indexed column of the original table becomes the rowkey of the new table, and the original table's rowkey becomes a column value in the new table.
- In the Mapper's setup() we build a HashMap whose keys are the columns to be indexed and whose values are the names of the corresponding index tables (tableName + "-" + qualifier).
- In map(), each Result is one row of the original table. For each indexed column we read its cell value, using the column family and the qualifier taken from the HashMap; that cell value becomes the rowkey of the index table.
- The original table's rowkey is stored as the value of the index table's f1:id column; see the code below.
package IndexDouble;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;

public class IndexBuilder {
    private String rootDir;
    private String zkServer;
    private String port;
    private Configuration conf;
    private HConnection hConn = null;

    private IndexBuilder(String rootDir, String zkServer, String port) throws IOException {
        this.rootDir = rootDir;
        this.zkServer = zkServer;
        this.port = port;
        conf = HBaseConfiguration.create();
        conf.set("hbase.rootdir", rootDir);
        conf.set("hbase.zookeeper.quorum", zkServer);
        conf.set("hbase.zookeeper.property.clientPort", port);
        hConn = HConnectionManager.createConnection(conf);
    }

    static class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {
        // Maps each column to be indexed to the name of its index table.
        private Map<byte[], ImmutableBytesWritable> indexes =
                new HashMap<byte[], ImmutableBytesWritable>();
        private String familyName;

        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Context context) throws IOException, InterruptedException {
            // The source-table columns that are being indexed.
            Set<byte[]> keys = indexes.keySet();
            // The index table's rowkey is a column value of the source table;
            // the index table's column stores the source table's rowkey.
            for (byte[] k : keys) {
                // Name of the index table for this column.
                ImmutableBytesWritable indexTableName = indexes.get(k);
                // `value` holds one row of the source table; the cell at
                // (familyName, k) becomes the rowkey of the index table.
                byte[] rowkey = value.getValue(Bytes.toBytes(familyName), k);
                if (rowkey != null) {
                    // One row of the index table, keyed by the column value.
                    Put put = new Put(rowkey);
                    // f1:id <- the source table's rowkey.
                    put.add(Bytes.toBytes("f1"), Bytes.toBytes("id"), key.get());
                    context.write(indexTableName, put);
                }
            }
        }

        // Setup work that runs once per task, before any map() calls.
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            // Read the job configuration from the context.
            Configuration conf = context.getConfiguration();
            // Source table name.
            String tableName = conf.get("tableName");
            // Column family of the indexed columns.
            familyName = conf.get("columnFamily");
            // Columns to index.
            String[] qualifiers = conf.getStrings("qualifiers");
            for (String qualifier : qualifiers) {
                // One index table per column, named tableName + "-" + qualifier.
                indexes.put(Bytes.toBytes(qualifier),
                        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + qualifier)));
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String rootDir = "hdfs://hadoop1:8020/hbase";
        String zkServer = "hadoop1";
        String port = "2181";

        IndexBuilder conn = new IndexBuilder(rootDir, zkServer, port);

        String[] otherArgs = new GenericOptionsParser(conn.conf, args).getRemainingArgs();
        // Usage: IndexBuilder <tableName> <columnFamily> <qualifier> [<qualifier>...]
        if (otherArgs.length < 3) {
            System.exit(-1);
        }

        // Source table name.
        String tableName = otherArgs[0];
        // Column family.
        String columnFamily = otherArgs[1];
        conn.conf.set("tableName", tableName);
        conn.conf.set("columnFamily", columnFamily);

        // One or more columns to index.
        String[] qualifiers = new String[otherArgs.length - 2];
        for (int i = 0; i < qualifiers.length; i++) {
            qualifiers[i] = otherArgs[i + 2];
        }
        conn.conf.setStrings("qualifiers", qualifiers);

        @SuppressWarnings("deprecation")
        Job job = new Job(conn.conf, tableName);
        job.setJarByClass(IndexBuilder.class);
        job.setMapperClass(MyMapper.class);
        job.setNumReduceTasks(0); // map-only job: no reduce phase is needed
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(MultiTableOutputFormat.class);

        Scan scan = new Scan();
        TableMapReduceUtil.initTableMapperJob(tableName, scan,
                MyMapper.class, ImmutableBytesWritable.class, Put.class, job);

        job.waitForCompletion(true);
    }
}
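A practical note: MultiTableOutputFormat only writes to the index tables; it does not create them, so each index table (named tableName + "-" + qualifier, with column family f1) must exist before the job runs. Below is a minimal sketch of pre-creating one such table with the same-era admin API; the table name "student-C1" is hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateIndexTable {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            // Index table for qualifier C1 of table "student" (hypothetical
            // names): rowkey = column value, f1:id = original table's rowkey.
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("student-C1"));
            desc.addFamily(new HColumnDescriptor("f1"));
            admin.createTable(desc);
        } finally {
            admin.close();
        }
    }
}

Once the index tables exist, the job is launched with the source table, the column family, and one or more qualifiers as arguments, e.g. hadoop jar index.jar IndexDouble.IndexBuilder student F C1 (the jar name here is hypothetical).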