from:http://abloz.com
author:ablozhou
date:2012.7.3
在HBase的官方文檔裡,講述了HBase的bin目錄下的ruby程序,可以採用如下的方式執行:
如果要使用腳本,可以看HBase的bin目錄。在裡面找到後綴為 *.rb 的腳本。要想運行這個腳本,要這樣: $ ./bin/hbase org.jruby.Main PATH_TO_SCRIPT
如:
[zhouhh@Hadoop48 bin]$ hbase-jruby get-active-master.rb
Hadoop48
[zhouhh@Hadoop48 bin]$ hbase-jruby region_status.rb
Region Status: 92 / 92
hbase-jruby是hbase org.jruby.Main的shell封裝,hbase 0.94提供在bin目錄裡。
在執行hbase的例子IndexBuilder.java,需要預先準備數據。
但該數據是用ruby文件來提供的:
[zhouhh@Hadoop48 mapreduce]$ pwd
/home/zhouhh/hbase-0.94.0/src/examples/mapreduce
[zhouhh@Hadoop48 mapreduce]$ cat index-builder-setup.rb
# Set up sample data for IndexBuilder example
create "people", "attributes"
create "people-email", "INDEX"
create "people-phone", "INDEX"
create "people-name", "INDEX"

[["1", "jenny", "jenny@example.com", "867-5309"],
 ["2", "alice", "alice@example.com", "555-1234"],
 ["3", "kevin", "kevinpet@example.com", "555-1212"]].each do |fields|
  (id, name, email, phone) = *fields
  put "people", id, "attributes:name", name
  put "people", id, "attributes:email", email
  put "people", id, "attributes:phone", phone
end
可是hbase的文檔沒有任何解釋,如何執行該文件以導入ruby數據。如果用hbase文檔提到的方法,報錯:
[zhouhh@Hadoop48 mapreduce]$ hbase org.jruby.Main index-builder-setup.rb
NoMethodError: undefined method `create' for main:Object
  (root) at index-builder-setup.rb:18
[zhouhh@Hadoop48 hbase-0.94.0]$ hbase-jruby ./src/examples/mapreduce/index-builder-setup.rb
NoMethodError: undefined method `create' for main:Object
  (root) at ./src/examples/mapreduce/index-builder-setup.rb:18
[zhouhh@Hadoop48 mapreduce]$ ruby index-builder-setup.rb
index-builder-setup.rb:18: undefined method `create' for main:Object (NoMethodError)
一時手足無措,用java寫了個導數據的程序:
// Source file: src/com/abloz/hbase/HBaseTest.java
// (shown in the article via: [zhouhh@Hadoop48 myhbase]$ cat src/com/abloz/hbase/HBaseTest.java)
package com.abloz.hbase;

//date:2012.6.7
//http://abloz.com
//hadoop 1.0.3
//hbase 0.94.0
//tested on centos 5.5
//cluster distributed system:Hadoop48,Hadoop47,Hadoop46

import java.io.IOException;
import java.nio.charset.Charset;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;

/**
 * Small demo that creates HBase tables, writes a few sample cells and reads
 * them back with a Get and a full-table Scan.
 */
public class HBaseTest {

    /**
     * Configuration built by {@link HBaseConfiguration#create()}; the original
     * author relies on an hbase-site.xml being present on the classpath.
     *
     * The settings can also be supplied programmatically, e.g.:
     *
     *   Configuration conf = new Configuration();
     *   conf.set("hbase.zookeeper.property.clientPort", "2181");
     *   conf.set("hbase.zookeeper.quorum", "192.168.10.48,192.168.10.47,192.168.10.46");
     *   Configuration conf1 = HBaseConfiguration.create(conf);
     */
    public static Configuration conf = HBaseConfiguration.create();

    /** Fixed charset for String <-> byte[] conversion instead of the platform default. */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * Creates {@code tableName} with the given column families. If the table
     * already exists it is disabled and deleted first.
     *
     * @param tableName name of the table to (re)create
     * @param families  column family names to add to the table
     * @throws Exception any failure other than the two connection exceptions,
     *                   which are printed and swallowed as in the original code
     */
    public static void createTable(String tableName, String[] families) throws Exception {
        HBaseAdmin hadmin = null;
        try {
            // Table create/disable/exists/drop all go through HBaseAdmin.
            hadmin = new HBaseAdmin(conf);
            if (hadmin.tableExists(tableName)) {
                hadmin.disableTable(tableName);
                hadmin.deleteTable(tableName);
                System.out.println("table "+tableName+" exist,delete it.");
            }

            HTableDescriptor tbdesc = new HTableDescriptor(tableName);
            for (String family : families) {
                tbdesc.addFamily(new HColumnDescriptor(family));
            }
            hadmin.createTable(tbdesc);

            // Report success only when creation really happened; previously this
            // line also ran after a caught connection failure.
            System.out.println("table "+ tableName+ " create ok.");
        } catch (MasterNotRunningException e) {
            e.printStackTrace();
        } catch (ZooKeeperConnectionException e) {
            e.printStackTrace();
        } finally {
            if (hadmin != null) {
                hadmin.close();
            }
        }
    }

    /**
     * Writes a single cell.
     *
     * @param tableName target table
     * @param rowKey    row key
     * @param family    column family
     * @param qualifier column qualifier; {@code null} is treated as ""
     * @param value     cell value; {@code null} is treated as ""
     * @throws Exception declared for backward compatibility; IOExceptions are
     *                   printed and swallowed
     */
    public static void putData(String tableName, String rowKey, String family,
            String qualifier, String value) throws Exception {
        if (qualifier == null) {
            qualifier = "";
        }
        if (value == null) {
            value = "";
        }

        HTable htb = null;
        try {
            // Row-level insert/update/delete/get go through HTable.
            htb = new HTable(conf, tableName);
            Put put = new Put(toBytes(rowKey));
            put.add(toBytes(family), toBytes(qualifier), toBytes(value));
            htb.put(put);
            System.out.println("put data to "+ tableName + ",rowKey:"+rowKey+",family:"+family+",qual:"+qualifier+",value:"+value);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeTable(htb);
        }
    }

    /**
     * Fetches one row and prints every cell in it.
     *
     * @param tableName table to read
     * @param rowKey    row key to fetch
     * @throws Exception declared for backward compatibility; IOExceptions are
     *                   printed and swallowed
     */
    public static void getData(String tableName, String rowKey) throws Exception {
        HTable htb = null;
        try {
            htb = new HTable(conf, tableName);
            Result rs = htb.get(new Get(toBytes(rowKey)));
            System.out.println("get from "+tableName+ ",rowkey:"+rowKey);
            printCells(rs);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeTable(htb);
        }
    }

    /**
     * Scans the whole table and prints every cell.
     *
     * @param tableName table to scan
     * @throws Exception declared for backward compatibility; IOExceptions are
     *                   printed and swallowed
     */
    public static void scanData(String tableName) throws Exception {
        HTable htb = null;
        ResultScanner rss = null;
        try {
            htb = new HTable(conf, tableName);
            // Unbounded scan. The original passed the table name to the Scan
            // constructor, which interprets its byte[] argument as the START ROW,
            // so rows sorting before the table name were skipped.
            Scan scan = new Scan();
            rss = htb.getScanner(scan);
            System.out.println("scan "+tableName);
            System.out.println("==============begin=================");
            for (Result r : rss) {
                printCells(r);
            }
            System.out.println("================end===============");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (rss != null) {
                rss.close();
            }
            closeTable(htb);
        }
    }

    /** Builds the "student" table (family "class") with sample scores, then reads it back. */
    public static void test_student() {
        String tableName = "student";
        String family = "class";
        String[] families = {family};
        String[][] data = {
            {"jenny", "chinese", "85"},
            {"jenny", "math", "55"},
            {"jenny", "english", "65"},
            {"alice", "chinese", "74"},
            {"alice", "math", "88"},
            {"alice", "english", "85"},
            {"kevin", "chinese", "35"},
            {"kevin", "math", "95"},
            {"kevin", "english", "75"}};

        // The Get below reads the last row written, as the original did.
        String rowKey = "1";
        try {
            HBaseTest.createTable(tableName, families);
            for (String[] user : data) {
                rowKey = user[0];
                HBaseTest.putData(tableName, rowKey, family, user[1], user[2]);
            }
            HBaseTest.getData(tableName, rowKey);
            HBaseTest.scanData(tableName);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds the "people" table (family "attribute") with name/email/phone, then reads it back. */
    public static void test_people() {
        String tableName = "people";
        String family = "attribute";
        String[] families = {family};
        String[][] data = {
            {"1", "jenny", "jenny@example.com", "867-5309"},
            {"2", "alice", "alice@example.com", "555-1234"},
            {"3", "kevin", "kevinpet@example.com", "555-1212"}};

        // The Get below reads the last row written, as the original did.
        String rowKey = "1";
        try {
            HBaseTest.createTable(tableName, families);
            for (String[] user : data) {
                rowKey = user[0];
                HBaseTest.putData(tableName, rowKey, family, "name", user[1]);
                HBaseTest.putData(tableName, rowKey, family, "email", user[2]);
                HBaseTest.putData(tableName, rowKey, family, "phone", user[3]);
            }
            HBaseTest.getData(tableName, rowKey);
            HBaseTest.scanData(tableName);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        //test_people();
        test_student();
    }

    /** Prints every KeyValue of a Result as "row: family:qualifier, value, timestamp". */
    private static void printCells(Result result) {
        for (KeyValue kv : result.raw()) {
            System.out.println(toStr(kv.getRow()) + ":\t"
                    + toStr(kv.getFamily()) + ":"
                    + toStr(kv.getQualifier()) + ",\t"
                    + toStr(kv.getValue()) + ",\t"
                    + kv.getTimestamp());
        }
    }

    /** Closes a table handle, printing (not propagating) a close failure. */
    private static void closeTable(HTable table) {
        if (table == null) {
            return;
        }
        try {
            table.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Encodes a String as UTF-8 bytes. */
    private static byte[] toBytes(String s) {
        return s.getBytes(UTF8);
    }

    /** Decodes UTF-8 bytes into a String. */
    private static String toStr(byte[] bytes) {
        return new String(bytes, UTF8);
    }
}
不過,感覺花了九牛二虎之力。ruby的程序相當簡潔。考慮到hbase的shell是ruby寫的,shell應該可以執行該程序。但進入shell,沒有發現任何導入文件和執行的命令。後來靈機一動,可能是直接執行的:
[zhouhh@Hadoop48 hbase-0.94.0]$ hbase shell ./src/examples/mapreduce/index-builder-setup.rb
0 row(s) in 8.6140 seconds
...
hbase(main):001:0> list
TABLE
people
people-email
people-name
people-phone
...
hbase(main):004:0> scan 'people'
ROW COLUMN+CELL
1 column=attributes:email, timestamp=1341306690384, value=jenny@example.com
1 column=attributes:name, timestamp=1341306690278, value=jenny
...
一下子就解決了該問題。用ruby來操作hbase的數據,看來還是比較完美的方案。