分享

python 调用HBase 范例

 tim_spac163 2014-06-19
python 调用HBase 实例
新来的一位工程师不熟悉HBase,java也不熟,但python还行,所以我建议他考虑通过thrift接口调用HBase来完成目前的工作。
首先,安装thrift
下载thrift,这里,我用的是thrift-0.7.0-dev.tar.gz 这个版本
tar xzf thrift-0.7.0-dev.tar.gz
cd thrift-0.7.0-dev
sudo ./configure  --with-cpp=no --with-ruby=no
sudo make
sudo make install



然后,到HBase的源码包里,找到
src/main/resources/org/apache/hadoop/hbase/thrift/
执行
 thrift --gen py Hbase.thrift
 mv gen-py/hbase/ /usr/lib/python2.4/site-packages/ (根据python版本可能有不同)


我这里写了些调用的脚本,供大家参考

from unittest import TestCase, main
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation
class HBaseTester:
    """Thin wrapper around the HBase Thrift client for one table.

    On construction it opens a buffered Thrift connection and creates the
    table (with the columns ``name`` and ``foo``) when it does not exist yet.
    """

    def __init__(self, netloc, port, table="staftesttable"):
        """Connect to the Thrift server and make sure the table exists.

        netloc -- host of the HBase Thrift server
        port   -- port of the HBase Thrift server
        table  -- name of the table to operate on; created when missing
        """
        self.tableName = table

        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(netloc, port))
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        tables = self.client.getTableNames()
        if self.tableName not in tables:
            self.__createTable()

    def __del__(self):
        # __init__ may have raised before ``transport`` was assigned, so do
        # not assume the attribute exists.
        transport = getattr(self, "transport", None)
        if transport is not None:
            transport.close()

    def __createTable(self):
        """Create ``self.tableName`` with the columns ``name`` and ``foo``."""
        name = ColumnDescriptor(name='name')
        foo = ColumnDescriptor(name='foo')

        self.client.createTable(self.tableName, [name, foo])

    def put(self, key, name, foo):
        """Write ``name`` to ``name:v`` and ``foo`` to ``foo:v`` of row ``key``."""
        mutations = [
            Mutation(column="name:v", value=name),
            Mutation(column="foo:v", value=foo),
        ]
        # The attribute is ``tableName``; the former ``tablename`` spelling
        # raised AttributeError.
        self.client.mutateRow(self.tableName, key, mutations)

    def scanner(self, column):
        """Scan the table from the first row, restricted to ``column``.

        Each fetched result is printed and collected; the list of collected
        results is returned. The scan stops at the first empty fetch.
        """
        client = self.client
        scanner = client.scannerOpen(self.tableName, "", [column])
        result = []
        try:
            r = client.scannerGet(scanner)
            while r:
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print(r[0])
                result.append(r[0])
                r = client.scannerGet(scanner)
        finally:
            # Release the scanner even when a fetch fails.
            client.scannerClose(scanner)
        print("Scanner finished")
        return result
     
    
        
        
        
          
class TestHBaseTester(TestCase):
    """Integration tests for HBaseTester.

    They talk to an HBase Thrift server at localhost:9090 and drop the test
    table after every test.
    """

    def setUp(self):
        self.writer = HBaseTester("localhost", 9090)

    def tearDown(self):
        # Disable, then delete the table so the next test starts from a
        # freshly created one.
        name = self.writer.tableName
        client = self.writer.client
        client.disableTable(name)
        client.deleteTable(name)

    def testCreate(self):
        """The table exists and only has the expected column descriptors."""
        tableName = self.writer.tableName
        client = self.writer.client
        self.assertTrue(tableName in client.getTableNames())
        columns = ['name:', 'foo:']
        for i in client.getColumnDescriptors(tableName):
            self.assertTrue(i in columns)

    def testPut(self):
        """Rows written with put() come back from a scan of ``name:``."""
        self.writer.put("r1", "n1", "f1")
        self.writer.put("r2", "n2", "f2")
        self.writer.put("r3", "n3", "")
        rows = self.writer.scanner("name:")
        # Previously nothing was asserted, so the test could not fail on
        # wrong data. Assumes scan results expose the row key as ``.row``
        # (TRowResult) -- confirm against the generated hbase.ttypes.
        self.assertEqual(["r1", "r2", "r3"], [r.row for r in rows])
       
# Run the test cases above through unittest's main() when this file is
# executed as a script.
if __name__ == "__main__":
    main()





    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0)

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多