HBase secondary indexes

1. Demo: postPut observer

package test05;

/**
 * Created by guest2 on 2018/7/5.
 */

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;

/**
 * @author developer
 * HBase observer coprocessor for maintaining a secondary index.
 * Every put to the table this observer is attached to builds an index rowkey
 * from cf1:LINE_NO + cf1:IS_UP_DOWN + a 14-character timestamp slice of
 * cf1:ROWKEY, and writes the original rowkey into the index table "xulinv03"
 * under cf1:rowkey, which speeds up queries on those fields. Attach it to the
 * source table only, not to the index table, or each index write would
 * trigger the observer again.
 */
@SuppressWarnings("deprecation")
public class PutObserver05 extends BaseRegionObserver{

    @Override
    public void postPut(ObserverContext<RegionCoprocessorEnvironment> e,
                        Put put, WALEdit edit, Durability durability) throws IOException {
        // Get a handle on the secondary index table
        HTableInterface table = e.getEnvironment().getTable(TableName.valueOf("xulinv03"));
        byte[] rowkey = put.getRow();
        // Read the indexed columns from the incoming Put; the qualifiers must match
        // the source table exactly, including case (ROWKEY must be uppercase here)
        List<Cell> cellList1 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("LINE_NO"));
        List<Cell> cellList2 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("IS_UP_DOWN"));
        List<Cell> cellList3 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("ROWKEY"));  // carries the arrival time
        if (cellList1.isEmpty() || cellList2.isEmpty() || cellList3.isEmpty()) {
            table.close();
            return;  // this put does not carry all indexed columns
        }
        byte[] val1 = CellUtil.cloneValue(cellList1.get(0));
        byte[] val2 = CellUtil.cloneValue(cellList2.get(0));
        byte[] val3 = CellUtil.cloneValue(cellList3.get(0));

        String v1 = new String(val1, "utf-8");
        String v2 = new String(val2, "utf-8");
        String v3 = new String(val3, "utf-8");

        // Index rowkey = LINE_NO + IS_UP_DOWN + the 14-character yyyyMMddHHmmss
        // slice of ROWKEY. (An earlier variant parsed SITE_TIME with
        // SimpleDateFormat and reversed the formatted string instead.)
        String v4 = v3.substring(6, 20);
        String cl = v1 + v2 + v4;

        // Write the index row: composite rowkey -> original rowkey under cf1:rowkey
        Put indexPut = new Put(cl.getBytes("UTF-8"));
        indexPut.add(Bytes.toBytes("cf1"), Bytes.toBytes("rowkey"), rowkey);
        table.put(indexPut);
        // Release the table handle
        table.close();
    }

}
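To use the index at read time, a client scans the index table by the composite prefix and collects the original rowkeys. Below is a minimal client-side sketch, assuming the HBase 1.x client API and the table and column names from the demo above; the IndexLookup class and its parameters are illustrative, not part of the original code.

package test05;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class IndexLookup {

    /** Return the original rowkeys indexed under lineNo + isUpDown. */
    public static List<byte[]> lookup(Connection conn, String lineNo, String isUpDown)
            throws IOException {
        List<byte[]> rowkeys = new ArrayList<byte[]>();
        try (Table index = conn.getTable(TableName.valueOf("xulinv03"))) {
            Scan scan = new Scan();
            // Index rowkeys begin with LINE_NO + IS_UP_DOWN, so a prefix scan finds them
            scan.setRowPrefixFilter(Bytes.toBytes(lineNo + isUpDown));
            try (ResultScanner scanner = index.getScanner(scan)) {
                for (Result r : scanner) {
                    // cf1:rowkey holds the rowkey of the original data row
                    rowkeys.add(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("rowkey")));
                }
            }
        }
        return rowkeys;
    }
}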

2. Second demo: prePut observer

package test05;

/**
 * Created by guest2 on 2018/7/3.
 */

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;

public class TestCoprocessor2 extends BaseRegionObserver {

    // Note: HTablePool is deprecated (and removed in HBase 1.0); newer code would
    // obtain the table from e.getEnvironment().getTable(...), as PutObserver05 does.
    private HTablePool pool = null;

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // Create a pool of table handles when the coprocessor is loaded
        pool = new HTablePool(env.getConfiguration(), 10);
    }

    @Override
    public void prePut(ObserverContext<RegionCoprocessorEnvironment> e,
                       Put put, WALEdit edit, Durability durability) throws IOException {
        // The inverted index lives in table "guanzhu"
        HTableInterface table1 = pool.getTable(Bytes.toBytes("guanzhu"));

        byte[] rowkey = put.getRow();
        List<Cell> cells = put.get("f1".getBytes(), "from".getBytes());
        if (cells.isEmpty()) {
            table1.close();
            return;  // this put carries no f1:from value, nothing to index
        }
        Cell cell = cells.get(0);

        // Invert the pair: the value of f1:from becomes the index rowkey...
        Put indexput = new Put(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
        // ...and the original rowkey becomes the value of f1:from in the index row
        indexput.add(Bytes.toBytes("f1"), Bytes.toBytes("from"), rowkey);

        table1.put(indexput);
        table1.close();
    }

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        pool.close();
    }

}

3. HBase commands

1. Start a single HRegionServer process:
bin/hbase-daemon.sh start regionserver
2. Start a single HMaster process:
bin/hbase-daemon.sh start master
3. Truncate an entire table:
truncate 'member'
4. Upload the coprocessor jar to HDFS:
hadoop fs -put hbase.jar /hbase_es
hadoop fs -chmod -R 777 /hbase_es
5. Remove a secondary-index coprocessor from a table (re-enable the table afterwards):
disable 'test_record'
alter 'test_record', METHOD => 'table_att_unset', NAME => 'coprocessor$1'
enable 'test_record'
6. Attach a coprocessor (jar path | fully qualified class name | priority | arguments):
alter 'fensi', METHOD => 'table_att', 'coprocessor' => 'hdfs:///hbasecp/cppp.jar|Test05.TestCoprocessor|1001|'
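The same attachment can also be done from Java. Below is a minimal sketch, assuming the HBase 1.x admin API and reusing the jar path, class name, and priority from the alter command in item 6 above; the AttachCoprocessor class is illustrative.

package test05;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class AttachCoprocessor {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            TableName tn = TableName.valueOf("fensi");
            // The table must be disabled while its descriptor is modified
            admin.disableTable(tn);
            HTableDescriptor desc = admin.getTableDescriptor(tn);
            // jar on HDFS | observer class | priority | no extra arguments
            desc.addCoprocessor("Test05.TestCoprocessor",
                    new Path("hdfs:///hbasecp/cppp.jar"), 1001, null);
            admin.modifyTable(tn, desc);
            admin.enableTable(tn);
        }
    }
}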

4. Other test demos

package TV18;



import org.junit.Test;

import java.sql.*;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;

public class OperateOracle {

    // Connection settings: replace the host with your own IP address;
    // 1521 is the default Oracle listener port, and orcl is the service name
    private static String USERNAME = "test";
    private static String PASSWORD = "test";
    private static String DRIVER = "oracle.jdbc.OracleDriver";
    private static String URL = "jdbc:oracle:thin:@192.168.*.2*8:1521/orcl";

    // The database connection
    Connection connection = null;
    // Prepared statement; generally preferred over a plain Statement
    PreparedStatement pstm = null;
    // Result set
    ResultSet rs = null;

    /**
     * Get a Connection object
     *
     * @return the open connection
     */
    public Connection getConnection() {
        try {
            Class.forName(DRIVER);
            connection = DriverManager.getConnection(URL, USERNAME, PASSWORD);
            System.out.println("Connected to the database");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("class not find !", e);
        } catch (SQLException e) {
            throw new RuntimeException("get connection error!", e);
        }

        return connection;
    }


    /**
     * Query data from the database
     */
    @Test
    public void SelectData() {
        connection = getConnection();
        String sql = "select * from TEST04";
        try {
            pstm = connection.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                // Format site_time as a compact yyyyMMddHHmmss string
                Date time1 = new Date(rs.getTimestamp("site_time").getTime());
                SimpleDateFormat formatime = new SimpleDateFormat("yyyyMMddHHmmss");
                String pubtime = formatime.format(time1);

                String city_no = rs.getString("city_no");

                // Reverse the timestamp string, e.g. "20180707160000" -> "00006170708102"
                String v4 = new StringBuilder(pubtime).reverse().toString();
                System.out.println(city_no + "  " + v4);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }


    /**
     * Query data from the database, parsing site_time from its string form
     */
    @Test
    public void SelectData03() {
        connection = getConnection();
        String sql = "select * from TEST04";
        try {
            pstm = connection.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                String city_no = rs.getString("city_no");
                String site_time = rs.getString("site_time");

                // Parse the "yyyy-MM-dd HH:mm:ss" string...
                SimpleDateFormat formatime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                ParsePosition pos = new ParsePosition(0);
                java.util.Date result = formatime.parse(site_time, pos);
                // ...then re-format it as compact yyyyMMddHHmmss
                SimpleDateFormat formatime3 = new SimpleDateFormat("yyyyMMddHHmmss");
                String pubtime = formatime3.format(result);
                // Reverse the timestamp string
                String v4 = new StringBuilder(pubtime).reverse().toString();

                System.out.println(city_no + " 33333 " + v4);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }


    @Test
    public void test03() {
        String val = "1234567890";
        // substring(1, 9) keeps characters at indexes 1..8: "23456789"
        System.out.println(val.substring(1, 9));
    }

    @Test
    public void test04() {
        String val = "011579 809032 90806102";
        // prints the string length (22, including the two spaces)
        System.out.println(val.length());
    }

    /**
     * Delete data from the database
     * @param stuName delete rows matching this student name
     */
    public void DeleteData(String stuName) {
        connection = getConnection();
        String sqlStr = "delete from student where stu_name=?";
        System.out.println(stuName);
        try {
            // Execute the delete
            pstm = connection.prepareStatement(sqlStr);
            pstm.setString(1, stuName);
            pstm.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }

   

    /**
     * Release JDBC resources
     */
    public void ReleaseResource() {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (pstm != null) {
            try {
                pstm.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

Demo: simulating inserts with multiple column values

package TV18;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.text.DecimalFormat;

/**
 * Created by guest2 on 2018/7/4.
 */
public class TestHBaseAPI {
    @Test
    // Bulk-insert optimization test; the loop below is scaled down to 4 rows
    // (raise the bound to millions for a real benchmark)
    public void bigInsert() throws Exception {
        // Zero-pad the row counter to four digits for the rowkey
        DecimalFormat format = new DecimalFormat();
        format.applyPattern("0000");

        long start = System.currentTimeMillis();
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.116");

        Connection conn = ConnectionFactory.createConnection(conf);
        TableName tname = TableName.valueOf("fensi10");
        HTable table = (HTable) conn.getTable(tname);
        // Disable client-side auto-flush so puts are buffered locally
        table.setAutoFlush(false);

        for (int i = 1; i < 5; i++) {
            Put put = new Put(Bytes.toBytes("row" + format.format(i)));
            // Skip the write-ahead log for raw speed (data is lost on a region crash)
            put.setWriteToWAL(false);
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("from"), Bytes.toBytes("tom" + i));
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes("3" + i));
            table.put(put);

            // Flush the write buffer every 200 rows
            if (i % 200 == 0) {
                table.flushCommits();
            }
        }
        // Flush whatever is still buffered
        table.flushCommits();
        System.out.println(System.currentTimeMillis() - start);
    }
}
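On newer HBase clients, setAutoFlush and flushCommits are gone; the usual replacement is BufferedMutator. Below is a minimal sketch of the same loop under that assumption (HBase 1.x+ client API, same table and column names as above; the BufferedInsert class is illustrative).

package TV18;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.text.DecimalFormat;

public class BufferedInsert {
    public static void main(String[] args) throws Exception {
        DecimalFormat format = new DecimalFormat();
        format.applyPattern("0000");

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.116");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             // BufferedMutator batches puts client-side, like setAutoFlush(false) did
             BufferedMutator mutator = conn.getBufferedMutator(TableName.valueOf("fensi10"))) {
            for (int i = 1; i < 5; i++) {
                Put put = new Put(Bytes.toBytes("row" + format.format(i)));
                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("from"), Bytes.toBytes("tom" + i));
                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes("3" + i));
                mutator.mutate(put);  // buffered, not sent immediately
            }
            mutator.flush();          // push anything still in the buffer
        }
    }
}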


5. Sqoop import into HBase (CDH cluster)

[hdfs@tm-cdh-15 ~]$ sqoop import -D sqoop.hbase.add.row.key=true --connect jdbc:oracle:thin:@192.168.113.8:1521/orclgps --username hadoop --password hadoop --table TEST03 --hbase-table tm_temp05 --hbase-row-key ROWKEY --column-family cf1 -m 1

-D sqoop.hbase.add.row.key=true makes Sqoop store the ROWKEY column inside the column family as well as using it as the HBase rowkey, which is what the observer above reads from cf1:ROWKEY.
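To spot-check the imported rows from code, a short scan of the target table is enough. A sketch, assuming the HBase 1.x client API and the table and family names from the command above; the VerifyImport class is illustrative.

package test05;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class VerifyImport {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("tm_temp05"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int n = 0;
            for (Result r : scanner) {
                // Print the rowkey and the ROWKEY column that sqoop also stored in cf1
                System.out.println(Bytes.toString(r.getRow()) + " -> "
                        + Bytes.toString(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("ROWKEY"))));
                if (++n == 5) break;  // the first few rows are enough for a spot check
            }
        }
    }
}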

 
