HBase secondary indexes
1. Demo: secondary-index observer (postPut)
package test05;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;

/**
 * Created by guest2 on 2018/7/5.
 *
 * HBase observer coprocessor for secondary indexing.
 * For every put against the table this observer is attached to, selected
 * column values are combined into a new rowkey and written to the secondary
 * index table xulinv03, which speeds up queries on those fields.
 */
@SuppressWarnings("deprecation")
public class PutObserver05 extends BaseRegionObserver {

    @Override
    public void postPut(ObserverContext<RegionCoprocessorEnvironment> e,
                        Put put, WALEdit edit, Durability durability) throws IOException {
        // Get a handle on the secondary index table
        HTableInterface table = e.getEnvironment().getTable(TableName.valueOf("xulinv03"));
        byte[] rowkey = put.getRow();

        // Read the column values from the incoming Put (qualifiers must be upper case)
        List<Cell> cellList1 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("LINE_NO"));
        List<Cell> cellList2 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("IS_UP_DOWN"));
        List<Cell> cellList3 = put.get(Bytes.toBytes("cf1"), Bytes.toBytes("ROWKEY"));  // arrival time

        byte[] val1 = CellUtil.cloneValue(cellList1.get(0));
        byte[] val2 = CellUtil.cloneValue(cellList2.get(0));
        byte[] val3 = CellUtil.cloneValue(cellList3.get(0));
        String v1 = new String(val1, "utf-8");
        String v2 = new String(val2, "utf-8");
        String v3 = new String(val3, "utf-8");

        // Keep characters 6..20 of the ROWKEY value (the timestamp portion)
        String v4 = v3.substring(6, 20);
        // Index rowkey: line number + direction + timestamp
        String cl = v1 + v2 + v4;

        // Write the index row; the original rowkey is stored as the value of cf1:rowkey
        Put indexPut = new Put(cl.getBytes("UTF-8"));
        indexPut.add(Bytes.toBytes("cf1"), Bytes.toBytes("rowkey"), rowkey);
        table.put(indexPut);

        // Release resources
        table.close();
    }
}
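With the observer in place, a query on line number and direction becomes a prefix scan of the index table followed by point gets against the primary table. The sketch below is a minimal client-side illustration, not part of the original code: it assumes an HBase 1.1+ client (for setRowPrefixFilter), that the index table is xulinv03 as above, and that the primary table is the sqoop target tm_temp05 from the import command at the end of this page; the helper name lookupByIndex is made up for the example.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class IndexLookup {
    // Prefix-scan the index table by lineNo + direction, then fetch the
    // original rows from the primary table via the stored rowkey.
    public static void lookupByIndex(Connection conn, String lineNo, String direction) throws Exception {
        Table index = conn.getTable(TableName.valueOf("xulinv03"));
        Table primary = conn.getTable(TableName.valueOf("tm_temp05"));  // assumed primary table
        Scan scan = new Scan();
        scan.setRowPrefixFilter(Bytes.toBytes(lineNo + direction));
        try (ResultScanner rs = index.getScanner(scan)) {
            for (Result r : rs) {
                byte[] origKey = r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("rowkey"));
                Result row = primary.get(new Get(origKey));
                System.out.println(Bytes.toString(origKey) + " -> " + row);
            }
        }
        index.close();
        primary.close();
    }
}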
2. Second demo: prePut with HTablePool
package test05;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

/**
 * Created by guest2 on 2018/7/3.
 *
 * Observer that maintains an inverted follow index: each put into the fensi
 * (fans) table is mirrored into the guanzhu (follows) table with the rowkey
 * and the value of f1:from swapped.
 */
public class TestCoprocessor2 extends BaseRegionObserver {

    private HTablePool pool = null;

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // Pool of table handles, created once when the coprocessor is loaded
        pool = new HTablePool(env.getConfiguration(), 10);
    }

    @Override
    public void prePut(ObserverContext<RegionCoprocessorEnvironment> e,
                       Put put, WALEdit edit, Durability durability) throws IOException {
        HTableInterface table1 = pool.getTable(Bytes.toBytes("guanzhu"));
        byte[] rowkey = put.getRow();
        // The value of f1:from becomes the rowkey of the index row
        Cell cell = put.get("f1".getBytes(), "from".getBytes()).get(0);
        Put indexput = new Put(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
        indexput.add(Bytes.toBytes("f1"), Bytes.toBytes("from"), rowkey);
        table1.put(indexput);
        table1.close();
    }

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        pool.close();
    }
}
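Note that HTablePool was deprecated in HBase 0.98 and removed in 1.0. On a newer client the same hook can take its table handle from the coprocessor environment, as the first demo does. A minimal sketch of the rewritten prePut, assuming an HBase 1.x classpath (imports as in the class above, plus CellUtil, TableName, and Table):

@Override
public void prePut(ObserverContext<RegionCoprocessorEnvironment> e,
                   Put put, WALEdit edit, Durability durability) throws IOException {
    // On HBase 1.x the environment hands out Table instances directly
    Table table1 = e.getEnvironment().getTable(TableName.valueOf("guanzhu"));
    byte[] rowkey = put.getRow();
    Cell cell = put.get(Bytes.toBytes("f1"), Bytes.toBytes("from")).get(0);
    // A copy of the cell value becomes the index rowkey
    Put indexput = new Put(CellUtil.cloneValue(cell));
    indexput.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("from"), rowkey);
    table1.put(indexput);
    table1.close();
}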
3. HBase shell commands
1. Start a standalone HRegionServer process:
   bin/hbase-daemon.sh start regionserver
2. Start a standalone HMaster process:
   bin/hbase-daemon.sh start master
3. Truncate an entire table:
   truncate 'member'
4. Upload the coprocessor jar to HDFS:
   hadoop fs -put hbase.jar /hbase_es
   hadoop fs -chmod -R 777 /hbase_es
5. Detach a secondary-index coprocessor (disable the table first, and enable it again afterwards):
   disable 'test_record'
   alter 'test_record', METHOD => 'table_att_unset', NAME => 'coprocessor$1'
   Attach one (the value is jar path | observer class | priority | arguments):
   alter 'fensi', METHOD => 'table_att', 'coprocessor' => 'hdfs:///hbasecp/cppp.jar|Test05.TestCoprocessor|1001|'
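The same attach can be done from the Java Admin API instead of the shell. A sketch, assuming an HBase 1.x client and the table and jar names from the shell commands above:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class AttachObserver {
    public static void main(String[] args) throws Exception {
        Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
        Admin admin = conn.getAdmin();
        TableName tn = TableName.valueOf("fensi");
        admin.disableTable(tn);
        HTableDescriptor htd = admin.getTableDescriptor(tn);
        // class name, jar location on HDFS, priority, optional key/value args
        htd.addCoprocessor("Test05.TestCoprocessor",
                new Path("hdfs:///hbasecp/cppp.jar"), Coprocessor.PRIORITY_USER, null);
        admin.modifyTable(tn, htd);
        admin.enableTable(tn);
        admin.close();
        conn.close();
    }
}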
4. Another test demo (Oracle JDBC)
package TV18;

import org.junit.Test;

import java.sql.*;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;

public class OperateOracle {

    // Connection settings: the host is the DB server address (change to your
    // own IP), 1521 is the listener port, orcl is the service name
    private static String USERNAME = "test";
    private static String PASSWORD = "test";
    private static String DRIVER = "oracle.jdbc.OracleDriver";
    private static String URL = "jdbc:oracle:thin:@192.168.*.2*8:1521/orcl";

    // Database connection
    Connection connection = null;
    // Prepared statement; generally preferred over a plain Statement
    PreparedStatement pstm = null;
    // Result set
    ResultSet rs = null;

    /**
     * Get a Connection object.
     */
    public Connection getConnection() {
        try {
            Class.forName(DRIVER);
            connection = DriverManager.getConnection(URL, USERNAME, PASSWORD);
            System.out.println("Connected to the database");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("class not found!", e);
        } catch (SQLException e) {
            throw new RuntimeException("get connection error!", e);
        }
        return connection;
    }

    /**
     * Query data: format site_time as yyyyMMddHHmmss and reverse it.
     */
    @Test
    public void SelectData() {
        connection = getConnection();
        String sql = "select * from TEST04";
        try {
            pstm = connection.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                Date time1 = new Date(rs.getTimestamp("site_time").getTime());
                SimpleDateFormat formatime = new SimpleDateFormat("yyyyMMddHHmmss");
                String pubtime = formatime.format(time1);
                String city_no = rs.getString("city_no");
                String v4 = new StringBuilder(pubtime).reverse().toString();  // reversed
                System.out.println(city_no + " " + v4);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }

    /**
     * Query data: read site_time as a string, reparse it, then reverse.
     */
    @Test
    public void SelectData03() {
        connection = getConnection();
        String sql = "select * from TEST04";
        try {
            pstm = connection.prepareStatement(sql);
            rs = pstm.executeQuery();
            while (rs.next()) {
                String city_no = rs.getString("city_no");
                String site_time = rs.getString("site_time");
                SimpleDateFormat formatime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                ParsePosition pos = new ParsePosition(0);
                java.util.Date result = formatime.parse(site_time, pos);
                // Second pass: reformat, then reverse
                SimpleDateFormat formatime3 = new SimpleDateFormat("yyyyMMddHHmmss");
                String pubtime = formatime3.format(result);
                String v4 = new StringBuilder(pubtime).reverse().toString();  // reversed
                System.out.println(city_no + " 33333 " + v4);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }

    @Test
    public void test03() {
        String val = "1234567890";
        System.out.println(val.substring(1, 9));
    }

    @Test
    public void test04() {
        String val = "011579 809032 90806102";
        System.out.println(val.length());
    }

    /**
     * Delete data by student name.
     */
    public void DeleteData(String stuName) {
        connection = getConnection();
        String sqlStr = "delete from student where stu_name=?";
        System.out.println(stuName);
        try {
            pstm = connection.prepareStatement(sqlStr);
            pstm.setString(1, stuName);
            pstm.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            ReleaseResource();
        }
    }

    /**
     * Release resources.
     */
    public void ReleaseResource() {
        if (rs != null) {
            try { rs.close(); } catch (SQLException e) { e.printStackTrace(); }
        }
        if (pstm != null) {
            try { pstm.close(); } catch (SQLException e) { e.printStackTrace(); }
        }
        if (connection != null) {
            try { connection.close(); } catch (SQLException e) { e.printStackTrace(); }
        }
    }
}
Test demo: simulating inserts with multiple column values
package TV18;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.text.DecimalFormat;

/**
 * Created by guest2 on 2018/7/4.
 */
public class TestHBaseAPI {

    // Insert-throughput test (million-row insert optimization)
    @Test
    public void bigInsert() throws Exception {
        // Zero-pad the row number
        DecimalFormat format = new DecimalFormat();
        format.applyPattern("0000");

        long start = System.currentTimeMillis();
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.116");
        Connection conn = ConnectionFactory.createConnection(conf);
        TableName tname = TableName.valueOf("fensi10");
        HTable table = (HTable) conn.getTable(tname);
        // Do not flush the client write buffer automatically
        table.setAutoFlush(false);
        for (int i = 1; i < 5; i++) {
            Put put = new Put(Bytes.toBytes("row" + format.format(i)));
            // Skip the write-ahead log for speed (data-loss risk on a crash)
            put.setWriteToWAL(false);
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("from"), Bytes.toBytes("tom" + i));
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes("3" + i));
            table.put(put);
            // Flush the buffer every 200 rows
            if (i % 200 == 0) {
                table.flushCommits();
            }
        }
        // Final flush so the remaining buffered puts actually reach the server
        table.flushCommits();
        System.out.println(System.currentTimeMillis() - start);
    }
}
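On the HBase 1.x client the deprecated setAutoFlush/flushCommits pair is replaced by BufferedMutator, which buffers mutations and flushes them in batches automatically. A minimal sketch of the same insert under that API, with the same quorum and table name assumed (add imports for org.apache.hadoop.hbase.client.BufferedMutator and org.apache.hadoop.hbase.client.Durability):

Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.168.1.116");
try (Connection conn = ConnectionFactory.createConnection(conf);
     BufferedMutator mutator = conn.getBufferedMutator(TableName.valueOf("fensi10"))) {
    DecimalFormat format = new DecimalFormat("0000");
    for (int i = 1; i < 5; i++) {
        Put put = new Put(Bytes.toBytes("row" + format.format(i)));
        put.setDurability(Durability.SKIP_WAL);  // same WAL trade-off as setWriteToWAL(false)
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("from"), Bytes.toBytes("tom" + i));
        put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes("3" + i));
        mutator.mutate(put);  // buffered; flushed automatically when the buffer fills
    }
    mutator.flush();  // push whatever is still buffered
}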
5. Sqoop import on the CDH cluster (tm node)
[hdfs@tm-cdh-15 ~]$ sqoop import -D sqoop.hbase.add.row.key=true \
  --connect jdbc:oracle:thin:@192.168.113.8:1521/orclgps \
  --username hadoop --password hadoop \
  --table TEST03 \
  --hbase-table tm_temp05 --hbase-row-key ROWKEY --column-family cf1 -m 1

This imports the Oracle table TEST03 into the HBase table tm_temp05 with a single map task (-m 1), using the ROWKEY column as the HBase rowkey and writing all columns into family cf1; sqoop.hbase.add.row.key=true keeps the rowkey column itself as a regular cell as well.
