大数据预处理技术 1.大数据库Hbase技术1.9 Hbase专用过滤器SingleColumnValueFilter的使用 1.10 Hbase专用过滤器PageFilter的使用

# Hbase专用过滤器SingleColumnValueFilter的使用
##【实验目的】
1)掌握Hbase Java API开发环境
2)学会使用SingleColumnValueFilter查询列值等于指定值的数据
3)学会使用SingleColumnValueFilter查询列值包含某个前缀的数据
4)学会使用SingleColumnValueFilter查询列值大于指定值的数据
##【实验原理】
1、建立Java工程，调用Java API进行列值过滤器操作。
2、SingleColumnValueFilter是用来对列进行过滤的。
比较符如下：
Operator	Description
LESS	小于
LESS_OR_EQUAL	小于等于
EQUAL	等于
NOT_EQUAL	不等于
GREATER_OR_EQUAL	大于等于
GREATER	大于
NO_OP	排除所有
比较器如下：
Comparator	Description
BinaryComparator	使用Bytes.compareTo()比较
BinaryPrefixComparator	和BinaryComparator差不多，从前面开始比较
NullComparator	Does not compare against an actual value but whether a given one is null, or not null.
BitComparator	Performs a bitwise comparison, providing a BitwiseOp class with AND, OR, and XOR operators.
RegexStringComparator	正则表达式
SubstringComparator	把数据当成字符串，用contains()来判断

##【实验环境】
本次环境是：centos6.5+jdk1.7.0_79+hbase0.96+eclipse
  host01是计算机名称对应ip地址为 192.168.0.131，可以在/etc/hosts文件中查看映射关系
工具在/simple/soft目录下
##【实验步骤】
### 一、准备阶段
1.1 配置主机名和IP的映射关系。
如果你在Windows平台下使用Eclipse开发并执行代码调用Linux平台的Hbase，则需要配置主机名和IP的映射关系。
Linux上部署Hbase的主机名为host01，IP为192.168.0.131。
需要在C:\Windows\System32\drivers\etc目录下的hosts文件中添加内容192.168.0.131	host01。如图1所示。
1.2 准备用于开发Java程序的eclipse。
如图2所示。
1.3 准备项目中所需的Hbase包。如图3所示。
包的位置：$HBASE_HOME/lib,如图4所示。
1.4 准备用于测试的表和数据。
创建表的代码如下。
含义：创建表account3，含有两个列族，分别是baseinfo、contacts。

	package com.simple.create;

	import java.io.IOException;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.HColumnDescriptor;
	import org.apache.hadoop.hbase.HTableDescriptor;
	import org.apache.hadoop.hbase.TableName;
	import org.apache.hadoop.hbase.client.HBaseAdmin;

	/**
	 * Creates the HBase table {@code account3} with two column families:
	 * {@code baseinfo} (max 5 versions) and {@code contacts} (max 3 versions).
	 */
	public class CreateTable3 {

		/**
		 * Builds the table descriptor and asks HBase to create the table.
		 *
		 * @param args unused
		 * @throws IOException propagated from the HBase admin calls
		 */
		public static void main(String[] args) throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper,
			//    so only the quorum address is set here.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Table schema: table "account3" with column families "baseinfo" and "contacts".
			HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf("account3"));
			HColumnDescriptor baseInfoFamily = new HColumnDescriptor("baseinfo");
			baseInfoFamily.setMaxVersions(5);
			HColumnDescriptor contactsFamily = new HColumnDescriptor("contacts");
			contactsFamily.setMaxVersions(3);
			tableDesc.addFamily(baseInfoFamily);
			tableDesc.addFamily(contactsFamily);

			// 3. Create the table. try-with-resources closes the admin connection
			//    even when createTable throws.
			try (HBaseAdmin hbaseAdmin = new HBaseAdmin(config)) {
				hbaseAdmin.createTable(tableDesc);
			}
		}
	}
1.5 创建数据的代码如下。
含义：向表account3中插入8条数据，行键从rk01到rk08。列baseinfo:name的值从JiKang1到JiKang8，列baseinfo:age的值为数字（以字符串形式存储），列contacts:address存储省+城市名称。

	package com.simple.put;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Put;
	import org.apache.hadoop.hbase.util.Bytes;

	/**
	 * Inserts eight sample rows (row keys {@code rk01}..{@code rk08}) into the
	 * HBase table {@code account3} with a single batched put.
	 */
	public class PutListTest3 {

		/** Sample data, one entry per row: row key, baseinfo:name, baseinfo:age, contacts:address. */
		private static final String[][] ROWS = {
			{"rk01", "JiKang1", "33", "北京通州"},
			{"rk02", "JiKang2", "26", "上海浦东"},
			{"rk03", "JiKang3", "89", "甘肃兰州"},
			{"rk04", "JiKang4", "23", "河北沧州"},
			{"rk05", "JiKang5", "90", "天津滨海"},
			{"rk06", "JiKang6", "55", "河南郑州"},
			{"rk07", "JiKang7", "15", "甘肃天水"},
			{"rk08", "JiKang8", "25", "西藏拉萨"},
		};

		/**
		 * Builds one {@link Put} per sample row and writes them all in one call.
		 *
		 * @param args unused
		 * @throws IOException propagated from the HBase client calls
		 */
		public static void main(String[] args) throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Build the list of Put objects, in the same order as ROWS.
			List<Put> listPut = new ArrayList<Put>();
			for (String[] row : ROWS) {
				listPut.add(buildPut(row[0], row[1], row[2], row[3]));
			}

			// 3. Open the table and insert all rows at once. try-with-resources
			//    closes the table even when put throws.
			try (HTable table = new HTable(config, "account3")) {
				table.put(listPut);
			}
		}

		/** Builds the Put for one row: baseinfo:name, baseinfo:age and contacts:address. */
		private static Put buildPut(String rowKey, String name, String age, String address) {
			Put put = new Put(Bytes.toBytes(rowKey));
			put.add(Bytes.toBytes("baseinfo"), Bytes.toBytes("name"), Bytes.toBytes(name));
			// The age is stored as the bytes of its decimal string, not as a binary integer.
			put.add(Bytes.toBytes("baseinfo"), Bytes.toBytes("age"), Bytes.toBytes(age));
			put.add(Bytes.toBytes("contacts"), Bytes.toBytes("address"), Bytes.toBytes(address));
			return put;
		}
	}

1.6 在linux终端下启动hadoop服务和hbase服务
通过`start-all.sh`启动Hadoop服务，并通过`cd /simple/hbase-0.96-2-hadoop2/bin`命令进入Hbase的bin目录，
执行`./start-hbase.sh`启动Hbase服务。通过`jps`查看是否启动成功。
1.7 先后运行创建表和创建数据的类

### 二、程序编写
2.1 创建Java工程。
在eclipse的项目列表中，右键点击，选择“New”—>“Java Project…”，新建一个项目“SingleColumnValueFilter”。如图5所示。

2.2 创建Java类。
在项目src目录下，右键点击，选择“新建”，创建一个名称为“SingleColumnValueFilterTest”的类文件，并指定包名“com.simple.filter”。如图6所示。

2.3 复制hbase相关jar包到lib文件夹。
在编写“SingleColumnValueFilterTest”类之前需要把hbase相关的jar包导入，
首先在项目根目录下创建一个文件夹lib，把hbase相关jar包复制到该文件夹中。如图7所示。

2.4 将lib下所有的jar包导入到项目环境中。
首先全选lib文件夹下的jar包文件，右键点击，选择“build path”-->“add to build path”。
添加后，发现jar包被引用到了工程的Referenced Libraries中。如图8所示。

2.5 创建程序的入口main方法。
在类“SingleColumnValueFilterTest”中编写程序的入口main方法。如图9所示。
2.6 编写代码如下。

	package com.simple.filter;

	import java.io.IOException;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Result;
	import org.apache.hadoop.hbase.client.ResultScanner;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
	import org.apache.hadoop.hbase.filter.CompareFilter;
	import org.apache.hadoop.hbase.filter.FilterList;
	import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
	import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
	import org.apache.hadoop.hbase.util.Bytes;

	/**
	 * Demonstrates {@link SingleColumnValueFilter} with three scans over the
	 * table {@code account3}: exact match, prefix match and "greater than".
	 */
	public class SingleColumnValueFilterTest {

		/**
		 * Runs the three demo queries and prints every matching row.
		 *
		 * @throws IOException propagated from the HBase client calls
		 */
		public void testRowFilter() throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Open the table. try-with-resources closes it even when a scan throws.
			try (HTable table = new HTable(config, "account3")) {

				// 3-1. Rows whose baseinfo:name equals "JiKang3".
				//      Constructor arguments: family, qualifier, compare operator, value.
				System.out.println("列baseinfo:name的值等于JiKang3的数据");
				scanAndPrint(table, new SingleColumnValueFilter(
						Bytes.toBytes("baseinfo"), Bytes.toBytes("name"),
						CompareOp.EQUAL, Bytes.toBytes("JiKang3")));

				// 3-2. Rows whose contacts:address starts with "甘肃".
				//      EQUAL combined with BinaryPrefixComparator gives a prefix match.
				System.out.println("列contacts:address的值以甘肃开头的数据");
				scanAndPrint(table, new SingleColumnValueFilter(
						Bytes.toBytes("contacts"), Bytes.toBytes("address"),
						CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("甘肃"))));

				// 3-3. Rows whose baseinfo:age is greater than "30".
				//      NOTE(review): the filter value is the bytes of the string "30", so this
				//      is presumably a byte-wise comparison rather than a numeric one; it only
				//      matches numeric order while all stored ages have the same number of
				//      digits - confirm before reusing with other data.
				System.out.println("列baseinfo:age的值大于30的数据");
				scanAndPrint(table, new SingleColumnValueFilter(
						Bytes.toBytes("baseinfo"), Bytes.toBytes("age"),
						CompareOp.GREATER, Bytes.toBytes("30")));
			}
		}

		/** Scans the table with the given filter and prints name, address and age of each row. */
		private static void scanAndPrint(HTable table, SingleColumnValueFilter filter)
				throws IOException {
			// The filter is wrapped in a FilterList and set on a fresh Scan.
			FilterList filterList = new FilterList();
			filterList.addFilter(filter);
			Scan scan = new Scan();
			scan.setFilter(filterList);

			// try-with-resources releases the scanner even if iteration fails.
			try (ResultScanner scanner = table.getScanner(scan)) {
				for (Result res : scanner) {
					String name = Bytes.toString(
							res.getValue(Bytes.toBytes("baseinfo"), Bytes.toBytes("name")));
					String address = Bytes.toString(
							res.getValue(Bytes.toBytes("contacts"), Bytes.toBytes("address")));
					String age = Bytes.toString(
							res.getValue(Bytes.toBytes("baseinfo"), Bytes.toBytes("age")));
					System.out.println("姓名==>" + name + "；地址==>" + address + "；年龄==>" + age);
				}
			}
		}

		/**
		 * Program entry point: creates the test instance and runs the demo.
		 *
		 * @param args unused
		 * @throws IOException propagated from {@link #testRowFilter()}
		 */
		public static void main(String[] args) throws IOException {
			SingleColumnValueFilterTest test = new SingleColumnValueFilterTest();
			test.testRowFilter();
		}
	}

### 三、程序测试
3.1 执行代码。
选中测试类SingleColumnValueFilterTest，右键点击，选择“Run as”-->“Java Application”，程序将执行。
查看控制台打印的日志，可以查看到运行结果。如图10所示。

3.2 注：如果控制台打印日志“java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.”，无需理会。
该异常是Windows平台没有查找到winutils.exe所打印的提示。如图11所示。

3.3 查看结果。
控制台打印结果如图12所示。

# Hbase专用过滤器PageFilter的使用
##【实验目的】
1)掌握Hbase Java API开发环境
2)学会使用PageFilter进行分页查询操作
##【实验原理】
1、建立Java工程，调用Java API进行分页过滤器操作。
2、PageFilter是用来进行分页过滤查询的。
下面的比较符和比较器供其他比较类过滤器（如SingleColumnValueFilter）使用；PageFilter只需要指定每页的行数，不需要它们。
比较符如下：
Operator	Description
LESS	小于
LESS_OR_EQUAL	小于等于
EQUAL	等于
NOT_EQUAL	不等于
GREATER_OR_EQUAL	大于等于
GREATER	大于
NO_OP	排除所有
比较器如下：
Comparator	Description
BinaryComparator	使用Bytes.compareTo()比较
BinaryPrefixComparator	和BinaryComparator差不多，从前面开始比较
NullComparator	Does not compare against an actual value but whether a given one is null, or not null.
BitComparator	Performs a bitwise comparison, providing a BitwiseOp class with AND, OR, and XOR operators.
RegexStringComparator	正则表达式
SubstringComparator	把数据当成字符串，用contains()来判断

##【实验环境】
本次环境是：centos6.5+jdk1.7.0_79+hbase0.96+eclipse
  host01是计算机名称对应ip地址为 192.168.0.131，可以在/etc/hosts文件中查看映射关系
工具在/simple/soft目录下
##【实验步骤】
### 一、准备阶段
1.1 配置主机名和IP的映射关系。
如果你在Windows平台下使用Eclipse开发并执行代码调用Linux平台的Hbase，则需要配置主机名和IP的映射关系。Linux上部署Hbase的主机名为host01，IP为192.168.0.131。
需要在C:\Windows\System32\drivers\etc目录下的hosts文件中添加内容192.168.0.131	host01。如图1所示。
1.2 准备用于开发Java程序的eclipse。
如图2所示。
1.3 准备项目中所需的Hbase包。如图3所示。
包的位置：$HBASE_HOME/lib。如图4所示。
1.4 准备用于测试的表和数据。
创建表的代码如下。
含义：创建表account4，含有两个列族，分别是baseinfo、contacts。

	package com.simple.create;

	import java.io.IOException;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.HColumnDescriptor;
	import org.apache.hadoop.hbase.HTableDescriptor;
	import org.apache.hadoop.hbase.TableName;
	import org.apache.hadoop.hbase.client.HBaseAdmin;

	/**
	 * Creates the HBase table {@code account4} with two column families:
	 * {@code baseinfo} (max 5 versions) and {@code contacts} (max 3 versions).
	 */
	public class CreateTable4 {

		/**
		 * Builds the table descriptor and asks HBase to create the table.
		 *
		 * @param args unused
		 * @throws IOException propagated from the HBase admin calls
		 */
		public static void main(String[] args) throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper,
			//    so only the quorum address is set here.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Table schema: table "account4" with column families "baseinfo" and "contacts".
			HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf("account4"));
			HColumnDescriptor baseInfoFamily = new HColumnDescriptor("baseinfo");
			baseInfoFamily.setMaxVersions(5);
			HColumnDescriptor contactsFamily = new HColumnDescriptor("contacts");
			contactsFamily.setMaxVersions(3);
			tableDesc.addFamily(baseInfoFamily);
			tableDesc.addFamily(contactsFamily);

			// 3. Create the table. try-with-resources closes the admin connection
			//    even when createTable throws.
			try (HBaseAdmin hbaseAdmin = new HBaseAdmin(config)) {
				hbaseAdmin.createTable(tableDesc);
			}
		}
	}

1.5 创建数据的代码如下。
含义：向表account4中插入8条数据，行键从rk01到rk08。列baseinfo:name的值从JiKang1到JiKang8，列baseinfo:age的值为数字（以字符串形式存储），列contacts:address存储省+城市名称。

	package com.simple.put;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.List;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Put;
	import org.apache.hadoop.hbase.util.Bytes;

	/**
	 * Inserts eight sample rows (row keys {@code rk01}..{@code rk08}) into the
	 * HBase table {@code account4} with a single batched put.
	 */
	public class PutListTest4 {

		/** Sample data, one entry per row: row key, baseinfo:name, baseinfo:age, contacts:address. */
		private static final String[][] ROWS = {
			{"rk01", "JiKang1", "33", "北京通州"},
			{"rk02", "JiKang2", "26", "上海浦东"},
			{"rk03", "JiKang3", "89", "甘肃兰州"},
			{"rk04", "JiKang4", "23", "河北沧州"},
			{"rk05", "JiKang5", "90", "天津滨海"},
			{"rk06", "JiKang6", "55", "河南郑州"},
			{"rk07", "JiKang7", "15", "甘肃天水"},
			{"rk08", "JiKang8", "25", "西藏拉萨"},
		};

		/**
		 * Builds one {@link Put} per sample row and writes them all in one call.
		 *
		 * @param args unused
		 * @throws IOException propagated from the HBase client calls
		 */
		public static void main(String[] args) throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Build the list of Put objects, in the same order as ROWS.
			List<Put> listPut = new ArrayList<Put>();
			for (String[] row : ROWS) {
				listPut.add(buildPut(row[0], row[1], row[2], row[3]));
			}

			// 3. Open the table and insert all rows at once. try-with-resources
			//    closes the table even when put throws.
			try (HTable table = new HTable(config, "account4")) {
				table.put(listPut);
			}
		}

		/** Builds the Put for one row: baseinfo:name, baseinfo:age and contacts:address. */
		private static Put buildPut(String rowKey, String name, String age, String address) {
			Put put = new Put(Bytes.toBytes(rowKey));
			put.add(Bytes.toBytes("baseinfo"), Bytes.toBytes("name"), Bytes.toBytes(name));
			// The age is stored as the bytes of its decimal string, not as a binary integer.
			put.add(Bytes.toBytes("baseinfo"), Bytes.toBytes("age"), Bytes.toBytes(age));
			put.add(Bytes.toBytes("contacts"), Bytes.toBytes("address"), Bytes.toBytes(address));
			return put;
		}
	}

1.6 在linux终端下启动hadoop服务和hbase服务
通过`start-all.sh`启动Hadoop服务，并通过`cd /simple/hbase-0.96-2-hadoop2/bin`命令进入Hbase的bin目录，执行`./start-hbase.sh`启动Hbase服务。通过`jps`查看是否启动成功。
1.7 先后运行创建表和创建数据的类

### 二、程序编写
2.1 创建Java工程。
在eclipse的项目列表中，右键点击，选择“New”—>“Java Project…”，新建一个项目“PageFilter”。如图5所示。
2.2 创建Java类。
在项目src目录下，右键点击，选择“新建”，创建一个名称为“PageFilterTest”的类文件，并指定包名“com.simple.filter”。如图6所示。

2.3 复制hbase相关jar包到lib文件夹。
在编写“PageFilterTest”类之前需要把hbase相关的jar包导入，首先在项目根目录下创建一个文件夹lib，把hbase相关jar包复制到该文件夹中。如图7所示。

2.4 将lib下所有的jar包导入到项目环境中。
首先全选lib文件夹下的jar包文件，右键点击，选择“build path”-->“add to build path”。添加后，
发现jar包被引用到了工程的Referenced Libraries中。如图8所示。

2.5 创建程序的入口main方法。
在类“PageFilterTest”中编写程序的入口main方法。如图9所示。
<center>![](/UploadImage/2016/6/16/153725965247176601.png)</center>
<center>图9</center>
2.6 编写代码如下。

	package com.simple.filter;

	import java.io.IOException;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.hbase.HBaseConfiguration;
	import org.apache.hadoop.hbase.client.HTable;
	import org.apache.hadoop.hbase.client.Result;
	import org.apache.hadoop.hbase.client.ResultScanner;
	import org.apache.hadoop.hbase.client.Scan;
	import org.apache.hadoop.hbase.filter.Filter;
	import org.apache.hadoop.hbase.filter.PageFilter;
	import org.apache.hadoop.hbase.util.Bytes;

	/**
	 * Demonstrates paging through the table {@code account4} with
	 * {@link PageFilter}, three rows per page.
	 */
	public class PageFilterTest {

		/** Number of rows requested per page. */
		private static final int PAGE_SIZE = 3;

		/**
		 * A single zero byte appended to the last row key of a page to form the
		 * start row of the next page.
		 */
		private static final byte[] POSTFIX = new byte[] { 0x00 };

		/**
		 * Scans the table page by page, prints every row, and finally prints the
		 * total number of rows seen.
		 *
		 * @throws IOException propagated from the HBase client calls
		 */
		public void testFilter() throws IOException {
			// 1. Client configuration. The HBase client connects through ZooKeeper.
			Configuration config = HBaseConfiguration.create();
			config.set("hbase.zookeeper.quorum", "192.168.0.131:2181");

			// 2. Open the table "account4". try-with-resources closes it even when a scan throws.
			try (HTable table = new HTable(config, "account4")) {
				// 3. The page filter is created once and reused for every page.
				Filter filter = new PageFilter(PAGE_SIZE);
				int totalRows = 0;
				byte[] lastRow = null;

				// 4. One loop iteration per page; the loop ends on the first empty page.
				while (true) {
					Scan scan = new Scan();
					scan.setFilter(filter);

					// On every page but the first, start after the last row already
					// printed. Without the POSTFIX byte the scan would start at
					// lastRow again and the loop would never end.
					if (lastRow != null) {
						byte[] startRow = Bytes.add(lastRow, POSTFIX);
						System.out.println("start row:" + Bytes.toStringBinary(startRow));
						scan.setStartRow(startRow);
					}

					// 5. Run the scan, print the page, and remember its last row key.
					int localRows = 0;
					try (ResultScanner scanner = table.getScanner(scan)) {
						Result result;
						while ((result = scanner.next()) != null) {
							String name = Bytes.toString(result.getValue(
									Bytes.toBytes("baseinfo"), Bytes.toBytes("name")));
							String address = Bytes.toString(result.getValue(
									Bytes.toBytes("contacts"), Bytes.toBytes("address")));
							System.out.println(result + "==>" + name + "==>" + address);
							totalRows++;
							localRows++;
							lastRow = result.getRow();
						}
					}
					// A blank line separates the pages in the output.
					System.out.println("");

					// 6. An empty page means there is nothing left to read.
					if (localRows == 0) {
						break;
					}
				}
				System.out.println("total rows:" + totalRows);
			}
		}

		/**
		 * Program entry point: creates the test instance and runs the demo.
		 *
		 * @param args unused
		 * @throws IOException propagated from {@link #testFilter()}
		 */
		public static void main(String[] args) throws IOException {
			PageFilterTest test = new PageFilterTest();
			test.testFilter();
		}
	}

### 三、程序测试
3.1 执行代码。
选中测试类PageFilterTest，右键点击，选择“Run as”-->“Java Application”，程序将执行。查看控制台打印的日志，可以查看到运行结果。如图10所示。
3.2 查看结果。
控制台打印结果如图11所示。共8行分3页输出。

posted on 2020-09-01 23:58 小希米七阅读(540) 评论(0) 收藏举报

刷新页面返回顶部

公告