HBase Operations
hadoop@dblab-VirtualBox:~$ start-all.sh
hadoop@dblab-VirtualBox:~$ jps
hadoop@dblab-VirtualBox:~$ start-hbase.sh
hadoop@dblab-VirtualBox:~$ hbase shell
hbase(main):001:0> list
hbase(main):002:0> disable 'student'
hbase(main):003:0> drop 'student'
hbase(main):004:0> list
hbase(main):005:0> create 'student','info'
hbase(main):006:0> list
hbase(main):007:0> put 'student','1','info:name','zhangsan'
hbase(main):008:0> put 'student','1','info:gender','Female'
hbase(main):009:0> put 'student','1','info:age','23'
hbase(main):010:0> put 'student','2','info:name','lisi'
hbase(main):011:0> put 'student','2','info:gender','Male'
hbase(main):012:0> put 'student','2','info:age','24'
hbase(main):013:0> scan 'student'
hbase(main):014:0> get 'student','1'
hbase(main):015:0> get 'student','1','info:name'
hbase(main):016:0> exit
hadoop@dblab-VirtualBox:~$
SparkOperateHBase.scala
package dblab.SparkHbaseDemo

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase._
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

object SparkOperateHBase {
  def main(args: Array[String]) {
    val conf = HBaseConfiguration.create()
    val sc = new SparkContext(new SparkConf().setAppName("SparkOperateHBase").setMaster("local"))
    // Set the name of the table to query
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    // Cache the RDD so the count and the foreach below reuse the same scan results
    stuRDD.cache()
    val count = stuRDD.count()
    println("Students RDD Count:" + count)
    // Iterate over the RDD and print each row
    stuRDD.foreach({ case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
      val gender = Bytes.toString(result.getValue("info".getBytes, "gender".getBytes))
      val age = Bytes.toString(result.getValue("info".getBytes, "age".getBytes))
      println("Row key:" + key + " Name:" + name + " Gender:" + gender + " Age:" + age)
    })
  }
}
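The program above scans every column of the student table. TableInputFormat also accepts scan properties that narrow the input before the RDD is built, which avoids shipping cells the job never uses. The following is a minimal sketch of that idea, not part of the original example: the object name SparkScanColumnsHBase and the column list "info:name info:age" are illustrative choices, and it assumes the same local setup and student table used above.

package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

// Illustrative sketch: read only selected columns of the student table.
object SparkScanColumnsHBase {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("SparkScanColumnsHBase").setMaster("local"))
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, "student")
    // Space-delimited "family:qualifier" list; only these cells are returned by the scan
    conf.set(TableInputFormat.SCAN_COLUMNS, "info:name info:age")
    val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    stuRDD.foreach { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue("info".getBytes, "name".getBytes))
      val age = Bytes.toString(result.getValue("info".getBytes, "age".getBytes))
      println("Row key:" + key + " Name:" + name + " Age:" + age)
    }
    sc.stop()
  }
}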
SparkWriteHBase.scala
package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.spark._
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

object SparkWriteHBase {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkWriteHBase").setMaster("local")
    val sc = new SparkContext(sparkConf)
    sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, "student")
    val job = Job.getInstance(sc.hadoopConfiguration)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Result])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    // Build two records, one per string: "row key,name,gender,age"
    val indataRDD = sc.makeRDD(Array("4,wangwu,Male,26", "5,chengxin,Female,27"))
    val rdd = indataRDD.map(_.split(',')).map { arr =>
      // Use the first field as the row key
      val put = new Put(Bytes.toBytes(arr(0)))
      // Set the info:name column
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
      // Set the info:gender column
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("gender"), Bytes.toBytes(arr(2)))
      // Set the info:age column; store it as a string so that the read program's
      // Bytes.toString call prints it correctly
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(arr(3)))
      // Build a key-value pair as one element of the output RDD
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration())
  }
}
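After SparkWriteHBase has run, the new rows can be checked from the HBase shell with scan 'student', or read back through the HBase client API directly, without going through Spark. The following is a small sketch of such a read-back check, not part of the original tutorial: the object name HBaseReadBackCheck is illustrative, and it assumes an HBase 1.x client (ConnectionFactory/Table API) and the student table and row key "4" written above.

package dblab.SparkHbaseDemo

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

// Illustrative sketch: read one row back with the plain HBase client.
object HBaseReadBackCheck {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val connection = ConnectionFactory.createConnection(conf)
    val table = connection.getTable(TableName.valueOf("student"))
    try {
      // Fetch the row that SparkWriteHBase inserted with row key "4"
      val result = table.get(new Get(Bytes.toBytes("4")))
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      println("Name:" + name + " Age:" + age)
    } finally {
      table.close()
      connection.close()
    }
  }
}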
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>dblab</groupId>
  <artifactId>SparkHbaseDemo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>${project.artifactId}</name>
  <properties>
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.version>2.11</scala.version>
    <spark.version>2.1.0</spark.version>
    <hbase.version>1.1.5</hbase.version>
  </properties>
  <dependencies>
    <!-- Spark -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <!-- HBase -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
    </dependency>
  </dependencies>
  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
  </build>
</project>