import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Read the contents of an HBase table as a Spark RDD.
 */
object SparkOperateHBase {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val sc = new SparkContext(new SparkConf().setAppName("SparkOperateHBase"))
    // Tell TableInputFormat which HBase table to read
    conf.set(TableInputFormat.INPUT_TABLE, "student")
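    // Optional sketch: TableInputFormat also accepts keys that narrow the scan,
    // such as SCAN_COLUMN_FAMILY, SCAN_ROW_START and SCAN_ROW_STOP. The values
    // below ("info", "1", "5") are illustrative and not part of the original example.
    // conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "info")
    // conf.set(TableInputFormat.SCAN_ROW_START, "1")
    // conf.set(TableInputFormat.SCAN_ROW_STOP, "5")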
    // Load the table as an RDD of (ImmutableBytesWritable, Result) pairs
    val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    stuRDD.cache()  // cached because the RDD is reused below
    val count = stuRDD.count()
    println("Students RDD count: " + count)
    // Read the HBase rows and print the row key plus the name, gender and age
    // columns of the "info" family
    stuRDD.foreach { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      println("Row key: " + key + " Name: " + name + " Gender: " + gender + " Age: " + age)
    }
    // Convert each Result into a plain (rowKey, name, gender, age) tuple RDD
    val resRDD = stuRDD.map { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
      val gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")))
      val age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")))
      (key, name, gender, age)
    }
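    // Illustrative use of the tuple RDD (not in the original listing): pull a few
    // rows back to the driver and print them; fine for a small demo table.
    resRDD.take(10).foreach(println)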
    sc.stop()
  }
}
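// Note: to run this class with spark-submit, the HBase client jars and the
// cluster's hbase-site.xml must be on the Spark classpath (for example via
// --jars and --driver-class-path); the exact jar list depends on the HBase
// version in use.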