Wrapping the Project's Utility Classes
Today we wrap up the utility classes the project needs, mainly the following:
- Properties loading utility
- ES utility
- Redis utility
- Kafka utility
- Offset maintenance utility
1. Properties Loading Utility Class
The properties file holds the configuration for components such as Redis and Kafka, so we need a utility to load it.
import java.io.InputStreamReader
import java.nio.charset.StandardCharsets
import java.util.Properties

/**
 * Reads configuration from a properties file
 */
object MyPropertiesUtil {
  def main(args: Array[String]): Unit = {
    val properties: Properties = MyPropertiesUtil.load("config.properties")
    println(properties.getProperty("kafka.broker.list"))
  }

  def load(propertiesName: String): Properties = {
    val properties: Properties = new Properties()
    // Load the file from the classpath: at build time the properties file under
    // resources is copied to target, so the current thread's context class loader can find it
    properties.load(new InputStreamReader(
      Thread.currentThread().getContextClassLoader.getResourceAsStream(propertiesName), StandardCharsets.UTF_8))
    properties
  }
}
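For reference, the keys read by the utilities in this post (kafka.broker.list, redis.host, redis.port) would live in a config.properties under src/main/resources. The host names and ports below are only placeholders:

# Kafka broker list (placeholder host names)
kafka.broker.list=hadoop201:9092,hadoop202:9092,hadoop203:9092
# Redis connection info (placeholder values)
redis.host=hadoop201
redis.port=6379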
2. Redis Utility Class
In this small project Redis is used for efficient daily-active-user deduplication and for maintaining Kafka offsets. The main methods of the utility class are as follows:
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

object MyRedisUtil {
  // connection pool, created lazily
  private var jedisPool: JedisPool = null

  // create the JedisPool
  def build() = {
    val config = MyPropertiesUtil.load("config.properties")
    val host: String = config.getProperty("redis.host")
    val port: String = config.getProperty("redis.port")
    val jedisPoolConfig = new JedisPoolConfig()
    jedisPoolConfig.setMaxTotal(100)            // maximum number of connections
    jedisPoolConfig.setMaxIdle(20)              // maximum idle connections
    jedisPoolConfig.setMinIdle(20)              // minimum idle connections
    jedisPoolConfig.setBlockWhenExhausted(true) // block when the pool is exhausted
    jedisPoolConfig.setMaxWaitMillis(10000)     // how long to wait when busy, in milliseconds
    jedisPoolConfig.setTestOnBorrow(true)       // validate each connection when it is borrowed
    jedisPool = new JedisPool(jedisPoolConfig, host, port.toInt)
  }

  // get a Jedis client
  def getJedisClient(): Jedis = {
    // build the pool on first use
    if (jedisPool == null) {
      build()
    }
    jedisPool.getResource
  }
}
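To illustrate the deduplication use case mentioned above, here is a minimal sketch (not part of the original utility) that uses a Redis set keyed by date to decide whether a device id has already been counted today. The key format dau:yyyy-MM-dd and the mid value are assumptions made for this example.

import redis.clients.jedis.Jedis

object DauDedupExample {
  def main(args: Array[String]): Unit = {
    val jedis: Jedis = MyRedisUtil.getJedisClient()
    val dauKey = "dau:2022-05-01" // hypothetical key: one set per day
    val mid = "mid_001"           // hypothetical device id
    // sadd returns 1 when the member is newly added, 0 when it already exists
    val added: Long = jedis.sadd(dauKey, mid)
    jedis.expire(dauKey, 24 * 3600) // let the key expire after one day
    println(s"first visit today: ${added == 1L}")
    jedis.close()
  }
}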
3. Kafka Utility Class
import java.util.Properties
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

/**
 * Utility for reading from Kafka
 */
object MyKafkaUtil {
  // load the configuration through the properties utility
  val properties: Properties = MyPropertiesUtil.load("config.properties")
  val broker_list: String = properties.getProperty("kafka.broker.list")

  // Kafka consumer configuration
  var kafkaParam = collection.mutable.Map(
    "bootstrap.servers" -> broker_list, // addresses used to bootstrap the connection to the cluster
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    // identifies which consumer group this consumer belongs to
    "group.id" -> "gmall0429_group",
    // latest: reset the offset to the latest offset when none is committed
    "auto.offset.reset" -> "latest",
    // if true, the consumer's offsets are committed automatically in the background,
    //   but data can be lost if Kafka goes down
    // if false, offsets have to be maintained manually
    "enable.auto.commit" -> (false: java.lang.Boolean))

  // create a DStream that returns the received input data
  def getKafkaStream(topic: String, ssc: StreamingContext): InputDStream[ConsumerRecord[String, String]] = {
    val dStream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam)
    )
    dStream
  }

  // same as above, but with an explicit consumer group id
  def getKafkaStream(topic: String, ssc: StreamingContext, groupId: String): InputDStream[ConsumerRecord[String, String]] = {
    kafkaParam("group.id") = groupId
    val dStream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam))
    dStream
  }

  // same as above, but also starting from explicitly supplied offsets
  def getKafkaStream(topic: String, ssc: StreamingContext, offsets: Map[TopicPartition, Long], groupId: String): InputDStream[ConsumerRecord[String, String]] = {
    kafkaParam("group.id") = groupId
    val dStream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array(topic), kafkaParam, offsets))
    dStream
  }

  /**
   * LocationStrategies (location strategies)
   * PreferConsistent: the strategy most clusters use; partitions are distributed evenly across the available executors
   * PreferBrokers: use only when the executors run on the same nodes as the Kafka brokers
   */
}
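A minimal sketch of how one of these overloads might be called from a Spark Streaming job; the app name, master, topic and group id are placeholders:

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("KafkaStreamExample")
    val ssc = new StreamingContext(conf, Seconds(5))
    // hypothetical topic and consumer group
    val recordDStream: InputDStream[ConsumerRecord[String, String]] =
      MyKafkaUtil.getKafkaStream("gmall_start", ssc, "gmall_dau_group")
    recordDStream.map(_.value()).print()
    ssc.start()
    ssc.awaitTermination()
  }
}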
4. ES Utility Class
import java.util
import io.searchbox.client.config.HttpClientConfig
import io.searchbox.client.{JestClient, JestClientFactory}
import io.searchbox.core._
import org.elasticsearch.index.query.{BoolQueryBuilder, MatchQueryBuilder, TermQueryBuilder}
import org.elasticsearch.search.builder.SearchSourceBuilder
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder
import org.elasticsearch.search.sort.SortOrder
import org.wdh01.gmall.realtime.bean.DauInfo

/**
 * Utility class for working with ES
 */
object MyESutil {
  // Jest client factory, created lazily
  private var jestFactory: JestClientFactory = null

  def build() = {
    jestFactory = new JestClientFactory
    jestFactory.setHttpClientConfig(new HttpClientConfig
      .Builder("http://hadoop201:9200") // ES endpoint
      .multiThreaded(true)              // enable multi-threaded handling
      .maxTotalConnection(200)          // maximum number of connections
      .connTimeout(10000)               // connection timeout
      .readTimeout(10000)               // read timeout
      .build()
    )
  }

  // get a client
  def getJestClient(): JestClient = {
    // if the factory has not been created yet, call build(); otherwise reuse it
    if (jestFactory == null) {
      build()
    }
    jestFactory.getObject
  }
  /**
   * Bulk save
   *
   * @param dauInfolist list of (document id, DauInfo) pairs
   * @param indexName   target index name
   */
  def bulkInsert(dauInfolist: List[(String, DauInfo)], indexName: String) = {
    // first check whether the current batch actually contains data
    if (dauInfolist != null && dauInfolist.size > 0) {
      // get a client connection
      val client: JestClient = getJestClient()
      val builder: Bulk.Builder = new Bulk.Builder()
      for ((id, dauInfo) <- dauInfolist) {
        val index: Index = new Index.Builder(dauInfo)
          .index(indexName)
          .id(id)
          .`type`("_doc")
          .build()
        builder.addAction(index)
      }
      // build the bulk request
      val bulk: Bulk = builder.build()
      val bulkRes: BulkResult = client.execute(bulk)
      // print how many records were saved to ES
      println("inserted " + bulkRes.getItems.size() + " records into ES...")
      // release the connection
      client.close()
    }
  }
  // query multiple documents, approach 2: build the query with the ES query builders
  def queryDos1() = {
    // get a client connection
    val client: JestClient = getJestClient()
    // used to build the query as a JSON string
    val searchSourceBuilder: SearchSourceBuilder = new SearchSourceBuilder
    val boolQueryBuilder: BoolQueryBuilder = new BoolQueryBuilder()
    boolQueryBuilder.must(new MatchQueryBuilder("name", "人"))
    boolQueryBuilder.filter(new TermQueryBuilder("actorList.name", "殷桃"))
    searchSourceBuilder.query(boolQueryBuilder)
    searchSourceBuilder.from(0)
    searchSourceBuilder.size(1)
    searchSourceBuilder.sort("doubanScore", SortOrder.DESC)
    searchSourceBuilder.highlighter(new HighlightBuilder().field("name"))
    val query: String = searchSourceBuilder.toString()
    // println(query)
    // wrap the query in a Search action
    val search: Search = new Search.Builder(query)
      .addIndex("movie_index")
      .build()
    val result: SearchResult = client.execute(search)
    val list: util.List[SearchResult#Hit[util.Map[String, Any], Void]] = result.getHits(classOf[util.Map[String, Any]])
    // convert the Java list to a Scala collection
    import scala.collection.JavaConverters._
    val list1: List[util.Map[String, Any]] = list.asScala.map(_.source).toList
    println(list1.mkString("\n"))
    // close the connection
    client.close()
  }
  // query multiple documents, approach 1: write the query DSL as a raw JSON string
  def queryDos() = {
    // get a client connection
    val client: JestClient = getJestClient()
    val query: String =
      """
        |{
        |  "query": {
        |    "match": {
        |      "name": "人"
        |    }
        |  },
        |  "sort": [
        |    {
        |      "doubanScore": {
        |        "order": "desc"
        |      }
        |    }
        |  ]
        |}
        |""".stripMargin
    // wrap the query in a Search action
    val search: Search = new Search.Builder(query)
      .addIndex("movie_index")
      .build()
    val result: SearchResult = client.execute(search)
    val list: util.List[SearchResult#Hit[util.Map[String, Any], Void]] = result.getHits(classOf[util.Map[String, Any]])
    // convert the Java list to a Scala collection
    import scala.collection.JavaConverters._
    val list1: List[util.Map[String, Any]] = list.asScala.map(_.source).toList
    println(list1.mkString("\n"))
    // close the connection
    client.close()
  }
  // query a document by id
  def queryById() = {
    // get a client connection
    val client: JestClient = getJestClient()
    // specify the index and the id to read
    val get: Get = new Get.Builder("movie_index", "5").build()
    // execute and collect the result
    val result: DocumentResult = client.execute(get)
    // here we simply print the raw JSON via getJsonString
    println(result.getJsonString)
    // close the connection
    client.close()
  }
  // insert data into ES, approach 2: wrap a case class object and insert it as a document
  def putIndex1() = {
    // get a client connection
    val client: JestClient = getJestClient()
    val actorList = new util.ArrayList[util.Map[String, Object]]()
    val actorMap = new util.HashMap[String, Object]()
    actorMap.put("id", "01")
    actorMap.put("name", "殷桃")
    actorList.add(actorMap)
    // wrap the data in a case class instance
    val movie: Movie = Movie(102, "人世间", 9.5f, actorList)
    // create Index, an implementation of Action
    val index: Index = new Index.Builder(movie) // pass in the case class object
      .index("movie_index")
      .`type`("movie")
      .id("5")
      .build()
    client.execute(index)
    // close the connection
    client.close()
  }
  // insert data into ES, approach 1: write the document source as a raw JSON string
  def putIndex() = {
    // get a client connection
    val client: JestClient = getJestClient()
    // define the document source
    val source: String =
      """{
        |  "id": 101,
        |  "name": "operation meigong river",
        |  "doubanScore": 8.1,
        |  "actorList": [
        |    {
        |      "id": 1,
        |      "name": "liu yi fei"
        |    }
        |  ]
        |}""".stripMargin
    // create the Index action; the Builder argument is the document to insert and is converted
    // to JSON under the hood; a wrapped case class object can be passed in as well
    val index: Index = new Index.Builder(source)
      .index("movie_index")
      .`type`("movie")
      .id("4")
      .build()
    // run it through the client; execute takes an Action, and Index implements Action
    client.execute(index)
    // close the connection
    client.close()
  }
  def main(args: Array[String]): Unit = {
    //putIndex()
    //putIndex1()
    //queryById()
    //queryDos()
    queryDos1()
  }
}

// case class describing a movie document
case class Movie(id: Long, name: String, doubanScore: Float, actorList: util.List[util.Map[String, Object]]) {
}
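As a small usage sketch on top of the methods above, the snippet below reads the document written by putIndex1 back into the Movie case class via Jest's getSourceAsObject, which maps the _source JSON onto the class with Gson. This assumes Gson can instantiate and populate the case class reflectively; the index and id simply reuse the values from this post.

import io.searchbox.client.JestClient
import io.searchbox.core.{DocumentResult, Get}

object MyESQueryExample {
  def main(args: Array[String]): Unit = {
    val client: JestClient = MyESutil.getJestClient()
    // read back the document that putIndex1 wrote to movie_index with id 5
    val get: Get = new Get.Builder("movie_index", "5").build()
    val result: DocumentResult = client.execute(get)
    // map the _source JSON onto the Movie case class (assumes Gson can populate it reflectively)
    val movie: Movie = result.getSourceAsObject(classOf[Movie])
    println(movie)
    client.close()
  }
}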
5. Offset Maintenance Utility Class
import java.util
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.kafka010.OffsetRange
import redis.clients.jedis.Jedis

/**
 * Offset maintenance
 */
object OffsetManagerUtil {
  // Read offsets from Redis; Redis layout: type Hash, key offset:topic:groupid, field partition, value offset
  def getOffset(topic: String, groupid: String): Map[TopicPartition, Long] = {
    // get a Jedis connection
    val jedis: Jedis = MyRedisUtil.getJedisClient()
    // build the key: offset:topic:groupid
    val offsetKey = "offset:" + topic + ":" + groupid
    // get the partitions and offsets of the current consumer group
    val offsetMap: util.Map[String, String] = jedis.hgetAll(offsetKey)
    // close the connection
    jedis.close()
    // convert the Java map to a Scala map, which is more convenient to work with
    import scala.collection.JavaConverters._
    offsetMap.asScala.map {
      case (partition, offset) => {
        // Map[TopicPartition, Long]
        (new TopicPartition(topic, partition.toInt), offset.toLong)
      }
    }.toMap
  }

  // Save offsets to Redis
  def saveOffset(topic: String, groupId: String, offsetRanges: Array[OffsetRange]): Unit = {
    // get a Jedis connection
    val jedis: Jedis = MyRedisUtil.getJedisClient()
    // build the key: offset:topic:groupid
    val offsetKey = "offset:" + topic + ":" + groupId
    // Java map holding the offsets to store
    val offsetMap: util.HashMap[String, String] = new util.HashMap[String, String]()
    // walk over the OffsetRanges and fill offsetMap
    for (offsetRange <- offsetRanges) {
      val partitionID: Int = offsetRange.partition
      val fromOffset: Long = offsetRange.fromOffset
      val untilOffset: Long = offsetRange.untilOffset
      offsetMap.put(partitionID.toString, untilOffset.toString)
      // debug output
      println("saving partition: " + partitionID + " " + fromOffset + " -----> " + untilOffset)
    }
    // persist the data
    jedis.hmset(offsetKey, offsetMap)
    // close the connection
    jedis.close()
  }
}
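To show how OffsetManagerUtil and MyKafkaUtil fit together, here is a hedged sketch of the usual "read offsets, create stream, grab OffsetRanges, process, save offsets" loop. The topic, group id, app name and master are placeholders, and the processing step is only a print.

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object OffsetLoopExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("OffsetLoopExample")
    val ssc = new StreamingContext(conf, Seconds(5))
    val topic = "gmall_start"       // hypothetical topic
    val groupId = "gmall_dau_group" // hypothetical consumer group

    // 1. read the offsets saved by the previous run
    val offsetMap: Map[TopicPartition, Long] = OffsetManagerUtil.getOffset(topic, groupId)
    // 2. start from the saved offsets if there are any, otherwise fall back to auto.offset.reset
    val recordDStream: InputDStream[ConsumerRecord[String, String]] =
      if (offsetMap != null && offsetMap.nonEmpty)
        MyKafkaUtil.getKafkaStream(topic, ssc, offsetMap, groupId)
      else
        MyKafkaUtil.getKafkaStream(topic, ssc, groupId)

    // 3. capture the OffsetRanges of every batch on the driver before any shuffle
    var offsetRanges: Array[OffsetRange] = Array.empty[OffsetRange]
    val withOffsets: DStream[ConsumerRecord[String, String]] = recordDStream.transform { rdd =>
      offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      rdd
    }

    // 4. process the batch, then persist this batch's offsets to Redis
    withOffsets.foreachRDD { rdd =>
      rdd.foreach(record => println(record.value()))
      OffsetManagerUtil.saveOffset(topic, groupId, offsetRanges)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}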
