storm
nimbus 英 [ˈnɪmbəs] 美 [ˈnɪmbəs] n. (大片的)雨云;光环
strom 分布式实时的流式计算框架
strom如下图右侧,来一个数据,处理一个,单位时间内处理的数据量不能太大,以保证它的正常运行,但是一旦启动一直运行。
批处理则不同,spark则是微批处理框架的计算框架,也能够达到实时性。
MR 做不到实时性,数量级是TB,PB级的,频繁操作磁盘,频繁启停job.






ETL(数据清洗)extracted transform load
Spout
英 [spaʊt] 美 [spaʊt]
壶嘴;喷出;喷口;管口;龙卷
bolt
英 [bəʊlt] 美 [boʊlt]
n.
(门窗的)闩,插v.
用插销闩上;能被闩上;用螺栓把(甲和乙)固定在一起;(马等受惊)脱缰 adv. 突然地;像箭似地;直立地
Nimbus 类似于 master supervisor 类似于 slave
worker task








strom 数据累加 strom 运行模式 strom local 模式, strom 集群运行 jar


本地模式运行strom程序
// 累加案例
package com.bjsxt.sum;
import java.util.List;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class WsSpout extends BaseRichSpout {
Map map;
int i =0;
TopologyContext context;
SpoutOutputCollector collector;
/**
* 配置初始化spout类
*/
@Override
public void open(Map map, TopologyContext context, SpoutOutputCollector collector) {
this.map = map;
this.context = context;
this.collector = collector;
}
/**
* 采集并向后推送数据
*/
@Override
public void nextTuple() {
i++;
List<Object> num = new Values(i);
this.collector.emit(num);
System.out.println("spout--------------" + i);
Utils.sleep(1000);
}
/**
* 向接收数据的逻辑处理单元声明发送数据的字段名称
* @param arg0
*/
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("num"));
}
}
package com.bjsxt.sum;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
public class WsBolt extends BaseRichBolt {
Map stormConf;
TopologyContext context;
OutputCollector collector;
int sum = 0;
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.stormConf = stormConf;
this.context = context;
this.collector = collector;
}
/**
* 获取数据,(有必要的话,向后发送数据)
*/
@Override
public void execute(Tuple input) {
// input.getInteger(0);// offset
Integer num = input.getIntegerByField("num");
sum += num;
// 展示积累的数据
System.out.println("bolt------------ sum=" + sum);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// TODO Auto-generated method stub
}
}
package com.bjsxt.sum;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
public class Test {
/**
* 建立拓扑结构,放入集群运行
* @param args 命令行参数
*/
public static void main(String[] args) {
// 构建strom拓扑结构
TopologyBuilder tb = new TopologyBuilder();
tb.setSpout("wsspout", new WsSpout());
// 规定上一步的分发策略 shuffleGrouping cpoutid
tb.setBolt("wsblot", new WsBolt()).shuffleGrouping("wsspout");
// 创建本地strom集群
LocalCluster lc = new LocalCluster();
lc.submitTopology("wordsum", new Config(), tb.createTopology());
}
}
// word count 案例,多个bolt
package com.bjsxt.wc;
import java.util.Map;
import java.util.Random;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
public class WcSpout extends BaseRichSpout {
SpoutOutputCollector collector;
// 准备原始数据
String[] text = {
"helo sxt bj",
"sxt nihao world",
"bj nihao hi"
};
Random r = new Random();
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
this.collector = collector;
}
// 随机发送每一行字符串
@Override
public void nextTuple() {
Values line = new Values(text[r.nextInt(text.length)]);
this.collector.emit(line);
System.out.println("spout emit ---------" + line);
Utils.sleep(1000);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("line"));
}
}
package com.bjsxt.wc;
import java.util.List;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class WsplitBolt extends BaseRichBolt{
OutputCollector collector;
/**
* 获取tuple每一行数据
*/
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
}
@Override
public void execute(Tuple input) {
String line = input.getString(0);
// 切割
String[] words = line.split(" ");
for (String w : words) {
List wd = new Values(w);
this.collector.emit(wd);
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("w"));
}
}
package com.bjsxt.wc;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class WcountBolt extends BaseRichBolt{
Map<String,Integer> wcMap = new HashMap<>(); // key 出现的单词, value 出现的次数
/**
* 获取tuple每一行数据
*/
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
}
/**
* 获取tuple每一个单词,并且按照单词统计输出出现的次数
*/
@Override
public void execute(Tuple input) {
// 获取单词
String word = input.getStringByField("w");
Integer count = 1;
// 如果map中已经出现过该单词,
if(wcMap.containsKey(word)){
count = (int)wcMap.get(word) + 1;
}
wcMap.put(word, count);
System.out.println("("+word + ","+ count +")" );
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
package com.bjsxt.wc;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
public class Test {
/**
* 建立拓扑结构,放入集群运行
* @param args 命令行参数
*/
public static void main(String[] args) {
// 构建strom拓扑结构
TopologyBuilder tb = new TopologyBuilder();
tb.setSpout("wcspout", new WcSpout());
tb.setBolt("wsplitblot", new WsplitBolt()).shuffleGrouping("wcspout");
// 多个bolt 各自统计,map中各自有一部分统计数据
// 使用fieldsGrouping则可以按fields统计// 只要有一个单词在某一个bolt上,第二次也必须分发到这个bolt上,
// 1个并行度,如下会统计有错
// tb.setBolt("wcountbolt", new WcountBolt()).shuffleGrouping("wsplitblot");
// 3个并行度,如下会统计有错
// tb.setBolt("wcountbolt", new WcountBolt(),3).shuffleGrouping("wsplitblot");
// 多个并行度,按如下统计
tb.setBolt("wcountbolt", new WcountBolt(),3).fieldsGrouping("wsplitblot", new Fields("w"));
// 创建本地strom集群
LocalCluster lc = new LocalCluster();
lc.submitTopology("wordcount", new Config(), tb.createTopology());
}
}
// 分发策略演示
trace.log
www.taobao.com XXYH6YCGFJYERTT834R52FDXV9U34 2017-02-21 12:40:49
www.taobao.com XXYH6YCGFJYERTT834R52FDXV9U34 2017-02-21 09:40:49
www.taobao.com XXYH6YCGFJYERTT834R52FDXV9U34 2017-02-21 08:40:51
www.taobao.com VVVYH6Y4V4SFXZ56JIPDPB4V678 2017-02-21 12:40:49
www.taobao.com BBYH61456FGHHJ7JL89RG5VV9UYU7 2017-02-21 08:40:51
package com.sxt.storm.grouping;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
public class MySpout implements IRichSpout {
private static final long serialVersionUID = 1L;
FileInputStream fis;
InputStreamReader isr;
BufferedReader br;
SpoutOutputCollector collector = null;
String str = null;
@Override
public void nextTuple() {
try {
while ((str = this.br.readLine()) != null) {
// 过滤动作
collector.emit(new Values(str, str.split("\t")[1]));
}
} catch (Exception e) {
}
}
@Override
public void close() {
try {
br.close();
isr.close();
fis.close();
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
try {
this.collector = collector;
this.fis = new FileInputStream("track.log");
this.isr = new InputStreamReader(fis, "UTF-8");
this.br = new BufferedReader(isr);
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// 发送几个元素,需要对应几个字段
declarer.declare(new Fields("log", "session_id"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
@Override
public void ack(Object msgId) {
System.out.println("spout ack:" + msgId.toString());
}
@Override
public void activate() {
}
@Override
public void deactivate() {
}
@Override
public void fail(Object msgId) {
System.out.println("spout fail:" + msgId.toString());
}
}
package com.sxt.storm.grouping;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
public class MyBolt implements IRichBolt {
private static final long serialVersionUID = 1L;
OutputCollector collector = null;
int num = 0;
String valueString = null;
@Override
public void cleanup() {
}
@Override
public void execute(Tuple input) {
try {
valueString = input.getStringByField("log");
if (valueString != null) {
num++;
System.err.println(input.getSourceStreamId() + " " + Thread.currentThread().getName() + "--id="
+ Thread.currentThread().getId() + " lines :" + num + " session_id:"
+ valueString.split("\t")[1]);
}
collector.ack(input);
// Thread.sleep(2000);
} catch (Exception e) {
collector.fail(input);
e.printStackTrace();
}
}
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields(""));
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
}
package com.sxt.storm.grouping;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
public class Main {
/**
* @param args
*/
public static void main(String[] args) {
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new MySpout(), 1);
// shuffleGrouping其实就是随机往下游去发,不自觉的做到了负载均衡
// builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");
// fieldsGrouping其实就是MapReduce里面理解的Shuffle,根据fields求hash来取模
// builder.setBolt("bolt", new MyBolt(), 2).fieldsGrouping("spout", new Fields("session_id"));
// 只往一个里面发,往taskId小的那个里面去发送
// builder.setBolt("bolt", new MyBolt(), 2).globalGrouping("spout");
// 等于shuffleGrouping
// builder.setBolt("bolt", new MyBolt(), 2).noneGrouping("spout");
// 广播
builder.setBolt("bolt", new MyBolt(), 2).allGrouping("spout");
// Map conf = new HashMap();
// conf.put(Config.TOPOLOGY_WORKERS, 4);
Config conf = new Config();
conf.setDebug(false);
conf.setMessageTimeoutSecs(30);
if (args.length > 0) { // 集群中运行时,执行此处
try {
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
}
} else {
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("mytopology", conf, builder.createTopology());
}
}
}


worker 直接从zookeeper中获取任务





Strom 伪分布式部署 node1 nimbus , zookeeper supervisor worker 都在node1上。 步骤: [root@node1 software]# tar -zxvf apache-storm-0.10.0.tar.gz -C /opt/sxt/ [root@node1 sxt]# cd apache-storm-0.10.0/ [root@node1 apache-storm-0.10.0]# mkdir logs ## 存储日志文件 [root@node1 apache-storm-0.10.0]# ./bin/storm help [root@node1 apache-storm-0.10.0]# ./bin/storm dev-zookeeper >> ./logs/dev-zookeeper.out 2>&1 & ## 启动自带zookeeper程序 [root@node1 apache-storm-0.10.0]# jps ## 查看程序,还在配置中,未启动完成 6838 Jps 6828 config_value [root@node1 apache-storm-0.10.0]# jps ## 启动成功, 6795 dev_zookeeper 6859 Jps [root@node1 apache-storm-0.10.0]# ./bin/storm nimbus >> ./logs/nimbus.out 2>&1 & [2] 6873 [root@node1 apache-storm-0.10.0]# jps 6884 config_value 6795 dev_zookeeper 6894 Jps [root@node1 apache-storm-0.10.0]# jps 6981 Jps 6873 nimbus 6795 dev_zookeeper [root@node1 apache-storm-0.10.0]# ./bin/storm supervisor >> ./logs/supervisor.out 2>&1 & [root@node1 apache-storm-0.10.0]# ./bin/storm ui >> ./logs/ui.out 2>&1 & [root@node1 apache-storm-0.10.0]# jps 7104 core ## ui 7191 Jps 6873 nimbus 6795 dev_zookeeper 7004 supervisor [root@node1 apache-storm-0.10.0]# ss -nal tcp LISTEN 0 50 :::8080 http://node1:8080 ui首页

准备提交任务到storm
// 修改wordcount代码:
// 如果args传参表示提交到strom
package com.bjsxt.wc;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
public class Test {
/**
* 建立拓扑结构,放入集群运行
* @param args 命令行参数
*/
public static void main(String[] args) {
// 构建strom拓扑结构
TopologyBuilder tb = new TopologyBuilder();
tb.setSpout("wcspout", new WcSpout());
tb.setBolt("wsplitblot", new WsplitBolt()).shuffleGrouping("wcspout");
// 多个bolt 各自统计,map中各自有一部分统计数据
// 使用fieldsGrouping则可以按fields统计// 只要有一个单词在某一个bolt上,第二次也必须分发到这个bolt上,
// 1个并行度,如下会统计有错
// tb.setBolt("wcountbolt", new WcountBolt()).shuffleGrouping("wsplitblot");
// 3个并行度,如下会统计有错
// tb.setBolt("wcountbolt", new WcountBolt(),3).shuffleGrouping("wsplitblot");
// 多个并行度,按如下统计
tb.setBolt("wcountbolt", new WcountBolt(),3).fieldsGrouping("wsplitblot", new Fields("w"));
Config conf = new Config();
if(args.length > 0){ // 如果传入参数,则是提交到集群
try {
StormSubmitter.submitTopology(args[0], conf, tb.createTopology());
} catch (AlreadyAliveException | InvalidTopologyException e) {
e.printStackTrace();
}
}else{
// 创建本地strom集群
LocalCluster lc = new LocalCluster();
lc.submitTopology("wordcount", conf, tb.createTopology());
}
}
}
## 将wordcount 打包,上传到node1
[root@node1 apache-storm-0.10.0]# pwd
/opt/sxt/apache-storm-0.10.0
[root@node1 apache-storm-0.10.0]# ./bin/storm help jar ## 查看帮助
Syntax: [storm jar topology-jar-path class ...]
##演示 运行程序,
[root@node1 apache-storm-0.10.0]# ./bin/storm jar ~/software/WCDemo.jar
com.bjsxt.wc.Test
##演示提交程序 wc 标识args.length > 0 ,提交
[root@node1 apache-storm-0.10.0]# ./bin/storm jar ~/software/WCDemo.jar com.bjsxt.wc.Test wc
## 查看提交的topology [root@node1 apache-storm-0.10.0]# cd /opt/sxt/apache-storm-0.10.0/storm-local/nimbus/inbox/ [root@node1 inbox]# ls stormjar-e2c773e5-be65-4ede-a243-7d51e52371c4.jar

如下拓扑图

strom 依赖 jdk1.6以上,python2.6.6+
分布式部署:
node2,3,4 有zookeeper
node2 nimbus node3,node4 supervisor. ( 各自自己本机拥有4个worker)
[root@node2 software]# python
Python 2.7.5 (default, Oct 30 2018, 23:45:53)
[root@node2 software]# java -version
java version "1.8.0_221"
node2
tar -zxvf apache-storm-0.10.0.tar.gz -C /opt/sxt/
cd apache-storm-0.10.0/conf/
vi storm.yaml
[root@node2 conf]# cat storm.yaml
## 增加如下配置 supervisor.slots.ports 表示每个supervisor下的worker,
storm.zookeeper.servers:
- "node2"
- "node3"
- "node4"
#
nimbus.host: "node2"
storm.local.dir: "/var/storm"
supervisor.slots.ports:
- 6700
- 6701
- 6702
- 6703
#
[root@node2 apache-storm-0.10.0]# mkdir logs
分发到node3,4
[root@node2 sxt]# scp -r apache-storm-0.10.0 node3:`pwd`
启动node,2,3,4 zk
/opt/sxt/zookeeper-3.4.6/bin/zkServer.sh start
## node2 nimbus
[root@node2 apache-storm-0.10.0]# ./bin/storm nimbus >> ./logs/nimbus.out 2>&1 &
[root@node2 apache-storm-0.10.0]# ./bin/storm ui >> ./logs/ui.out 2>&1 &
## node3,4 各自启动supervisor
[root@node4 apache-storm-0.10.0]# ./bin/storm supervisor >> ./logs/supervisor.out 2>&1 &
[root@node3 apache-storm-0.10.0]# ./bin/storm supervisor >> ./logs/supervisor.out 2>&1 &

// 提交任务 修改wordcount 如下:
package com.bjsxt.wc;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
public class Test {
/**
* 建立拓扑结构,放入集群运行
* @param args 命令行参数
*/
// public static void main(String[] args) {
//
// // 构建strom拓扑结构
// TopologyBuilder tb = new TopologyBuilder();
//
// tb.setSpout("wcspout", new WcSpout());
//
// tb.setBolt("wsplitblot", new WsplitBolt()).shuffleGrouping("wcspout");
// // 多个bolt 各自统计,map中各自有一部分统计数据
// // 使用fieldsGrouping则可以按fields统计// 只要有一个单词在某一个bolt上,第二次也必须分发到这个bolt上,
// // 1个并行度,如下会统计有错
//// tb.setBolt("wcountbolt", new WcountBolt()).shuffleGrouping("wsplitblot");
// // 3个并行度,如下会统计有错
//// tb.setBolt("wcountbolt", new WcountBolt(),3).shuffleGrouping("wsplitblot");
// // 多个并行度,按如下统计
// tb.setBolt("wcountbolt", new WcountBolt(),3).fieldsGrouping("wsplitblot", new Fields("w"));
//
//
// Config conf = new Config();
// if(args.length > 0){ // 如果传入参数,则是提交到集群
// try {
// StormSubmitter.submitTopology(args[0], conf, tb.createTopology());
// } catch (AlreadyAliveException | InvalidTopologyException e) {
// e.printStackTrace();
// }
// }else{
// // 创建本地strom集群
// LocalCluster lc = new LocalCluster();
// lc.submitTopology("wordcount", conf, tb.createTopology());
// }
//
// }
//
/**
* 建立拓扑结构,放入集群运行
* @param args 命令行参数
*/
public static void main(String[] args) {
// 构建strom拓扑结构
TopologyBuilder tb = new TopologyBuilder();
tb.setSpout("wcspout", new WcSpout(),2);
tb.setBolt("wsplitblot", new WsplitBolt(),4).shuffleGrouping("wcspout");
tb.setBolt("wcountbolt", new WcountBolt(),2).setNumTasks(4).fieldsGrouping("wsplitblot", new Fields("w"));
// 共10个任务
Config conf = new Config();
conf.setNumWorkers(2);
if(args.length > 0){ // 如果传入参数,则是提交到集群
try {
StormSubmitter.submitTopology(args[0], conf, tb.createTopology());
} catch (AlreadyAliveException | InvalidTopologyException e) {
e.printStackTrace();
}
}else{
// 创建本地strom集群
LocalCluster lc = new LocalCluster();
lc.submitTopology("wordcount", conf, tb.createTopology());
}
}
}
[root@node3 apache-storm-0.10.0]# ./bin/storm jar ~/software/WCDemo.jar com.bjsxt.wc.Test wc
## 在主节点上才能查看到上传的jar包
[root@node2 apache-storm-0.10.0]# cd /var/storm/nimbus/inbox/
[root@node2 inbox]# ls
stormjar-992586f4-b8a5-442a-828c-d41c1f828dd4.jar
## ui页面查看wc executor task ## 其中每个worker上都有ack也会拥有executor
## 修改wcountbolt executor数量
[root@node2 apache-storm-0.10.0]# ./bin/storm help rebalance
[root@node2 apache-storm-0.10.0]# ./bin/storm rebalance wc -n 4 -e wcountbolt=4

kill后的结果


上图表示两个线程共跑四个任务。




ack机制无法保证数据不被重复计算,但是可以保证数据至少被正确处理一次。(可能因错误,引发非错误数据重发被计算两次)
package com.sxt.storm.ack;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Map;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
public class MySpout implements IRichSpout{
private static final long serialVersionUID = 1L;
int index = 0;
FileInputStream fis;
InputStreamReader isr;
BufferedReader br;
SpoutOutputCollector collector = null;
String str = null;
@Override
public void nextTuple() {
try {
if ((str = this.br.readLine()) != null) {
// 过滤动作
index++;
collector.emit(new Values(str), index);
// collector.emit(new Values(str));
}
} catch (Exception e) {
}
}
@Override
public void close() {
try {
br.close();
isr.close();
fis.close();
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void open(Map conf, TopologyContext context,
SpoutOutputCollector collector) {
try {
this.collector = collector;
this.fis = new FileInputStream("track.log");
this.isr = new InputStreamReader(fis, "UTF-8");
this.br = new BufferedReader(isr);
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("log"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
@Override
public void ack(Object msgId) {
System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout ack:"+msgId.toString());
}
@Override
public void activate() {
}
@Override
public void deactivate() {
}
@Override
public void fail(Object msgId) {
System.err.println(" [" + Thread.currentThread().getName() + "] "+ " spout fail:"+msgId.toString());
}
}
package com.sxt.storm.ack;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class MyBolt implements IRichBolt {
private static final long serialVersionUID = 1L;
OutputCollector collector = null;
@Override
public void cleanup() {
}
int num = 0;
String valueString = null;
@Override
public void execute(Tuple input) {
try {
valueString = input.getStringByField("log") ;
if(valueString != null) {
num ++ ;
System.err.println(Thread.currentThread().getName()+" lines :"+num +" session_id:"+valueString.split("\t")[1]);
}
collector.emit(input, new Values(valueString));
// collector.emit(new Values(valueString));
collector.ack(input);
Thread.sleep(2000);
} catch (Exception e) {
collector.fail(input);
e.printStackTrace();
}
}
@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.collector = collector ;
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("session_id")) ;
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
}
package com.sxt.storm.ack;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
public class Main {
/**
* @param args
*/
public static void main(String[] args) {
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new MySpout(), 1);
builder.setBolt("bolt", new MyBolt(), 2).shuffleGrouping("spout");
// Map conf = new HashMap();
// conf.put(Config.TOPOLOGY_WORKERS, 4);
Config conf = new Config() ;
conf.setDebug(true);
conf.setMessageTimeoutSecs(conf, 100);
conf.setNumAckers(4);
if (args.length > 0) {
try {
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
}
}else {
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("mytopology", conf, builder.createTopology());
}
}
}

单点故障, flume ha 单点瓶颈, load balance http://flume.apache.org/FlumeUserGuide.html#scribe-source 美团日志收集系统架构 https://tech.meituan.com/2013/12/09/meituan-flume-log-system-architecture-and-design.html 实例: 电话掉话率,(非正常挂断:没有声音了,不在服务区)
中国移动项目架构图:

步骤: cmccstormjk02 1 producer 生产数据放到kafka的topic中。 2.strom spout 到kafka topic 中获取数据。 filterbolt 过滤, bolt 计算。 3.将计算结果:掉话数和通话数 每隔一段时间保存一次到hbase. cmcc02_hbase 4. 另外一个项目到hbase中获取指定时段的数据,展示到前端echart中。 准备: 配置 node,2,3,4 的kafka,启动 zk,启动kafka. 1.创建topic
./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 3 --topic mylog_cmcc
2. comsumer 消费监控用于临时查看
./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic mylog_cmcc
3 创建hbase 表
[root@node1 shells]# start-dfs.sh
[root@node1 shells]# ./start-yarn-ha.sh ## 自己写的ha yarn 启动脚本
[root@node1 ~]# cat shells/start-yarn-ha.sh
start-yarn.sh
ssh root@node3 "$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager"
ssh root@node4 "$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager"
[root@node1 shells]# start-hbase.sh
[root@node1 shells]# hbase shell
hbase(main):003:0> create 'cell_monitor_table','cf' ## ctrl+backspace 回退删除字符
## cmccstormjk02 项目 代码
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.productor;
import java.util.Properties;
import java.util.Random;
import backtype.storm.utils.Utils;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import tools.DateFmt;
/***
* 模拟发送数据到kafka中
*
* @author hadoop
*
*/
public class CellProducer extends Thread {
// bin/kafka-topics.sh --create --zookeeper localhost:2181
// --replication-factor 3 --partitions 5 --topic cmcccdr
private final kafka.javaapi.producer.Producer<Integer, String> producer;
private final String topic;
private final Properties props = new Properties();
public CellProducer(String topic) {
props.put("serializer.class", "kafka.serializer.StringEncoder");// 字符串消息
props.put("metadata.broker.list", KafkaProperties.broker_list);
producer = new kafka.javaapi.producer.Producer<Integer, String>(new ProducerConfig(props));
this.topic = topic;
}
/*
* public void run() { // order_id,order_amt,create_time,province_id Random
* random = new Random(); String[] cell_num = { "29448-37062",
* "29448-51331", "29448-51331","29448-51333", "29448-51343" }; String[]
* drop_num = { "0","1","2"};//掉话1(信号断断续续) 断话2(完全断开)
*
* // Producer.java // record_time, imei, cell,
* ph_num,call_num,drop_num,duration,drop_rate,net_type,erl // 2011-06-28
* 14:24:59.867,356966,29448-37062,0,0,0,0,0,G,0 // 2011-06-28
* 14:24:59.867,352024,29448-51331,0,0,0,0,0,G,0 // 2011-06-28
* 14:24:59.867,353736,29448-51331,0,0,0,0,0,G,0 // 2011-06-28
* 14:24:59.867,353736,29448-51333,0,0,0,0,0,G,0 // 2011-06-28
* 14:24:59.867,351545,29448-51333,0,0,0,0,0,G,0 // 2011-06-28
* 14:24:59.867,353736,29448-51343,1,0,0,8,0,G,0 int i =0 ; NumberFormat nf
* = new DecimalFormat("000000"); while(true) { i ++ ; // String messageStr
* = i+"\t"+cell_num[random.nextInt(cell_num.length)]+"\t"+DateFmt.
* getCountDate(null,
* DateFmt.date_long)+"\t"+drop_num[random.nextInt(drop_num.length)] ;
* String testStr = nf.format(random.nextInt(10)+1);
*
* String messageStr =
* i+"\t"+("29448-"+testStr)+"\t"+DateFmt.getCountDate(null,
* DateFmt.date_long)+"\t"+drop_num[random.nextInt(drop_num.length)] ;
*
* System.out.println("product:"+messageStr); producer.send(new
* KeyedMessage<Integer, String>(topic, messageStr)); Utils.sleep(1000) ; //
* if (i==500) { // break; // } }
*
* }
*/
public void run() {
Random random = new Random();
String[] cell_num = { "29448-37062", "29448-51331", "29448-51331", "29448-51333", "29448-51343" };
// 正常0; 掉话1(信号断断续续); 断话2(完全断开)
String[] drop_num = { "0", "1", "2" };
int i = 0;
while (true) {
i++;
String testStr = String.format("%06d", random.nextInt(10) + 1);
// messageStr: 2494 29448-000003 2016-01-05 10:25:17 1
//
String messageStr = i + "\t" + ("29448-" + testStr) + "\t" + DateFmt.getCountDate(null, DateFmt.date_long)
+ "\t" + drop_num[random.nextInt(drop_num.length)];
System.out.println("product:" + messageStr);
producer.send(new KeyedMessage<Integer, String>(topic, messageStr));
Utils.sleep(1000);
// if(i == 500) {
// break;
// }
}
}
public static void main(String[] args) {
// topic设置
CellProducer producerThread = new CellProducer(KafkaProperties.Cell_Topic);
// 启动线程生成数据
producerThread.start();
}
}
package bolt;
import java.util.Map;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import tools.DateFmt;
public class CellFilterBolt implements IBasicBolt {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
String logString = input.getString(0);
try {
if (input != null) {
String arr[] = logString.split("\\t");
// messageStr格式:消息编号\t小区编号\t时间\t状态
// 例: 2494 29448-000003 2016-01-05 10:25:17 1
// DateFmt.date_short是yyyy-MM-dd,把2016-01-05 10:25:17格式化2016-01-05
// 发出的数据格式: 时间, 小区编号, 掉话状态
collector.emit(new Values(DateFmt.getCountDate(arr[2], DateFmt.date_short), arr[1], arr[3]));
}
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("date", "cell_num", "drop_num"));
}
@Override
public Map<String, Object> getComponentConfiguration() {
return null;
}
@Override
public void cleanup() {
// TODO Auto-generated method stub
}
@Override
public void prepare(Map map, TopologyContext arg1) {
// TODO Auto-generated method stub
}
}
package bolt;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import cmcc.hbase.dao.HBaseDAO;
import cmcc.hbase.dao.impl.HBaseDAOImp;
import tools.DateFmt;
public class CellDaoltBolt implements IBasicBolt {
private static final long serialVersionUID = 1L;
HBaseDAO dao = null;
long beginTime = System.currentTimeMillis();
long endTime = 0;
// 通话总数
Map<String, Long> cellCountMap = new HashMap<String, Long>();
// 掉话数 >0
Map<String, Long> cellDropCountMap = new HashMap<String, Long>();
String todayStr = null;
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
// input为2016-01-05,29448-000003,1
if (input != null) {
String dateStr = input.getString(0);
String cellNum = input.getString(1);
String dropNum = input.getString(2);
// 判断是否是当天,不是当天 就清除map 避免内存过大
// 基站数目 大概5-10万(北京市)
// http://bbs.c114.net/thread-793707-1-1.html
todayStr = DateFmt.getCountDate(null, DateFmt.date_short);
// 跨天的处理,大于当天的数据来了,就清空两个map
// 思考: 如果程序崩溃了,map清零了,如果不出问题,一直做同一个cellid的累加
// 这个逻辑不好,应该换成一个线程定期的清除map数据,而不是这里判断
if (todayStr != dateStr && todayStr.compareTo(dateStr) < 0) {
cellCountMap.clear();
cellDropCountMap.clear();
}
// 当前cellid的通话数统计
Long cellAll = cellCountMap.get(cellNum);
if (cellAll == null) {
cellAll = 0L;
}
cellCountMap.put(cellNum, ++cellAll);
// 掉话数统计,大于0就是掉话
Long cellDropAll = cellDropCountMap.get(cellNum);
int t = Integer.parseInt(dropNum);
if (t > 0) {
if (cellDropAll == null) {
cellDropAll = 0L;
}
cellDropCountMap.put(cellNum, ++cellDropAll);
}
// 1.定时写库.为了防止写库过于频繁 这里间隔一段时间写一次
// 2.也可以检测map里面数据size 写数据到 hbase
// 3.自己可以设计一些思路 ,当然 采用redis 也不错
// 4.采用tick定时存储也是一个思路
endTime = System.currentTimeMillis();
// flume+kafka 集成
// 当前掉话数
// 1.每小时掉话数目
// 2.每小时 通话数据
// 3.每小时 掉话率
// 4.昨天的历史轨迹
// 5.同比去年今天的轨迹(如果有数据)
// hbase 按列存储的数据()
// 10万
// rowkey cellnum+ day
if (endTime - beginTime >= 5000) {
// 5s 写一次库
if (cellCountMap.size() > 0 && cellDropCountMap.size() > 0) {
// x轴,相对于小时的偏移量,格式为 时:分,数值 数值是时间的偏移
String arr[] = this.getAxsi();
// 当前日期
String today = DateFmt.getCountDate(null, DateFmt.date_short);
// 当前分钟
String today_minute = DateFmt.getCountDate(null, DateFmt.date_minute);
// cellCountMap为通话数据的map
Set<String> keys = cellCountMap.keySet();
for (Iterator iterator = keys.iterator(); iterator.hasNext();) {
String key_cellnum = (String) iterator.next();
System.out.println("key_cellnum: " + key_cellnum + "***"
+ arr[0] + "---"
+ arr[1] + "---"
+ cellCountMap.get(key_cellnum) + "----"
+ cellDropCountMap.get(key_cellnum));
//写入HBase数据,样例: {time_title:"10:45",xAxis:10.759722222222223,call_num:140,call_drop_num:91}
dao.insert("cell_monitor_table",
key_cellnum + "_" + today,
"cf",
new String[] { today_minute },
new String[] { "{" + "time_title:\"" + arr[0] + "\",xAxis:" + arr[1] + ",call_num:"
+ cellCountMap.get(key_cellnum) + ",call_drop_num:" + cellDropCountMap.get(key_cellnum) + "}" }
);
}
}
// 需要重置初始时间
beginTime = System.currentTimeMillis();
}
}
}
@Override
public void prepare(Map stormConf, TopologyContext context) {
// TODO Auto-generated method stub
dao = new HBaseDAOImp();
Calendar calendar = Calendar.getInstance();
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// TODO Auto-generated method stub
}
@Override
public Map<String, Object> getComponentConfiguration() {
// TODO Auto-generated method stub
return null;
}
// 获取X坐标,就是当前时间的坐标,小时是单位
public String[] getAxsi() {
// 取当前时间
Calendar c = Calendar.getInstance();
int hour = c.get(Calendar.HOUR_OF_DAY);
int minute = c.get(Calendar.MINUTE);
int sec = c.get(Calendar.SECOND);
// 总秒数
int curSecNum = hour * 3600 + minute * 60 + sec;
// (12*3600+30*60+0)/3600=12.5
Double xValue = (double) curSecNum / 3600;
// 时:分,数值 数值是时间的偏移
String[] end = { hour + ":" + minute, xValue.toString() };
return end;
}
@Override
public void cleanup() {
}
}
package cmcc.constant;
public class Constants {
// public static final String HBASE_ZOOKEEPER_LIST = "node4:2181";
public static final String HBASE_ZOOKEEPER_LIST = "node2:2181,node3:2181,node4:2181";
public static final String KAFKA_ZOOKEEPER_LIST = "node2:2181,node3:2181,node4:2181";
public static final String BROKER_LIST = "node2:9092,node3:9092,node4:9092";
public static final String ZOOKEEPERS = "node2,node3,node4";
}
package topo;
import java.util.ArrayList;
import java.util.List;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import bolt.CellDaoltBolt;
import bolt.CellFilterBolt;
import cmcc.constant.Constants;
import kafka.productor.KafkaProperties;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
public class KafkaOneCellMonintorTopology {
/**
* @param args
*/
public static void main(String[] args) {
TopologyBuilder builder = new TopologyBuilder();
ZkHosts zkHosts = new ZkHosts(Constants.KAFKA_ZOOKEEPER_LIST);
SpoutConfig spoutConfig = new SpoutConfig(zkHosts,
"mylog_cmcc",
"/MyKafka", // 偏移量offset的根目录
"MyTrack"); // 对应一个应用
List<String> zkServers = new ArrayList<String>();
System.out.println(zkHosts.brokerZkStr);
for (String host : zkHosts.brokerZkStr.split(",")) {
zkServers.add(host.split(":")[0]);
}
spoutConfig.zkServers = zkServers;
spoutConfig.zkPort = 2181;
// 是否从头开始消费
spoutConfig.forceFromStart = false;
spoutConfig.socketTimeoutMs = 60 * 1000;
// String
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
builder.setSpout("spout", new KafkaSpout(spoutConfig), 3);
builder.setBolt("cellBolt", new CellFilterBolt(), 3).shuffleGrouping("spout");
builder.setBolt("CellDaoltBolt", new CellDaoltBolt(), 5)
.fieldsGrouping("cellBolt", new Fields("cell_num"));
Config conf = new Config();
conf.setDebug(false);
conf.setNumWorkers(5);
if (args.length > 0) {
try {
StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
} catch (AlreadyAliveException e) {
e.printStackTrace();
} catch (InvalidTopologyException e) {
e.printStackTrace();
}
} else {
System.out.println("Local running");
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("mytopology", conf, builder.createTopology());
}
}
}
package cmcc.hbase.dao.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import cmcc.constant.Constants;
import cmcc.hbase.dao.HBaseDAO;
public class HBaseDAOImp implements HBaseDAO {
HConnection hTablePool = null;
static Configuration conf = null;
public HBaseDAOImp() {
conf = new Configuration();
// ZooKeeper连接
String zk_list = Constants.HBASE_ZOOKEEPER_LIST;
conf.set("hbase.zookeeper.quorum", zk_list);
try {
hTablePool = HConnectionManager.createConnection(conf);
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void save(Put put, String tableName) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void insert(String tableName, String rowKey, String family, String quailifer, String value) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
Put put = new Put(rowKey.getBytes());
put.add(family.getBytes(), quailifer.getBytes(), value.getBytes());
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void insert(String tableName, String rowKey, String family, String quailifer[], String value[]) {
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
Put put = new Put(rowKey.getBytes());
// 批量添加
for (int i = 0; i < quailifer.length; i++) {
String col = quailifer[i];
String val = value[i];
put.add(family.getBytes(), col.getBytes(), val.getBytes());
}
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void save(List<Put> Put, String tableName) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
table.put(Put);
} catch (Exception e) {
// TODO: handle exception
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public Result getOneRow(String tableName, String rowKey) {
// TODO Auto-generated method stub
HTableInterface table = null;
Result rsResult = null;
try {
table = hTablePool.getTable(tableName);
Get get = new Get(rowKey.getBytes());
rsResult = table.get(get);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return rsResult;
}
@Override
public List<Result> getRows(String tableName, String rowKeyLike) {
// TODO Auto-generated method stub
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rs : scanner) {
list.add(rs);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public List<Result> getRows(String tableName, String rowKeyLike, String cols[]) {
// TODO Auto-generated method stub
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
for (int i = 0; i < cols.length; i++) {
scan.addColumn("cf".getBytes(), cols[i].getBytes());
}
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rs : scanner) {
list.add(rs);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public List<Result> getRows(String tableName, String startRow, String stopRow) {
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
Scan scan = new Scan();
scan.setStartRow(startRow.getBytes());
scan.setStopRow(stopRow.getBytes());
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rsResult : scanner) {
list.add(rsResult);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public void deleteRecords(String tableName, String rowKeyLike) {
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
List<Delete> list = new ArrayList<Delete>();
for (Result rs : scanner) {
Delete del = new Delete(rs.getRow());
list.add(del);
}
table.delete(list);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public void createTable(String tableName, String[] columnFamilys) {
try {
// admin 对象
HBaseAdmin admin = new HBaseAdmin(conf);
if (admin.tableExists(tableName)) {
System.err.println("此表,已存在!");
} else {
HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
for (String columnFamily : columnFamilys) {
tableDesc.addFamily(new HColumnDescriptor(columnFamily));
}
admin.createTable(tableDesc);
System.err.println("建表成功!");
}
admin.close();// 关闭释放资源
} catch (MasterNotRunningException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ZooKeeperConnectionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* 删除一个表
*
* @param tableName
* 删除的表名
*/
public void deleteTable(String tableName) {
try {
HBaseAdmin admin = new HBaseAdmin(conf);
if (admin.tableExists(tableName)) {
admin.disableTable(tableName);// 禁用表
admin.deleteTable(tableName);// 删除表
System.err.println("删除表成功!");
} else {
System.err.println("删除的表不存在!");
}
admin.close();
} catch (MasterNotRunningException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ZooKeeperConnectionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* 查询表中所有行
*
* @param tablename
*/
public void scaner(String tablename) {
try {
HTable table = new HTable(conf, tablename);
Scan s = new Scan();
ResultScanner rs = table.getScanner(s);
for (Result r : rs) {
KeyValue[] kv = r.raw();
for (int i = 0; i < kv.length; i++) {
System.out.print(new String(kv[i].getRow()) + "");
System.out.print(new String(kv[i].getFamily()) + ":");
System.out.print(new String(kv[i].getQualifier()) + "");
System.out.print(kv[i].getTimestamp() + "");
System.out.println(new String(kv[i].getValue()));
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
HBaseDAO dao = new HBaseDAOImp();
// 创建表
// String tableName="test";
// String cfs[] = {"cf"};
// dao.createTable(tableName,cfs);
// 存入一条数据
// Put put = new Put("bjsxt".getBytes());
// put.add("cf".getBytes(), "name".getBytes(), "cai10".getBytes()) ;
// dao.save(put, "test") ;
// 插入多列数据
// Put put = new Put("bjsxt".getBytes());
// List<Put> list = new ArrayList<Put>();
// put.add("cf".getBytes(), "addr".getBytes(), "shanghai1".getBytes()) ;
// put.add("cf".getBytes(), "age".getBytes(), "30".getBytes()) ;
// put.add("cf".getBytes(), "tel".getBytes(), "13889891818".getBytes())
// ;
// list.add(put) ;
// dao.save(list, "test");
// 插入单行数据
// dao.insert("test", "testrow", "cf", "age", "35") ;
// dao.insert("test", "testrow", "cf", "cardid", "12312312335") ;
// dao.insert("test", "testrow", "cf", "tel", "13512312345") ;
List<Result> list = dao.getRows("test", "testrow", new String[] { "age" });
for (Result rs : list) {
for (Cell cell : rs.rawCells()) {
System.out.println("RowName:" + new String(CellUtil.cloneRow(cell)) + " ");
System.out.println("Timetamp:" + cell.getTimestamp() + " ");
System.out.println("column Family:" + new String(CellUtil.cloneFamily(cell)) + " ");
System.out.println("row Name:" + new String(CellUtil.cloneQualifier(cell)) + " ");
System.out.println("value:" + new String(CellUtil.cloneValue(cell)) + " ");
}
}
Result rs = dao.getOneRow("test", "testrow");
System.out.println(new String(rs.getValue("cf".getBytes(), "age".getBytes())));
}
}
cmcc02_hbase 页面获取hbase 数据
package cmcc.hbase.dao.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;
import cmcc.hbase.dao.HBaseDAO;
public class HBaseDAOImp implements HBaseDAO {
HConnection hTablePool = null;
static Configuration conf = null;
public HBaseDAOImp() {
conf = new Configuration();
// 设置HBase的ZooKeeper
// String zk_list = "node4:2181";
String zk_list = "node2:2181,node3:2181,node4:2181";
conf.set("hbase.zookeeper.quorum", zk_list);
try {
hTablePool = HConnectionManager.createConnection(conf);
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void save(Put put, String tableName) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void insert(String tableName, String rowKey, String family, String quailifer, String value) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
Put put = new Put(rowKey.getBytes());
put.add(family.getBytes(), quailifer.getBytes(), value.getBytes());
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void insert(String tableName, String rowKey, String family, String quailifer[], String value[]) {
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
Put put = new Put(rowKey.getBytes());
// 批量添加
for (int i = 0; i < quailifer.length; i++) {
String col = quailifer[i];
String val = value[i];
put.add(family.getBytes(), col.getBytes(), val.getBytes());
}
table.put(put);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void save(List<Put> Put, String tableName) {
// TODO Auto-generated method stub
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
table.put(Put);
} catch (Exception e) {
// TODO: handle exception
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public Result getOneRow(String tableName, String rowKey) {
// TODO Auto-generated method stub
HTableInterface table = null;
Result rsResult = null;
try {
table = hTablePool.getTable(tableName);
Get get = new Get(rowKey.getBytes());
rsResult = table.get(get);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return rsResult;
}
@Override
public Result getOneRowAndMultiColumn(String tableName, String rowKey, String[] cols) {
// TODO Auto-generated method stub
HTableInterface table = null;
Result rsResult = null;
try {
table = hTablePool.getTable(tableName);
Get get = new Get(rowKey.getBytes());
for (int i = 0; i < cols.length; i++) {
get.addColumn("cf".getBytes(), cols[i].getBytes());
}
rsResult = table.get(get);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return rsResult;
}
@Override
public List<Result> getRows(String tableName, String rowKeyLike) {
// TODO Auto-generated method stub
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rs : scanner) {
list.add(rs);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public List<Result> getRows(String tableName, String rowKeyLike, String cols[]) {
// TODO Auto-generated method stub
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
for (int i = 0; i < cols.length; i++) {
scan.addColumn("cf".getBytes(), cols[i].getBytes());
}
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rs : scanner) {
list.add(rs);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public List<Result> getRowsByOneKey(String tableName, String rowKeyLike, String cols[]) {
// TODO Auto-generated method stub
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
for (int i = 0; i < cols.length; i++) {
scan.addColumn("cf".getBytes(), cols[i].getBytes());
}
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rs : scanner) {
list.add(rs);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public List<Result> getRows(String tableName, String startRow, String stopRow) {
HTableInterface table = null;
List<Result> list = null;
try {
table = hTablePool.getTable(tableName);
Scan scan = new Scan();
scan.setStartRow(startRow.getBytes());
scan.setStopRow(stopRow.getBytes());
ResultScanner scanner = table.getScanner(scan);
list = new ArrayList<Result>();
for (Result rsResult : scanner) {
list.add(rsResult);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return list;
}
@Override
public void deleteRecords(String tableName, String rowKeyLike) {
HTableInterface table = null;
try {
table = hTablePool.getTable(tableName);
PrefixFilter filter = new PrefixFilter(rowKeyLike.getBytes());
Scan scan = new Scan();
scan.setFilter(filter);
ResultScanner scanner = table.getScanner(scan);
List<Delete> list = new ArrayList<Delete>();
for (Result rs : scanner) {
Delete del = new Delete(rs.getRow());
list.add(del);
}
table.delete(list);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
table.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public void createTable(String tableName, String[] columnFamilys) {
try {
// admin 对象
HBaseAdmin admin = new HBaseAdmin(conf);
if (admin.tableExists(tableName)) {
System.err.println("此表,已存在!");
} else {
HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
for (String columnFamily : columnFamilys) {
tableDesc.addFamily(new HColumnDescriptor(columnFamily));
}
admin.createTable(tableDesc);
System.err.println("建表成功!");
}
admin.close();// 关闭释放资源
} catch (MasterNotRunningException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ZooKeeperConnectionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* 删除一个表
*
* @param tableName
* 删除的表名
*/
public void deleteTable(String tableName) {
try {
HBaseAdmin admin = new HBaseAdmin(conf);
if (admin.tableExists(tableName)) {
admin.disableTable(tableName);// 禁用表
admin.deleteTable(tableName);// 删除表
System.err.println("删除表成功!");
} else {
System.err.println("删除的表不存在!");
}
admin.close();
} catch (MasterNotRunningException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ZooKeeperConnectionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
* 查询表中所有行
*
* @param tablename
*/
public void scaner(String tablename) {
try {
HTable table = new HTable(conf, tablename);
Scan s = new Scan();
// s.addColumn(family, qualifier)
// s.addColumn(family, qualifier)
ResultScanner rs = table.getScanner(s);
for (Result r : rs) {
for (Cell cell : r.rawCells()) {
System.out.println("RowName:" + new String(CellUtil.cloneRow(cell)) + " ");
System.out.println("Timetamp:" + cell.getTimestamp() + " ");
System.out.println("column Family:" + new String(CellUtil.cloneFamily(cell)) + " ");
System.out.println("row Name:" + new String(CellUtil.cloneQualifier(cell)) + " ");
System.out.println("value:" + new String(CellUtil.cloneValue(cell)) + " ");
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
public void scanerByColumn(String tablename) {
try {
HTable table = new HTable(conf, tablename);
Scan s = new Scan();
s.addColumn("cf".getBytes(), "201504052237".getBytes());
s.addColumn("cf".getBytes(), "201504052237".getBytes());
ResultScanner rs = table.getScanner(s);
for (Result r : rs) {
for (Cell cell : r.rawCells()) {
System.out.println("RowName:" + new String(CellUtil.cloneRow(cell)) + " ");
System.out.println("Timetamp:" + cell.getTimestamp() + " ");
System.out.println("column Family:" + new String(CellUtil.cloneFamily(cell)) + " ");
System.out.println("row Name:" + new String(CellUtil.cloneQualifier(cell)) + " ");
System.out.println("value:" + new String(CellUtil.cloneValue(cell)) + " ");
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
HBaseDAO dao = new HBaseDAOImp();
// 创建表
// String tableName="test";
// String cfs[] = {"cf"};
// dao.createTable(tableName,cfs);
// 存入一条数据
// Put put = new Put("bjsxt".getBytes());
// put.add("cf".getBytes(), "name".getBytes(), "cai10".getBytes()) ;
// dao.save(put, "test") ;
// 插入多列数据
// Put put = new Put("bjsxt".getBytes());
// List<Put> list = new ArrayList<Put>();
// put.add("cf".getBytes(), "addr".getBytes(), "shanghai1".getBytes()) ;
// put.add("cf".getBytes(), "age".getBytes(), "30".getBytes()) ;
// put.add("cf".getBytes(), "tel".getBytes(), "13889891818".getBytes())
// ;
// list.add(put) ;
// dao.save(list, "test");
// 插入单行数据
// dao.insert("test", "testrow", "cf", "age", "35") ;
// dao.insert("test", "testrow", "cf", "cardid", "12312312335") ;
// dao.insert("test", "testrow", "cf", "tel", "13512312345") ;
// List<Result> list = dao.getRows("test", "testrow",new
// String[]{"age"}) ;
// for(Result rs : list)
// {
// for(Cell cell:rs.rawCells()){
// System.out.println("RowName:"+new String(CellUtil.cloneRow(cell))+"
// ");
// System.out.println("Timetamp:"+cell.getTimestamp()+" ");
// System.out.println("column Family:"+new
// String(CellUtil.cloneFamily(cell))+" ");
// System.out.println("row Name:"+new
// String(CellUtil.cloneQualifier(cell))+" ");
// System.out.println("value:"+new String(CellUtil.cloneValue(cell))+"
// ");
// }
// }
// Result rs = dao.getOneRow("test", "testrow");
// System.out.println(new String(rs.getValue("cf".getBytes(),
// "age".getBytes())));
// Result rs = dao.getOneRowAndMultiColumn("cell_monitor_table",
// "29448-513332015-04-05", new
// String[]{"201504052236","201504052237"});
// for(Cell cell:rs.rawCells()){
// System.out.println("RowName:"+new String(CellUtil.cloneRow(cell))+"
// ");
// System.out.println("Timetamp:"+cell.getTimestamp()+" ");
// System.out.println("column Family:"+new
// String(CellUtil.cloneFamily(cell))+" ");
// System.out.println("row Name:"+new
// String(CellUtil.cloneQualifier(cell))+" ");
// System.out.println("value:"+new String(CellUtil.cloneValue(cell))+"
// ");
// }
dao.deleteTable("cell_monitor_table");
// 创建表
String tableName = "cell_monitor_table";
String cfs[] = { "cf" };
dao.createTable(tableName, cfs);
}
public static void testRowFilter(String tableName) {
try {
HTable table = new HTable(conf, tableName);
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("column1"), Bytes.toBytes("qqqq"));
Filter filter1 = new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("laoxia157")));
scan.setFilter(filter1);
ResultScanner scanner1 = table.getScanner(scan);
for (Result res : scanner1) {
System.out.println(res);
}
scanner1.close();
//
// Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL,new
// RegexStringComparator("laoxia4\\d{2}"));
// scan.setFilter(filter2);
// ResultScanner scanner2 = table.getScanner(scan);
// for (Result res : scanner2) {
// System.out.println(res);
// }
// scanner2.close();
Filter filter3 = new RowFilter(CompareOp.EQUAL, new SubstringComparator("laoxia407"));
scan.setFilter(filter3);
ResultScanner scanner3 = table.getScanner(scan);
for (Result res : scanner3) {
System.out.println(res);
}
scanner3.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}


目录树位置
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "mylog_cmcc", "/MyKafka", // 偏移量offset的根目录 "MyTrack"); // 对应一个应用 ack 是信息完整性保护线程。




/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sxt.storm.drpc;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.LocalDRPC;
import backtype.storm.StormSubmitter;
import backtype.storm.drpc.LinearDRPCTopologyBuilder;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
* This topology is a basic example of doing distributed RPC on top of Storm. It
* implements a function that appends a "!" to any string you send the DRPC
* function.
* <p/>
* See https://github.com/nathanmarz/storm/wiki/Distributed-RPC for more
* information on doing distributed RPC on top of Storm.
*/
public class BasicDRPCTopology {
public static class ExclaimBolt extends BaseBasicBolt {
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
String input = tuple.getString(1);
collector.emit(new Values(tuple.getValue(0), input + "!"));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "result"));
}
}
public static void main(String[] args) throws Exception {
LinearDRPCTopologyBuilder builder = new LinearDRPCTopologyBuilder("exclamation");
builder.addBolt(new ExclaimBolt(), 3);
Config conf = new Config();
if (args == null || args.length == 0) {
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("drpc-demo", conf, builder.createLocalTopology(drpc));
for (String word : new String[] { "hello", "goodbye" }) {
System.err.println("Result for \"" + word + "\": " + drpc.execute("exclamation", word));
}
cluster.shutdown();
drpc.shutdown();
} else {
conf.setNumWorkers(3);
StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createRemoteTopology());
}
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sxt.storm.drpc;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.LocalDRPC;
import backtype.storm.drpc.DRPCSpout;
import backtype.storm.drpc.ReturnResults;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class ManualDRPC {
public static class ExclamationBolt extends BaseBasicBolt {
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("result", "return-info"));
}
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
String arg = tuple.getString(0);
Object retInfo = tuple.getValue(1);
collector.emit(new Values(arg + "!!!", retInfo));
}
}
public static void main(String[] args) {
TopologyBuilder builder = new TopologyBuilder();
LocalDRPC drpc = new LocalDRPC();
DRPCSpout spout = new DRPCSpout("exclamation", drpc);
builder.setSpout("drpc", spout);
builder.setBolt("exclaim", new ExclamationBolt(), 3).shuffleGrouping("drpc");
builder.setBolt("return", new ReturnResults(), 3).shuffleGrouping("exclaim");
LocalCluster cluster = new LocalCluster();
Config conf = new Config();
cluster.submitTopology("exclaim", conf, builder.createTopology());
System.err.println(drpc.execute("exclamation", "aaa"));
System.err.println(drpc.execute("exclamation", "bbb"));
}
}

配置和演示drpc
[root@node2 conf]# vi storm.yaml
drpc.servers:
- "node2"
## scp 到node3,4
启动strom和drpc
[root@node2 conf]# cd /opt/sxt/apache-storm-0.10.0
[root@node2 apache-storm-0.10.0]# ./bin/storm nimbus >> ./logs/nimbus.out 2>&1 &
./bin/storm ui >> ./logs/ui.out 2>&1 &
[root@node2 apache-storm-0.10.0]# ./bin/storm drpc >> ./logs/drpc.out 2>&1 &"
"supervisor
#node3,4
./bin/storm supervisor >> ./logs/supervisor.out 2>&1 &"
"supervisor
## 将BasicDRPCTopology.java 打为jar包
## 上传
[root@node2 apache-storm-0.10.0]# ./bin/storm jar ~/software/DRPCDemo.jar com.sxt.storm.drpc.BasicDRPCTopology drpc
## 在eclipse中使用客户端调用
package com.sxt.storm.drpc;
import org.apache.thrift7.TException;
import backtype.storm.generated.DRPCExecutionException;
import backtype.storm.utils.DRPCClient;
public class MyDRPCclient {
/**
* @param args
*/
public static void main(String[] args) {
DRPCClient client = new DRPCClient("node2", 3772);
try {
String result = client.execute("exclamation", "11,22");
System.out.println(result);
} catch (TException e) {
e.printStackTrace();
} catch (DRPCExecutionException e) {
e.printStackTrace();
}
}
}
## 上表代码输出 11!!!,22!!!



/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sxt.storm.drpc;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.LocalDRPC;
import backtype.storm.StormSubmitter;
import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.drpc.LinearDRPCTopologyBuilder;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.topology.base.BaseBatchBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
* This is a good example of doing complex Distributed RPC on top of Storm. This
* program creates a topology that can compute the reach for any URL on Twitter
* in realtime by parallelizing the whole computation.
* <p/>
* Reach is the number of unique people exposed to a URL on Twitter. To compute
* reach, you have to get all the people who tweeted the URL, get all the
* followers of all those people, unique that set of followers, and then count
* the unique set. It's an intense computation that can involve thousands of
* database calls and tens of millions of follower records.
* <p/>
* This Storm topology does every piece of that computation in parallel, turning
* what would be a computation that takes minutes on a single machine into one
* that takes just a couple seconds.
* <p/>
* For the purposes of demonstration, this topology replaces the use of actual
* DBs with in-memory hashmaps.
* <p/>
* See https://github.com/nathanmarz/storm/wiki/Distributed-RPC for more
* information on Distributed RPC.
*/
public class ReachTopology {
public static Map<String, List<String>> TWEETERS_DB = new HashMap<String, List<String>>() {
{
put("foo.com/blog/1", Arrays.asList("sally", "bob", "tim", "george", "nathan"));
put("engineering.twitter.com/blog/5", Arrays.asList("adam", "david", "sally", "nathan"));
put("tech.backtype.com/blog/123", Arrays.asList("tim", "mike", "john"));
}
};
public static Map<String, List<String>> FOLLOWERS_DB = new HashMap<String, List<String>>() {
{
put("sally", Arrays.asList("bob", "tim", "alice", "adam", "jim", "chris", "jai"));
put("bob", Arrays.asList("sally", "nathan", "jim", "mary", "david", "vivian"));
put("tim", Arrays.asList("alex"));
put("nathan", Arrays.asList("sally", "bob", "adam", "harry", "chris", "vivian", "emily", "jordan"));
put("adam", Arrays.asList("david", "carissa"));
put("mike", Arrays.asList("john", "bob"));
put("john", Arrays.asList("alice", "nathan", "jim", "mike", "bob"));
}
};
public static class GetTweeters extends BaseBasicBolt {
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
Object id = tuple.getValue(0);
String url = tuple.getString(1);
List<String> tweeters = TWEETERS_DB.get(url);
if (tweeters != null) {
for (String tweeter : tweeters) {
collector.emit(new Values(id, tweeter));
}
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "tweeter"));
}
}
public static class GetFollowers extends BaseBasicBolt {
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
Object id = tuple.getValue(0);
String tweeter = tuple.getString(1);
List<String> followers = FOLLOWERS_DB.get(tweeter);
if (followers != null) {
for (String follower : followers) {
collector.emit(new Values(id, follower));
}
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "follower"));
}
}
public static class PartialUniquer extends BaseBatchBolt {
BatchOutputCollector _collector;
Object _id;
Set<String> _followers = new HashSet<String>();
@Override
public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) {
_collector = collector;
_id = id;
}
@Override
public void execute(Tuple tuple) {
_followers.add(tuple.getString(1));
}
@Override
public void finishBatch() {
_collector.emit(new Values(_id, _followers.size()));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "partial-count"));
}
}
public static class CountAggregator extends BaseBatchBolt {
BatchOutputCollector _collector;
Object _id;
int _count = 0;
@Override
public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) {
_collector = collector;
_id = id;
}
@Override
public void execute(Tuple tuple) {
_count += tuple.getInteger(1);
}
@Override
public void finishBatch() {
_collector.emit(new Values(_id, _count));
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "reach"));
}
}
public static LinearDRPCTopologyBuilder construct() {
LinearDRPCTopologyBuilder builder = new LinearDRPCTopologyBuilder("reach");
builder.addBolt(new GetTweeters(), 4);
builder.addBolt(new GetFollowers(), 12).shuffleGrouping();
// builder.addBolt(new PartialUniquer(), 6).fieldsGrouping(new Fields("id", "follower"));
builder.addBolt(new PartialUniquer(), 6).fieldsGrouping(new Fields("id"));
builder.addBolt(new CountAggregator(), 3).fieldsGrouping(new Fields("id"));
return builder;
}
public static void main(String[] args) throws Exception {
LinearDRPCTopologyBuilder builder = construct();
Config conf = new Config();
if (args == null || args.length == 0) {
conf.setMaxTaskParallelism(3);
LocalDRPC drpc = new LocalDRPC();
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("reach-drpc", conf, builder.createLocalTopology(drpc));
String[] urlsToTry = new String[] { "foo.com/blog/1", "engineering.twitter.com/blog/5", "notaurl.com" };
for (String url : urlsToTry) {
System.err.println("Reach of " + url + ": " + drpc.execute("reach", url));
}
cluster.shutdown();
drpc.shutdown();
} else {
conf.setNumWorkers(6);
StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createRemoteTopology());
}
}
}
kafka comsumer 两个消费者可以消费同一条数据。与生活中的吃包子不一样。(相当于查看数据),各个分区之间的数据不一定有序,分区内的数据有序

./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 3 --topic test ./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --describe --topic test ./kafka-console-producer.sh --broker-list node2:9092,node3:9092,node4:9092 --topic test ./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic test
storm kafka 集成文档 https://github.com/apache/storm/tree/master/external/storm-kafka
https://www.tutorialspoint.com/apache_kafka/apache_kafka_integration_storm.htm
flume + kafka
http://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html#kafka-sink
[root@node2 conf]# cat flume-env.sh | grep export export JAVA_HOME=/usr/java/jdk1.8.0_221 [root@node2 conf]# cat fk.conf a1.sources = r1 a1.sinks = k1 a1.channels = c1 # Describe/configure the source a1.sources.r1.type = avro a1.sources.r1.bind = node2 a1.sources.r1.port = 41414 # Describe the sink a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink a1.sinks.k1.topic = testflume a1.sinks.k1.brokerList = node2:9092,node3:9092,node3:9092 a1.sinks.k1.requiredAcks = 1 a1.sinks.k1.batchSize = 20 # Use a channel which buffers events in memory a1.channels.c1.type = memory a1.channels.c1.capacity = 1000000 a1.channels.c1.transactionCapacity = 10000 # Bind the source and sink to the channel a1.sources.r1.channels = c1 a1.sinks.k1.channel = c1 [root@node2 conf]# pwd /opt/sxt/apache-flume-1.6.0-bin/conf
## 启动zk
## 启动kafka
## 启动 flume
[root@node2 apache-flume-1.6.0-bin]# bin/flume-ng agent -n a1 -c conf -f conf/fk.conf -Dflume.root.logger=DEBUG,console
## 消费 kafka topic testflume
[root@node3 bin]# ./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic testflume
## 运行程序
package com.sxt.flume;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;
import java.nio.charset.Charset;
/**
* Flume官网案例
* http://flume.apache.org/FlumeDeveloperGuide.html
* @author root
*/
public class RpcClientDemo {
public static void main(String[] args) {
MyRpcClientFacade client = new MyRpcClientFacade();
// Initialize client with the remote Flume agent's host and port
client.init("node2", 41414);
// Send 10 events to the remote Flume agent. That agent should be
// configured to listen with an AvroSource.
for (int i = 10; i < 20; i++) {
String sampleData = "Hello Flume!ERROR" + i;
client.sendDataToFlume(sampleData);
System.out.println("发送数据:" + sampleData);
}
client.cleanUp();
}
}
class MyRpcClientFacade {
private RpcClient client;
private String hostname;
private int port;
public void init(String hostname, int port) {
// Setup the RPC connection
this.hostname = hostname;
this.port = port;
this.client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of the
// above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
public void sendDataToFlume(String data) {
// Create a Flume Event object that encapsulates the sample data
Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
// Send the event
try {
client.append(event);
} catch (EventDeliveryException e) {
// clean up and recreate the client
client.close();
client = null;
client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of
// the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
}
public void cleanUp() {
// Close the RPC connection
client.close();
}
}
## kafka 消费如下内容
[root@node3 bin]# ./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic testflume
Hello Flume!ERROR10
Hello Flume!ERROR11
Hello Flume!ERROR12

[root@node3 bin]# ./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --create --replication-factor 2 --partitions 1 --topic LogError
Created topic "LogError".
## 为kafkaBolt做准备
[root@node3 bin]# ./kafka-topics.sh --zookeeper node2:2181,node3:2181,node4:2181 --list
LogError
[root@node3 bin]# ./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic testflume
[root@node4 bin]# ./kafka-console-consumer.sh --zookeeper node2:2181,node3:2181,node4:2181 --from-beginning --topic LogError
## 首先运行
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.sxt.storm.logfileter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
import storm.kafka.bolt.KafkaBolt;
import storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import storm.kafka.bolt.selector.DefaultTopicSelector;
/**
* This topology demonstrates Storm's stream groupings and multilang
* capabilities.
*/
public class LogFilterTopology {
public static class FilterBolt extends BaseBasicBolt {
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
String line = tuple.getString(0);
System.err.println("Accept: " + line);
// 包含ERROR的行留下
if (line.contains("ERROR")) {
System.err.println("Filter: " + line);
collector.emit(new Values(line));
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// 定义message提供给后面FieldNameBasedTupleToKafkaMapper使用
declarer.declare(new Fields("message"));
}
}
public static void main(String[] args) throws Exception {
TopologyBuilder builder = new TopologyBuilder();
// https://github.com/apache/storm/tree/master/external/storm-kafka
// config kafka spout,话题
String topic = "testflume";
ZkHosts zkHosts = new ZkHosts("node2:2181,node3:2181,node4:2181");
// /MyKafka,偏移量offset的根目录,记录队列取到了哪里
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, topic, "/MyKafka", "MyTrack");// 对应一个应用
List<String> zkServers = new ArrayList<String>();
System.out.println(zkHosts.brokerZkStr);
for (String host : zkHosts.brokerZkStr.split(",")) {
zkServers.add(host.split(":")[0]);
}
spoutConfig.zkServers = zkServers;
spoutConfig.zkPort = 2181;
// 是否从头开始消费
spoutConfig.forceFromStart = true;
spoutConfig.socketTimeoutMs = 60 * 1000;
// StringScheme将字节流转解码成某种编码的字符串
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
// set kafka spout
builder.setSpout("kafka_spout", kafkaSpout, 3);
// set bolt
builder.setBolt("filter", new FilterBolt(), 8).shuffleGrouping("kafka_spout");
// 数据写出
// set kafka bolt
// withTopicSelector使用缺省的选择器指定写入的topic: LogError
// withTupleToKafkaMapper tuple==>kafka的key和message
KafkaBolt kafka_bolt = new KafkaBolt().withTopicSelector(new DefaultTopicSelector("LogError"))
.withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper());
builder.setBolt("kafka_bolt", kafka_bolt, 2).shuffleGrouping("filter");
Config conf = new Config();
// set producer properties.
Properties props = new Properties();
props.put("metadata.broker.list", "node2:9092,node3:9092,node4:9092");
/**
* Kafka生产者ACK机制 0 : 生产者不等待Kafka broker完成确认,继续发送下一条数据 1 :
* 生产者等待消息在leader接收成功确认之后,继续发送下一条数据 -1 :
* 生产者等待消息在follower副本接收到数据确认之后,继续发送下一条数据
*/
props.put("request.required.acks", "1");
props.put("serializer.class", "kafka.serializer.StringEncoder");
conf.put("kafka.broker.properties", props);
conf.put(Config.STORM_ZOOKEEPER_SERVERS, Arrays.asList(new String[] { "node2", "node3", "node4" }));
// 本地方式运行
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("mytopology", conf, builder.createTopology());
}
}
## 再运行
package com.sxt.flume;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;
import java.nio.charset.Charset;
/**
* Flume官网案例
* http://flume.apache.org/FlumeDeveloperGuide.html
* @author root
*/
public class RpcClientDemo {
public static void main(String[] args) {
MyRpcClientFacade client = new MyRpcClientFacade();
// Initialize client with the remote Flume agent's host and port
client.init("node2", 41414);
// Send 10 events to the remote Flume agent. That agent should be
// configured to listen with an AvroSource.
for (int i = 10; i < 20; i++) {
// String sampleData = "Hello Flume!wawa" + i;
String sampleData = "Hello Flume!ERROR" + i;
client.sendDataToFlume(sampleData);
System.out.println("发送数据:" + sampleData);
}
client.cleanUp();
}
}
class MyRpcClientFacade {
private RpcClient client;
private String hostname;
private int port;
public void init(String hostname, int port) {
// Setup the RPC connection
this.hostname = hostname;
this.port = port;
this.client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of the
// above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
public void sendDataToFlume(String data) {
// Create a Flume Event object that encapsulates the sample data
Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
// Send the event
try {
client.append(event);
} catch (EventDeliveryException e) {
// clean up and recreate the client
client.close();
client = null;
client = RpcClientFactory.getDefaultInstance(hostname, port);
// Use the following method to create a thrift client (instead of
// the above line):
// this.client = RpcClientFactory.getThriftInstance(hostname, port);
}
}
public void cleanUp() {
// Close the RPC connection
client.close();
}
}
## 查看kafka监控
Storm – 事务 http://storm.apache.org/releases/1.2.3/Transactional-topologies.html http://storm.apache.org/releases/0.9.6/Transactional-topologies.html 事务性拓扑(Transactional Topologies) 保证消息(tuple)被且仅被处理一次



例子 package com.sxt.storm.transactional; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.StormSubmitter; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.transactional.TransactionalTopologyBuilder; public class MyTopo { /** * @param args */ public static void main(String[] args) { TransactionalTopologyBuilder builder = new TransactionalTopologyBuilder("ttbId","spoutid",new MyTxSpout(),1); builder.setBolt("bolt1", new MyTransactionBolt(),3).shuffleGrouping("spoutid"); builder.setBolt("committer", new MyCommitter(),1).shuffleGrouping("bolt1") ; Config conf = new Config() ; conf.setDebug(false); if (args.length > 0) { try { StormSubmitter.submitTopology(args[0], conf, builder.buildTopology()); } catch (AlreadyAliveException e) { e.printStackTrace(); } catch (InvalidTopologyException e) { e.printStackTrace(); } }else { LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("mytopology", conf, builder.buildTopology()); } } } package com.sxt.storm.transactional; import java.util.HashMap; import java.util.Map; import java.util.Random; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.transactional.ITransactionalSpout; import backtype.storm.tuple.Fields; public class MyTxSpout implements ITransactionalSpout<MyMeta> { /** * 数据源 */ Map<Long, String> dbMap = null; public MyTxSpout() { Random random = new Random(); dbMap = new HashMap<Long, String>(); String[] hosts = { "www.taobao.com" }; String[] session_id = { "ABYH6Y4V4SCVXTG6DPB4VH9U123", "XXYH6YCGFJYERTT834R52FDXV9U34", "BBYH61456FGHHJ7JL89RG5VV9UYU7", "CYYH6Y2345GHI899OFG4V9U567", "VVVYH6Y4V4SFXZ56JIPDPB4V678" }; String[] time = { "2017-02-21 08:40:50", "2017-02-21 08:40:51", "2017-02-21 08:40:52", "2017-02-21 08:40:53", "2017-02-21 09:40:49", "2017-02-21 10:40:49", "2017-02-21 11:40:49", "2017-02-21 12:40:49" }; for (long i = 0; i < 100; i++) { dbMap.put(i, hosts[0] + "\t" + session_id[random.nextInt(5)] + "\t" + time[random.nextInt(8)]); } } private static final long serialVersionUID = 1L; @Override public backtype.storm.transactional.ITransactionalSpout.Coordinator<MyMeta> getCoordinator(Map conf, TopologyContext context) { return new MyCoordinator(); } @Override public backtype.storm.transactional.ITransactionalSpout.Emitter<MyMeta> getEmitter(Map conf, TopologyContext context) { return new MyEmitter(dbMap); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("tx", "log")); } @Override public Map<String, Object> getComponentConfiguration() { return null; } } package com.sxt.storm.transactional; import java.util.Map; import backtype.storm.coordination.BatchOutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseTransactionalBolt; import backtype.storm.transactional.TransactionAttempt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; public class MyTransactionBolt extends BaseTransactionalBolt { /** * */ private static final long serialVersionUID = 1L; Integer count = 0; BatchOutputCollector collector; TransactionAttempt tx ; @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt id) { this.collector = collector; System.err.println("MyTransactionBolt prepare txid:"+id.getTransactionId() +"; attemptid: "+id.getAttemptId()); } /** * 处理batch中每一个tuple */ @Override public void execute(Tuple tuple) { tx = (TransactionAttempt) tuple.getValue(0); System.err.println("MyTransactionBolt TransactionAttempt txid:"+tx.getTransactionId() +"; attemptid:"+tx.getAttemptId()); String log = tuple.getString(1); if (log != null && log.length()>0) { count ++ ; } } /** * 同一个batch处理完成后,会调用一次finishBatch方法 */ @Override public void finishBatch() { System.err.println("finishBatch: "+count ); collector.emit(new Values(tx,count)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("tx","count")); } } package com.sxt.storm.transactional; import java.math.BigInteger; import java.util.HashMap; import java.util.Map; import backtype.storm.coordination.BatchOutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseTransactionalBolt; import backtype.storm.transactional.ICommitter; import backtype.storm.transactional.TransactionAttempt; import backtype.storm.tuple.Tuple; public class MyCommitter extends BaseTransactionalBolt implements ICommitter { /** * */ private static final long serialVersionUID = 1L; public static final String GLOBAL_KEY = "GLOBAL_KEY"; public static Map<String, DbValue> dbMap = new HashMap<String, DbValue>(); int sum = 0; TransactionAttempt id; BatchOutputCollector collector; @Override public void execute(Tuple tuple) { sum += tuple.getInteger(1); } @Override public void finishBatch() { DbValue value = dbMap.get(GLOBAL_KEY); DbValue newValue; if (value == null || !value.txid.equals(id.getTransactionId())) { // 更新数据库 newValue = new DbValue(); newValue.txid = id.getTransactionId(); if (value == null) { newValue.count = sum; } else { newValue.count = value.count + sum; } dbMap.put(GLOBAL_KEY, newValue); } else { newValue = value; } System.out.println("total==========================:" + dbMap.get(GLOBAL_KEY).count); // collector.emit(tuple) } @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt id) { this.id = id; this.collector = collector; } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { } public static class DbValue { BigInteger txid; int count = 0; } }


浙公网安备 33010602011771号