I. Scenario: Mobile Call Log Analyzer
Spout class
public class FakeCallLogReaderSpout implements IRichSpout {

    private static final long serialVersionUID = -9154076535681552701L;

    // Collector that passes the tuples emitted by this spout to the bolts.
    private SpoutOutputCollector collector;
    // Topology context that holds topology metadata.
    private TopologyContext context;

    private Random randomGenerator = new Random();
    private Integer idx = 0;

    /**
     * Emit the next tuple(s).
     */
    @Override
    public void nextTuple() {
        if (this.idx <= 1000) {
            // Create a list of fake phone numbers.
            List<String> mobileNumbers = new ArrayList<String>();
            mobileNumbers.add("1234123401");
            mobileNumbers.add("1234123402");
            mobileNumbers.add("1234123403");
            mobileNumbers.add("1234123404");

            Integer localIdx = 0;
            while (localIdx++ < 100 && this.idx++ < 1000) {
                // Pick a random number as the caller.
                String fromMobileNumber = mobileNumbers.get(randomGenerator.nextInt(4));
                // Pick a random number as the callee.
                String toMobileNumber = mobileNumbers.get(randomGenerator.nextInt(4));
                // Re-pick the callee until it differs from the caller
                // (compare Strings with equals(), not ==).
                while (fromMobileNumber.equals(toMobileNumber)) {
                    toMobileNumber = mobileNumbers.get(randomGenerator.nextInt(4));
                }
                // Call duration in seconds.
                Integer duration = randomGenerator.nextInt(60);
                // Emit the call record.
                this.collector.emit(new Values(fromMobileNumber, toMobileNumber, duration));
            }
        }
    }

    // Declare the output fields.
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("from", "to", "duration"));
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.context = context;
        this.collector = collector;
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    public boolean isDistributed() {
        return false;
    }

    @Override
    public void ack(Object arg0) {
    }

    @Override
    public void activate() {
    }

    @Override
    public void close() {
    }

    @Override
    public void deactivate() {
    }

    @Override
    public void fail(Object arg0) {
    }
}
CallLogCreatorBolt
public class CallLogCreatorBolt implements IRichBolt {

    private static final long serialVersionUID = -3848309443722324775L;

    // Output collector that gathers and emits the tuples this bolt produces.
    private OutputCollector collector;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String from = tuple.getString(0);
        String to = tuple.getString(1);
        Integer duration = tuple.getInteger(2);
        // Emit a new tuple: "caller - callee" plus the duration.
        collector.emit(new Values(from + " - " + to, duration));
    }

    @Override
    public void cleanup() {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("call", "duration"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
CallLogCounterBolt
public class CallLogCounterBolt implements IRichBolt {

    Map<String, Integer> counterMap;
    private OutputCollector collector;

    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.counterMap = new HashMap<String, Integer>();
        this.collector = collector;
    }

    @Override
    public void execute(Tuple tuple) {
        String call = tuple.getString(0);
        Integer duration = tuple.getInteger(1);
        if (!counterMap.containsKey(call)) {
            counterMap.put(call, 1);
        } else {
            Integer c = counterMap.get(call) + 1;
            counterMap.put(call, c);
        }
        // This is the last bolt, so ack the tuple to mark it as fully processed.
        collector.ack(tuple);
    }

    @Override
    public void cleanup() {
        for (Map.Entry<String, Integer> entry : counterMap.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // As the last bolt in the topology, this declaration is optional.
        declarer.declare(new Fields("call"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
LogAnalyserStorm
public class LogAnalyserStorm {
    public static void main(String[] args) throws InterruptedException {
        // Create a Config instance for cluster configuration.
        Config config = new Config();
        config.setDebug(true);

        // Create the topology builder.
        TopologyBuilder builder = new TopologyBuilder();
        // Register the spout; the id must be a unique string.
        builder.setSpout("call-log-reader-spout", new FakeCallLogReaderSpout());
        // Register the bolt; shuffleGrouping controls how tuples coming out of the spout are distributed to the bolt.
        builder.setBolt("call-log-creator-bolt", new CallLogCreatorBolt())
                .shuffleGrouping("call-log-reader-spout");
        // Register the bolt; fieldsGrouping groups tuples by the value of the specified field.
        builder.setBolt("call-log-counter-bolt", new CallLogCounterBolt())
                .fieldsGrouping("call-log-creator-bolt", new Fields("call"));

        // Local cluster for development.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LogAnalyserStorm", config, builder.createTopology());
        Thread.sleep(30000);

        // Stop the topology.
        cluster.shutdown();
    }
}
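A note on the two groupings used above: shuffleGrouping spreads tuples evenly across a bolt's tasks, while fieldsGrouping routes every tuple with the same field value to the same task. The sketch below is a hedged variant of the same wiring with parallelism hints (the counts 1, 4 and 2 are illustrative assumptions, not part of the example); with several counter tasks, fieldsGrouping on "call" keeps each caller-callee pair's count on a single task, so the in-memory counts stay correct.

// Sketch only: same wiring as above, with illustrative parallelism hints.
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("call-log-reader-spout", new FakeCallLogReaderSpout(), 1);
// 4 creator tasks; shuffleGrouping spreads tuples evenly across them.
builder.setBolt("call-log-creator-bolt", new CallLogCreatorBolt(), 4)
        .shuffleGrouping("call-log-reader-spout");
// 2 counter tasks; fieldsGrouping on "call" guarantees all tuples with the
// same "call" value land on the same task, so each count is complete.
builder.setBolt("call-log-counter-bolt", new CallLogCounterBolt(), 2)
        .fieldsGrouping("call-log-creator-bolt", new Fields("call"));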
Run results
1234123402 - 1234123401 : 85
1234123402 - 1234123404 : 90
1234123402 - 1234123403 : 86
1234123401 - 1234123404 : 76
1234123401 - 1234123403 : 96
1234123401 - 1234123402 : 78
1234123403 - 1234123404 : 85
1234123404 - 1234123401 : 75
1234123403 - 1234123402 : 92
1234123404 - 1234123402 : 80
1234123404 - 1234123403 : 72
1234123403 - 1234123401 : 85
ISpout
The spout is responsible for generating messages (tuples) for the topology to process.
Storm tracks every tuple, following it through a DAG of the downstream processing.
If Storm detects that every tuple in the DAG has been processed successfully, it acks back to the spout.
A tuple must be emitted with a messageId for Storm to track it (see the sketch below).
The spout's ack/fail/nextTuple methods are all invoked on the same thread.
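To make the notes above concrete, here is a minimal sketch of a reliable spout. It assumes Storm 1.x package names (org.apache.storm); the class name, the hard-coded sample tuple, and the pending map are illustrative and not part of the example topology above.

import java.util.HashMap;
import java.util.Map;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class ReliableCallLogSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    // Keep emitted tuples until Storm acks them, so failed ones can be replayed.
    private Map<Long, Values> pending;
    private long nextId = 0;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.pending = new HashMap<Long, Values>();
    }

    @Override
    public void nextTuple() {
        // Illustrative fixed call record; a real spout would read from a source.
        Values call = new Values("1234123401", "1234123402", 30);
        long msgId = nextId++;
        pending.put(msgId, call);
        // Emitting with a messageId makes the tuple trackable: Storm builds the DAG
        // for it and later calls ack(msgId) or fail(msgId) on this spout.
        collector.emit(call, msgId);
    }

    @Override
    public void ack(Object msgId) {
        // Fully processed: drop it from the pending map.
        pending.remove(msgId);
    }

    @Override
    public void fail(Object msgId) {
        // Processing failed or timed out: replay the original tuple.
        collector.emit(pending.get(msgId), msgId);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("from", "to", "duration"));
    }
}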
Deploying the topology to a Storm cluster (not local mode)
1. In the main method of [App.java], submit with StormSubmitter instead of LocalCluster (a full cluster-mode main is sketched after this list):
StormSubmitter.submitTopology("mytopology", conf, builder.createTopology())
2. Export the jar package.
3. Submit it to the Storm cluster and run it:
storm jar /usr/local/apache-storm-1.1.1/stormdemo-0.0.1-SNAPSHOT.jar com.suxiaodong.storm.LogAnalyserStorm
Syntax: storm jar topology-jar-path class ...
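For reference, a sketch of what the cluster-mode main method could look like (imports omitted, as in the examples above). The topology name "mytopology", the worker count, and the class name are illustrative assumptions.

public class LogAnalyserStormCluster {
    public static void main(String[] args) throws Exception {
        Config conf = new Config();
        conf.setDebug(false);
        // Number of worker JVMs to spread the topology across on the cluster.
        conf.setNumWorkers(2);

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("call-log-reader-spout", new FakeCallLogReaderSpout());
        builder.setBolt("call-log-creator-bolt", new CallLogCreatorBolt())
                .shuffleGrouping("call-log-reader-spout");
        builder.setBolt("call-log-counter-bolt", new CallLogCounterBolt())
                .fieldsGrouping("call-log-creator-bolt", new Fields("call"));

        // Submit to the cluster instead of a LocalCluster; the nimbus host is
        // resolved from the storm.yaml of the machine running "storm jar".
        StormSubmitter.submitTopology("mytopology", conf, builder.createTopology());
    }
}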
II. Common Storm Commands
storm activate: activate (resume) a deactivated topology
storm deactivate: deactivate (pause) a topology
storm kill: kill a topology
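Example usage against a submitted topology (the name "mytopology" and the -w wait time below are illustrative):

storm activate mytopology      # resume the spout (nextTuple is called again)
storm deactivate mytopology    # pause the spout (nextTuple is no longer called)
storm kill mytopology -w 10    # deactivate, wait 10 seconds, then remove the topology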