建立一个maven项目,在pom.xml中进行如下配置:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>cn.darrenchan</groupId>
  <artifactId>StormDemo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>StormDemo</name>

  <dependencies>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>0.9.5</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <mainClass>cn.itcast.bigdata.hadoop.mapreduce.wordcount.WordCount</mainClass>
            </manifest>
          </archive>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
      </plugin>
    </plugins>
  </build>
</project>
项目目录为:
MySpout.java:
package cn.darrenchan.storm;

import java.util.Map;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Demo spout: every call to {@link #nextTuple()} emits the same fixed
 * sentence as a one-field tuple. Values extends ArrayList, so a tuple is
 * just a positional list of objects.
 */
public class MySpout extends BaseRichSpout {

    private SpoutOutputCollector outputCollector;

    /** One-time initialization hook; Storm hands us the collector here. */
    @Override
    public void open(Map config, TopologyContext context, SpoutOutputCollector collector) {
        this.outputCollector = collector;
    }

    /** Invoked repeatedly by the Storm framework; each call emits one sentence. */
    @Override
    public void nextTuple() {
        outputCollector.emit(new Values("i am lilei love hanmeimei"));
    }

    /** Names the single field carried by every tuple this spout emits. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("love"));
    }
}
MySplitBolt.java:
package cn.darrenchan.storm;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Splitter bolt: takes a sentence tuple, splits it on single spaces, and
 * emits one (word, 1) tuple per token for the downstream counting bolt.
 */
public class MySplitBolt extends BaseRichBolt {

    private OutputCollector outputCollector;

    /** One-time initialization hook; Storm hands us the collector here. */
    @Override
    public void prepare(Map config, TopologyContext context, OutputCollector collector) {
        this.outputCollector = collector;
    }

    /** Called by Storm for every incoming tuple; field 0 holds the sentence. */
    @Override
    public void execute(Tuple input) {
        String sentence = input.getString(0);
        for (String token : sentence.split(" ")) {
            // Two values per tuple, matching the two fields declared below.
            outputCollector.emit(new Values(token, 1));
        }
    }

    /** Two fields, matching the two values emitted above. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "num"));
    }
}
MyCountBolt.java:
package cn.darrenchan.storm;

import java.util.HashMap;
import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

/**
 * Counting bolt: accumulates per-word totals in an in-memory map and
 * prints the whole map after each update. Terminal bolt — declares and
 * emits no output fields.
 */
public class MyCountBolt extends BaseRichBolt {

    private OutputCollector collector;
    // word -> running total; populated in prepare(), mutated only in execute()
    private Map<String, Integer> map;

    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        map = new HashMap<String, Integer>();
    }

    @Override
    public void execute(Tuple input) {
        String word = input.getString(0);
        Integer num = input.getInteger(1);
        // BUG FIX: the original put a hard-coded 1 on first occurrence
        // (map.put(word, 1)), dropping the incoming count. Upstream happens
        // to always send 1, but using num keeps the bolt correct for any
        // count value a producer sends.
        Integer current = map.get(word);
        map.put(word, current == null ? num : current + num);
        System.out.println(map);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: nothing is emitted downstream.
    }
}
WordCountTopoloyMain.java:
package cn.darrenchan.storm;import backtype.storm.Config;import backtype.storm.LocalCluster;import backtype.storm.topology.TopologyBuilder;import backtype.storm.tuple.Fields;public class WordCountTopoloyMain { public static void main(String[] args) throws Exception { //1.准备一个TopologyBuilder TopologyBuilder builder = new TopologyBuilder(); builder.setSpout("mySpout", new MySpout(), 1); builder.setBolt("mySplitBolt", new MySplitBolt(), 2).shuffleGrouping("mySpout"); builder.setBolt("myCountBolt", new MyCountBolt(), 2).fieldsGrouping("mySplitBolt", new Fields("word")); //2.创建一个configuration,用来指定当前的topology需要的worker的数量 Config config = new Config(); config.setNumWorkers(4); //3.任务提交 两种模式————本地模式和集群模式 //集群模式 //StormSubmitter.submitTopology("myWordCount", config, builder.createTopology()); //本地模式 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("myWordCount", config, builder.createTopology()); }}
三种求 WordCount 方式的比较:
整体运行架构图: