1. Add the dependencies
<properties>
    <java.version>1.8</java.version>
    <storm.version>1.2.2</storm.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
        <exclusions>
            <exclusion>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-to-slf4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>ch.qos.logback</groupId>
                <artifactId>logback-classic</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <!-- kafka -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka-client</artifactId>
        <version>${storm.version}</version>
    </dependency>
    <!-- test -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>
    <!-- storm -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>${storm.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>com.codahale.metrics</groupId>
        <artifactId>metrics-core</artifactId>
        <version>3.0.2</version>
        <scope>provided</scope>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!--
        <plugin>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-maven-plugin</artifactId>
            <configuration>
                <fork>true</fork>
            </configuration>
        </plugin>
        -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <createDependencyReducedPom>false</createDependencyReducedPom>
                <artifactSet>
                    <excludes>
                        <exclude>commons-logging:commons-logging</exclude>
                        <exclude>javax.servlet:servlet-api</exclude>
                        <exclude>javax.mail:javax.mail-api</exclude>
                    </excludes>
                </artifactSet>
            </configuration>
        </plugin>
    </plugins>
</build>
2. Write an ApplicationContext utility class
@Component
public class SpringBeanUtils implements ApplicationContextAware {

    /**
     * The ApplicationContext instance.
     */
    private static ApplicationContext applicationContext;

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        SpringBeanUtils.applicationContext = applicationContext;
    }

    /**
     * Get the ApplicationContext.
     */
    public static ApplicationContext getApplicationContext() {
        return applicationContext;
    }

    /**
     * Get a bean by name.
     */
    public static Object getBean(String name) {
        return getApplicationContext().getBean(name);
    }

    /**
     * Get a bean by class.
     */
    public static <T> T getBean(Class<T> clazz) {
        return getApplicationContext().getBean(clazz);
    }

    /**
     * Get a bean by name and class.
     */
    public static <T> T getBean(String name, Class<T> clazz) {
        return getApplicationContext().getBean(name, clazz);
    }
}
3. Write the spout
/**
 * Word-count spout. To enable the ack mechanism, the number of ackers must be greater
 * than 0, and every emitted tuple must carry a msgId.
 *
 * @Author: 190503
 * @Since: 2019-05-28
 * @Version: 1.1.0
 */
public class WorldCountSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;

    private Queue<String> wordQueue = new ConcurrentLinkedQueue<String>();

    /**
     * Tracks emitted messages so that failed ones can be resent.
     */
    private Map<String, Object> failMap = new ConcurrentHashMap<String, Object>();

    /**
     * open() is defined in the ISpout interface and is called when the spout is initialized.
     * It takes three parameters: the Storm configuration map, the topology context of this
     * component, and the collector used to emit tuples.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        System.out.println(conf.get("myconfParam"));
        wordQueue.add("Hello");
        wordQueue.add("World");
        wordQueue.add("Drew");
        // Start the Spring container
        SpringBootStormApplication.run();
        this.collector = collector;
    }

    /**
     * nextTuple() is the core of a spout implementation: it is the main execution method
     * and emits data through collector.emit().
     */
    @Override
    public void nextTuple() {
        while (!wordQueue.isEmpty()) {
            String world = wordQueue.poll();
            if (Optional.ofNullable(world).isPresent()) {
                // collector.emit(new Values(world)); // emit without a msgId
                // Attach a msgId to the tuple so the message can be identified later
                String key = UUID.randomUUID().toString().replace("-", "");
                // Remember the message so it can be resent on failure
                failMap.put(key, world);
                collector.emit(new Values(world), key);
            }
        }
    }

    /**
     * declareOutputFields() is defined in the IComponent interface and declares the output
     * schema, i.e. which fields an emitted tuple contains.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields fields = new Fields("world");
        declarer.declare(fields);
    }

    /**
     * Called when a tuple has been fully processed.
     *
     * @param obj the msgId passed to emit()
     */
    @Override
    public void ack(Object obj) {
        // Remove the acknowledged message
        failMap.remove(obj);
        System.out.println("acked: " + obj);
    }

    /**
     * Called when the topology is shut down.
     */
    @Override
    public void close() {
        System.out.println("closing...");
    }

    /**
     * Called when a tuple fails.
     */
    @Override
    public void fail(Object obj) {
        System.out.println("failed: " + obj);
        String world = (String) failMap.get(obj);
        // Remove the entry so the message is resent only once
        failMap.remove(obj);
        if (Optional.ofNullable(world).isPresent()) {
            collector.emit(new Values(world));
        }
    }
}
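Note that fail() above re-emits the message without a msgId, so the retried tuple is no longer tracked by the acker and is retried at most once. If retries should stay tracked, a variant of the spout's fail() (a sketch, not part of the original code) keeps the entry in failMap and re-emits with the same msgId; be aware that a message which keeps failing would then be retried indefinitely unless a retry counter is added:

@Override
public void fail(Object obj) {
    String world = (String) failMap.get(obj);
    if (Optional.ofNullable(world).isPresent()) {
        // Keep the entry in failMap and reuse the original msgId,
        // so the retried tuple is still tracked by the acker.
        collector.emit(new Values(world), obj);
    }
}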
4. Write the bolts. Storm provides two kinds of bolt, BasicBolt and RichBolt. With a RichBolt you must manually ack or fail after execute() finishes; a BasicBolt acks automatically when execute() returns, but it only catches FailedException to trigger a fail, so any other exception must be handled by the bolt itself.
/**
 * Word-splitting bolt extending the BaseRichBolt abstract class. To take part in the ack
 * mechanism, ack() or fail() must be called manually after execute() finishes processing.
 *
 * @Author: 190503
 * @Since: 2019-05-28
 * @Version: 1.1.0
 */
public class WorldCutRichBolt extends BaseRichBolt {

    private OutputCollector collector;

    /**
     * Runs before the bolt starts processing and is the hook for setting up the bolt's
     * runtime environment; typically used to instantiate objects that are not serializable.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        // Start the Spring container
        SpringBootStormApplication.run();
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        try {
            String world = input.getStringByField("world");
            // Beans from the Spring container cannot be injected via annotations here;
            // they must be fetched through the ApplicationContext.
            WorldService worldService = (WorldService) SpringBeanUtils.getBean("worldService");
            char[] charArrays = worldService.worldCut(world);
            for (char c : charArrays) {
                collector.emit(new Values(String.valueOf(c)));
            }
            collector.ack(input);
        } catch (Exception e) {
            e.printStackTrace();
            collector.fail(input);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields fields = new Fields("char");
        declarer.declare(fields);
    }
}
/**
 * Word-splitting bolt extending the BaseBasicBolt abstract class. A successful execute()
 * is acked automatically; only FailedException is caught and turned into a fail, any other
 * exception must be handled by the bolt itself.
 *
 * @Author: 190503
 * @Since: 2019-05-28
 * @Version: 1.1.0
 */
public class WorldCutBasicBolt extends BaseBasicBolt {

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Start the Spring container
        SpringBootStormApplication.run();
        super.prepare(stormConf, context);
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        try {
            // throw new Exception();
            String world = input.getStringByField("world");
            // Beans from the Spring container cannot be injected via annotations here;
            // they must be fetched through the ApplicationContext.
            WorldService worldService = (WorldService) SpringBeanUtils.getBean("worldService");
            char[] charArrays = worldService.worldCut(world);
            for (char c : charArrays) {
                collector.emit(new Values(String.valueOf(c)));
            }
        } catch (Exception e) {
            e.printStackTrace();
            throw new FailedException();
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields fields = new Fields("char");
        declarer.declare(fields);
    }
}
public class WorldCountBolt extends BaseBasicBolt {

    private Map<String, Integer> worldcountMap = new ConcurrentHashMap<String, Integer>();

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Start the Spring container
        SpringBootStormApplication.run();
        super.prepare(stormConf, context);
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String c = input.getStringByField("char");
        int count = worldcountMap.getOrDefault(c, 0) + 1;
        worldcountMap.put(c, count);
        System.out.println("Thread: " + Thread.currentThread().getName()
                + ", char '" + c + "' count so far: " + count);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}
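The bolts above look up a bean named worldService and call its worldCut() method, but the service itself is not shown in the original. A minimal sketch of what such a service might look like; only the bean name and the worldCut signature are taken from the bolt code, the implementation body is an assumption:

// Hypothetical sketch of the service the bolts fetch from the Spring container.
@Service("worldService")
public class WorldService {

    /**
     * Splits a word into its individual characters.
     */
    public char[] worldCut(String world) {
        return world == null ? new char[0] : world.toCharArray();
    }
}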
5. Spring Boot launcher class
@SpringBootApplication
public class SpringBootStormApplication {

    private static ConfigurableApplicationContext context = null;

    public static synchronized void run(String... args) {
        if (context == null) {
            context = SpringApplication.run(SpringBootStormApplication.class, args);
        }
    }
}
6. Topology main class
public class WorldCountTopology {

    public static void main(String[] args) throws Exception {
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // parallelism_hint is the number of executor threads and setNumTasks the total number
        // of tasks spread over those threads; the configuration below runs one spout executor
        // with one task.
        topologyBuilder.setSpout("worldCountSpout", new WorldCountSpout(), 1).setNumTasks(1);
        //topologyBuilder.setBolt("worldCutBolt", new WorldCutRichBolt(), 2).shuffleGrouping("worldCountSpout");
        // shuffleGrouping distributes tuples randomly to the next bolt; the configuration below
        // runs two bolt executors, each with one task.
        topologyBuilder.setBolt("worldCutBolt", new WorldCutBasicBolt(), 2)
                .setNumTasks(2)
                .shuffleGrouping("worldCountSpout");
        // fieldsGrouping routes tuples to the next bolt by the value of the "char" field
        topologyBuilder.setBolt("worldCountBolt", new WorldCountBolt(), 2)
                .fieldsGrouping("worldCutBolt", new Fields("char"));

        Config conf = new Config();
        // Disable acking
        //conf.setNumAckers(0);
        conf.put("myconfParam", "test");

        // Local mode
        /*LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("myTopology", conf, topologyBuilder.createTopology());
        Thread.sleep(10000);
        // Shut down the local cluster
        cluster.shutdown();*/

        // Cluster mode
        StormSubmitter.submitTopology("myTopology", conf, topologyBuilder.createTopology());
    }
}
7. Notes
- Storm distributes bolts and spouts across the supervisors of the cluster, and they are serialized and deserialized in transit. Avoid holding non-serializable objects in bolt or spout fields that would have to be shipped; if such an object is required, create and assign it in the component's initialization method (see the sketch after this list).
- For the same reason as the first point, bolts and spouts should not rely on shared variables from other classes: after deserialization on different workers, each bolt or spout ends up with its own copy, which is no longer the same object.
- When Storm is embedded in a Spring Boot program, the normal Spring Boot startup cannot be used. Because of the first point, beans cannot be injected into bolts and spouts via annotations, and since every bolt and spout shipped to a supervisor needs a running Spring environment, the container has to be started in the bolt/spout initialization methods:

@Override
public void prepare(Map stormConf, TopologyContext context) {
    // Start the Spring container
    SpringBootStormApplication.run();
    super.prepare(stormConf, context);
}
- When packaging a Storm + Spring Boot project, the native Spring Boot repackaging cannot be used, because that kind of jar is started through org.springframework.boot.loader.JarLauncher; use the following packaging configuration instead (the same maven-shade-plugin setup as in section 1):

<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-shade-plugin</artifactId>
    <executions>
        <execution>
            <phase>package</phase>
            <goals>
                <goal>shade</goal>
            </goals>
        </execution>
    </executions>
    <configuration>
        <createDependencyReducedPom>false</createDependencyReducedPom>
        <artifactSet>
            <excludes>
                <exclude>commons-logging:commons-logging</exclude>
                <exclude>javax.servlet:servlet-api</exclude>
                <exclude>javax.mail:javax.mail-api</exclude>
            </excludes>
        </artifactSet>
    </configuration>
</plugin>
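To illustrate the first point, here is a sketch (not from the original; the class name is hypothetical) of a bolt that holds a non-serializable resource: the field is marked transient and created in prepare(), so nothing non-serializable has to travel with the serialized bolt instance.

public class AsyncPrintBolt extends BaseBasicBolt {

    // ExecutorService is not serializable, so it must not be created in the
    // constructor or shipped as part of the serialized bolt instance.
    private transient ExecutorService executor;

    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Create the non-serializable resource after deserialization, on the worker.
        executor = Executors.newSingleThreadExecutor();
        super.prepare(stormConf, context);
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String value = input.getString(0);
        executor.submit(() -> System.out.println("processed: " + value));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
    }
}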