springBoot 整合storm

1,245 阅读7分钟

1、添加依赖

<properties>
		<!-- Java version property for the build. -->
		<java.version>1.8</java.version>
		<!-- Storm release; referenced below as ${storm.version} by the storm-core dependency. -->
		<storm.version>1.2.2</storm.version>
	</properties>

	<dependencies>
		<!-- Spring Boot core starter. The logback / log4j-to-slf4j bindings are excluded,
		     presumably to avoid clashing with the logging implementation that Storm
		     provides at runtime (confirm against the target cluster). -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
			<exclusions>
				<exclusion>
					<groupId>org.apache.logging.log4j</groupId>
					<artifactId>log4j-to-slf4j</artifactId>
				</exclusion>
				<exclusion>
					<groupId>ch.qos.logback</groupId>
					<artifactId>logback-classic</artifactId>
				</exclusion>
			</exclusions>
		</dependency>

		<!-- kafka: kept on the same release as storm-core through the shared property -->
		<dependency>
			<groupId>org.apache.storm</groupId>
			<artifactId>storm-kafka-client</artifactId>
			<version>${storm.version}</version>
		</dependency>

		<!-- test -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>

		<!-- storm: scope "provided" keeps it out of the shaded jar -->
		<dependency>
			<groupId>org.apache.storm</groupId>
			<artifactId>storm-core</artifactId>
			<version>${storm.version}</version>
			<scope>provided</scope>
		</dependency>

		<dependency>
			<groupId>com.codahale.metrics</groupId>
			<artifactId>metrics-core</artifactId>
			<version>3.0.2</version>
			<scope>provided</scope>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<!-- The Spring Boot repackaging plugin is deliberately left disabled:
				<plugin> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-maven-plugin</artifactId>
				<configuration> <fork>true</fork> </configuration> </plugin> -->
			<!-- Build a single shaded jar during the package phase instead. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-shade-plugin</artifactId>
				<executions>
					<execution>
						<phase>package</phase>
						<goals>
							<goal>shade</goal>
						</goals>
					</execution>
				</executions>
				<configuration>
					<!-- Do not write a dependency-reduced-pom.xml next to the project POM. -->
					<createDependencyReducedPom>false</createDependencyReducedPom>
					<!-- Artifacts listed here are left out of the shaded jar. -->
					<artifactSet>
						<excludes>
							<exclude>commons-logging:commons-logging</exclude>
							<exclude>javax.servlet:servlet-api</exclude>
							<exclude>javax.mail:javax.mail-api</exclude>
						</excludes>
					</artifactSet>
				</configuration>
			</plugin>
		</plugins>
	</build>

2、编写application工具类

@Component
public class SpringBeanUtils implements ApplicationContextAware {

    /**
     * Application context instance. Stays {@code null} until
     * {@link #setApplicationContext(ApplicationContext)} has been invoked.
     */
    private static ApplicationContext applicationContext;

    /**
     * Stores the supplied context in a static field so that code which is not managed by Spring
     * can look up beans through the static helpers of this class.
     *
     * @param applicationContext the context to remember
     * @throws BeansException declared by the {@link ApplicationContextAware} contract
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext)
        throws BeansException {
        // The field is static, so qualify it with the class name instead of "this".
        SpringBeanUtils.applicationContext = applicationContext;
    }

    /**
     * Returns the stored application context.
     *
     * @return the context, or {@code null} if it has not been set yet
     */
    public static ApplicationContext getApplicationContext() {
        return applicationContext;
    }

    /**
     * Looks up a bean by name.
     *
     * @param name the bean name
     * @return the bean instance
     * @throws IllegalStateException if the application context has not been set yet
     */
    public static Object getBean(String name) {
        return requireContext().getBean(name);
    }

    /**
     * Looks up a bean by type.
     *
     * @param clazz the required bean type
     * @param <T> the bean type
     * @return the bean instance
     * @throws IllegalStateException if the application context has not been set yet
     */
    public static <T> T getBean(Class<T> clazz) {
        return requireContext().getBean(clazz);
    }

    /**
     * Looks up a bean by name and type.
     *
     * @param name the bean name
     * @param clazz the required bean type
     * @param <T> the bean type
     * @return the bean instance
     * @throws IllegalStateException if the application context has not been set yet
     */
    public static <T> T getBean(String name, Class<T> clazz) {
        return requireContext().getBean(name, clazz);
    }

    /** Returns the stored context or fails with a descriptive error instead of a bare NPE. */
    private static ApplicationContext requireContext() {
        ApplicationContext current = applicationContext;
        if (current == null) {
            throw new IllegalStateException(
                "Spring ApplicationContext has not been initialized; start the Spring container first");
        }
        return current;
    }
}

3、编写spout

/**
 * Letter-count spout. For the ack mechanism to be active the number of ackers must be greater
 * than zero and every tuple has to be emitted together with a message id.
 *
 * @Author:190503
 * @Since:2019-05-28
 * @Version:1.1.0
 */
public class WorldCountSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;

    private Queue<String> wordQueue = new ConcurrentLinkedQueue<String>();

    /**
     * Words that were emitted but not acked yet, keyed by message id; used for re-sending.
     */
    private Map<String, Object> failMap = new ConcurrentHashMap<String, Object>();

    /**
     * Initialization hook of the spout. Receives the Storm configuration map, the topology
     * context and the collector used to emit tuples.
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        System.out.println(conf.get("myconfParam"));
        for (String seed : new String[] {"Hello", "World", "Drew"}) {
            wordQueue.add(seed);
        }
        // Start the Spring container.
        SpringBootStormApplication.run();
        this.collector = collector;
    }

    /**
     * Core of the spout: drains the word queue and emits every word through the collector.
     */
    @Override
    public void nextTuple() {
        String word;
        while ((word = wordQueue.poll()) != null) {
            // Attach a message id so the tuple can be identified in ack/fail.
            String msgId = UUID.randomUUID().toString().replace("-", "");
            // Remember the word so it can be re-sent on failure.
            failMap.put(msgId, word);
            collector.emit(new Values(word), msgId);
        }
    }

    /**
     * Declares the output format: each tuple carries the single field "world".
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("world"));
    }

    /**
     * Called when a tuple has been processed successfully.
     *
     * @param obj the message id that was passed to emit
     */
    @Override
    public void ack(Object obj) {
        // The message no longer needs to be kept for a retry.
        failMap.remove(obj);
        System.out.println("成功:" + obj);
    }

    /**
     * Called when the topology is stopped.
     */
    @Override
    public void close() {
        System.out.println("关闭...");
    }

    /**
     * Called when processing of a tuple failed.
     *
     * @param obj the message id that was passed to emit
     */
    @Override
    public void fail(Object obj) {
        System.out.println("失败:" + obj);
        // Drop the record while fetching it: the word is re-sent only once.
        String word = (String)failMap.remove(obj);
        if (word == null) {
            return;
        }
        // Re-emitted without a message id, so this second attempt is not tracked.
        collector.emit(new Values(word));
    }
}

4、编写bolt,storm提供了两种bolt,BasicBolt和RichBolt,RichBolt在执行execute后要手动提交ack或者fail,BasicBolt在execute执行后会自动提交ack,但是只对FailedException异常捕获并自动执行fail方法,其他异常需自己处理。

/**
 * Word-splitting bolt based on BaseRichBolt. With a rich bolt the ack mechanism is manual:
 * after processing, execute has to call ack or fail on the input tuple itself.
 *
 * @Author:190503
 * @Since:2019-05-28
 * @Version:1.1.0
 */
public class WorldCutRichBolt extends BaseRichBolt {

    private OutputCollector collector;

    /**
     * Runs before the bolt starts processing tuples; the place to set up the runtime
     * environment and to create objects that cannot be serialized.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        // Start the Spring container.
        SpringBootStormApplication.run();
        this.collector = collector;
    }

    /**
     * Splits the incoming word into characters and emits one tuple per character, then acks
     * the input. Any exception results in the input tuple being failed.
     *
     * @param input tuple carrying the field "world"
     */
    @Override
    public void execute(Tuple input) {
        try {
            String world = input.getStringByField("world");
            // Beans cannot be injected by annotation here; fetch them from the ApplicationContext.
            WorldService worldService = SpringBeanUtils.getBean("worldService", WorldService.class);
            char[] charArrays = worldService.worldCut(world);
            for (char c : charArrays) {
                // Anchor each output to the input tuple so that a failure further down the
                // topology is propagated back to the spout.
                collector.emit(input, new Values(String.valueOf(c)));
            }
            collector.ack(input);
        } catch (Exception e) {
            e.printStackTrace();
            collector.fail(input);
        }
    }

    /**
     * Declares the output format: each tuple carries the single field "char".
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields fields = new Fields("char");
        declarer.declare(fields);
    }

}



/**
 * Word-splitting bolt based on BaseBasicBolt. The input is acked automatically when execute
 * returns normally; only a FailedException is translated into a fail, so every other
 * exception has to be handled by the bolt itself.
 *
 * @Author:190503
 * @Since:2019-05-28
 * @Version:1.1.0
 */
public class WorldCutBasicBolt extends BaseBasicBolt {

    /**
     * Starts the Spring container before delegating to the base implementation.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Start the Spring container.
        SpringBootStormApplication.run();
        super.prepare(stormConf, context);
    }

    /**
     * Splits the incoming word into characters and emits one tuple per character.
     *
     * @param input tuple carrying the field "world"
     * @param collector collector used to emit the character tuples
     * @throws FailedException if anything goes wrong; the original exception is kept as cause
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        try {
            String world = input.getStringByField("world");
            // Beans cannot be injected by annotation here; fetch them from the ApplicationContext.
            WorldService worldService = SpringBeanUtils.getBean("worldService", WorldService.class);
            char[] charArrays = worldService.worldCut(world);
            for (char c : charArrays) {
                collector.emit(new Values(String.valueOf(c)));
            }
        } catch (Exception e) {
            e.printStackTrace();
            // Convert to FailedException so the tuple is failed, keeping the root cause attached.
            throw new FailedException(e);
        }

    }

    /**
     * Declares the output format: each tuple carries the single field "char".
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields fields = new Fields("char");
        declarer.declare(fields);
    }

}


/**
 * Terminal bolt that keeps a running count per character and prints it.
 */
public class WorldCountBolt extends BaseBasicBolt {

    /** Running count for every character seen by this bolt instance. */
    private Map<String, Integer> worldcountMap = new ConcurrentHashMap<String, Integer>();

    /**
     * Starts the Spring container before delegating to the base implementation.
     */
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
        // Start the Spring container.
        SpringBootStormApplication.run();
        super.prepare(stormConf, context);
    }

    /**
     * Increments the counter of the character carried in field "char" and prints the new total.
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String letter = input.getStringByField("char");
        // merge stores 1 for a new key, otherwise old + 1, and returns the stored value.
        int total = worldcountMap.merge(letter, 1, Integer::sum);
        String threadName = Thread.currentThread().getName();
        System.out.println("线程:" + threadName + "字符" + letter + "目前个数为" + total);
    }

    /**
     * Declares nothing: this bolt emits no tuples.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Intentionally empty.
    }

}

4、springBoot启动类

/**
 * Spring Boot entry class. The container is started on demand through {@link #run(String...)}
 * rather than from a main method.
 */
@SpringBootApplication
public class SpringBootStormApplication {

    /** Context created by the first call to run; {@code null} until then. */
    private static ConfigurableApplicationContext context = null;

    /**
     * Starts the Spring application on the first call; later calls return immediately.
     * The method is synchronized, so concurrent callers cannot start it twice.
     *
     * @param args arguments forwarded to SpringApplication.run
     */
    public static synchronized void run(String... args) {
        if (context != null) {
            return;
        }
        context = SpringApplication.run(SpringBootStormApplication.class, args);
    }

}

5、Topology方法

/**
 * Builds the word-count topology (spout -> cut bolt -> count bolt) and submits it to the cluster.
 */
public class WorldCountTopology {

    /**
     * Wires the components together and submits the topology under the name "myTopology".
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the submission fails
     */
    public static void main(String[] args)
        throws Exception {

        Config conf = new Config();
        // Acking can be switched off with conf.setNumAckers(0).
        conf.put("myconfParam", "test");

        TopologyBuilder builder = new TopologyBuilder();

        // The parallelism hint is the number of executor threads and setNumTasks the total
        // number of tasks: here a single spout thread running a single task.
        builder.setSpout("worldCountSpout", new WorldCountSpout(), 1).setNumTasks(1);

        // Shuffle grouping hands tuples to the bolt tasks at random; two threads, one task each.
        // WorldCutRichBolt can be plugged in here instead of the basic variant.
        builder.setBolt("worldCutBolt", new WorldCutBasicBolt(), 2)
            .setNumTasks(2)
            .shuffleGrouping("worldCountSpout");

        // Fields grouping distributes tuples by the value of the field "char".
        builder.setBolt("worldCountBolt", new WorldCountBolt(), 2)
            .fieldsGrouping("worldCutBolt", new Fields("char"));

        // Local mode alternative: create a LocalCluster, call
        // submitTopology("myTopology", conf, builder.createTopology()) on it, sleep for
        // about ten seconds and then shut the local cluster down.

        // Cluster mode.
        StormSubmitter.submitTopology("myTopology", conf, builder.createTopology());
    }
}

6、注意说明

  • storm的bolt和spout会分发到集群的各个supervisor中,bolt和spout在传输时会被序列化和反序列化,因此在bolt和spout中要尽量避免持有不能序列化的成员对象;若必须使用,可以放到初始化方法(spout的open、bolt的prepare)里赋值
  • 因为第一点提及的问题,bolt和spout同样不要引用其他类的公共变量,因为各个bolt和spout使用的公共变量其实不是同一个对象。
  • storm嵌入springBoot程序,不能使用正常springBoot启动的方式,因为在集群中,由于第一点提及的问题,bolt和spout中不能通过注解的方式获取spring容器中的bean对象,并且因为bolt和spout分发到supervisor中时,都需要启动spring环境,所以要放在bolt和spout初始化方法中启动
    @Override
        public void prepare(Map stormConf, TopologyContext context) {
            // Start the Spring container from the component's own initialization method.
            SpringBootStormApplication.run();
            super.prepare(stormConf, context);
        }


  • storm整合springBoot打包项目的时候,不能使用原生springBoot的打包方式,那种方式的启动是通过org.springframework.boot.loader.JarLauncher类来启动服务的,可以使用以下打包方式
                <!-- Build a single shaded jar during the package phase. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                        </execution>
                    </executions>
                    <configuration>
                        <!-- Do not write a dependency-reduced-pom.xml next to the project POM. -->
                        <createDependencyReducedPom>false</createDependencyReducedPom>
                        <!-- Artifacts listed here are left out of the shaded jar. -->
                        <artifactSet>
                            <excludes>
                                <exclude>commons-logging:commons-logging</exclude>
                                <exclude>javax.servlet:servlet-api</exclude>
                                <exclude>javax.mail:javax.mail-api</exclude>
                            </excludes>
                        </artifactSet>
                    </configuration>
                </plugin>