这是我参与2022首次更文挑战的第23天,活动详情查看:2022首次更文挑战
简介
本篇介绍大数据处理框架Storm的示例使用,将从依赖、代码等方面详细展示如何运行一个Storm示例
本地模式
Maven 配置
在pom.xml文件添加下面的依赖,本地模式运行需要去掉scope字段,暂时把它注释掉,在项目运行的vm参数中填写-Djava.net.preferIPv4Stack=true,避免一些运行错误
<!-- storm -->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.2.2</version>
<!--<scope>provided</scope>-->
</dependency>
spout类
可以把它看做生产消息的组件,任意建立一个类继承BaseRichSpout即可,在IDEA中继承后点击错误提示即可自动添加需要实现的方法。
open函数可以看做初始化函数。nextTuple函数是处理函数,在其中进行数据的获取或者初步的处理,通过collector.emit提交到下一个组件中。
package spout;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import java.util.Map;
public class ReadEventSpout extends BaseRichSpout{
private int count;
private SpoutOutputCollector collector;
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.collector = spoutOutputCollector;
++count;
}
public void nextTuple() {
++count;
collector.emit(new Values(String.valueOf(count)));
Utils.sleep(1000);
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("event"));
}
}
bolt类
接收spout组件的消息进行处理,execute函数为处理函数,spout传过来的数据在tuple中,获取后进行处理
package bolt;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Tuple;
public class AnalysisBolt extends BaseBasicBolt{
public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
System.out.println(tuple.getString(0));
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
}
}
topology类
package trident;
import bolt.AnalysisBolt;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;
import spout.ReadEventSpout;
/**
 * Local-mode entry point: wires {@code ReadEventSpout} to {@code AnalysisBolt}
 * and submits the topology to an in-process {@code LocalCluster}.
 */
public class CorrelationAnalysisTopology {

    /**
     * Builds and submits the topology to a local cluster.
     *
     * @param args optional; when present, {@code args[0]} replaces the default
     *             topology name {@code "CorrelationAnalysis"}
     */
    public static void main(String[] args) {
        // The first command-line argument, if any, overrides the default name.
        final boolean hasNameArg = args != null && args.length > 0;
        final String topologyName = hasNameArg ? args[0] : "CorrelationAnalysis";

        Config config = new Config();
        config.setDebug(false);
        config.setNumWorkers(3);

        // spout --(shuffle grouping)--> bolt
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new ReadEventSpout());
        builder.setBolt("bolt", new AnalysisBolt()).shuffleGrouping("spout");

        // Cluster submission would instead call:
        // StormSubmitter.submitTopologyWithProgressBar(topologyName, config, builder.createTopology());
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(topologyName, config, builder.createTopology());
    }
}
集群提交运行
Maven配置
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the cluster-submitted topology jar.
  The maven-shade-plugin bundles the dependencies into one jar whose manifest
  main class is trident.CorrelationAnalysisTopology.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>NewCorrelationAnalysis</groupId>
    <artifactId>NewCorrelationAnalysis</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <!-- log4j support -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
            <scope>compile</scope>
        </dependency>
        <!-- kafka -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.0</version>
        </dependency>
        <!-- json gson -->
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.0</version>
        </dependency>
        <!-- jedis redis -->
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.8.1</version>
        </dependency>
        <!-- csv -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.6</version>
        </dependency>
        <!-- storm -->
        <!--
          For cluster submission storm-core must be "provided": the Storm cluster
          supplies these classes at runtime, so they must not be shaded into the jar.
          Remove or comment out the scope only when running in local mode from the IDE.
        -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.2.2</version>
            <scope>provided</scope>
        </dependency>
        <!-- junit test -->
        <!-- NOTE(review): the RELEASE metaversion makes builds non-reproducible; consider pinning explicit versions. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>RELEASE</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>RELEASE</version>
            <scope>test</scope>
        </dependency>
        <!-- mongodb -->
        <!-- NOTE(review): the driver and bson versions differ (3.2.2 vs 3.7.1); confirm they are compatible on one classpath. -->
        <dependency>
            <groupId>org.mongodb</groupId>
            <artifactId>mongo-java-driver</artifactId>
            <version>3.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.mongodb</groupId>
            <artifactId>bson</artifactId>
            <version>3.7.1</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <!-- NOTE(review): no plugin version is pinned; consider restoring an explicit version. -->
                <!--<version>2.4.1</version>-->
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <!-- Writes the Main-Class entry into the shaded jar's manifest. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>trident.CorrelationAnalysisTopology</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
Topology类修改
package trident;
import bolt.AnalysisBolt;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.topology.TopologyBuilder;
import spout.ReadEventSpout;
/**
 * Cluster-mode entry point: wires {@code ReadEventSpout} to {@code AnalysisBolt}
 * and submits the topology through {@code StormSubmitter}.
 */
public class CorrelationAnalysisTopology {

    /**
     * Builds the topology and submits it with a progress bar.
     *
     * @param args optional; when present, {@code args[0]} replaces the default
     *             topology name {@code "CorrelationAnalysis"}
     * @throws InvalidTopologyException propagated from the submit call
     * @throws AuthorizationException   propagated from the submit call
     * @throws AlreadyAliveException    propagated from the submit call
     */
    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        // Resolve the topology name first: CLI argument wins over the default.
        String topologyName;
        if (args == null || args.length == 0) {
            topologyName = "CorrelationAnalysis";
        } else {
            topologyName = args[0];
        }

        // spout --(shuffle grouping)--> bolt
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new ReadEventSpout());
        builder.setBolt("bolt", new AnalysisBolt()).shuffleGrouping("spout");

        Config config = new Config();
        config.setDebug(false);
        config.setNumWorkers(3);

        StormSubmitter.submitTopologyWithProgressBar(topologyName, config, builder.createTopology());
        // Local-mode alternative:
        // LocalCluster cluster = new LocalCluster();
        // cluster.submitTopology(topologyName, config, builder.createTopology());
    }
}
服务器提交
/opt/apache-storm-1.2.2/bin/storm jar ./target/NewCorrelationAnalysis-1.0-SNAPSHOT.jar trident.CorrelationAnalysisTopology ca