Flink Stream 多数据源


MySQL

    <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.15</version>
    </dependency>
//MySQL Source
import java.sql.{Connection, DriverManager, Statement}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}

class MysqlSource1 extends RichSourceFunction[Int] {
  private var isRun = true
  private var connection: Connection = _
  private var statement: Statement = _

  override def run(sourceContext: SourceFunction.SourceContext[Int]): Unit = {
    while (isRun) {
      val set = statement.executeQuery("select * from flink.ad_blacklist")
      while (set.next()) {
        val i = set.getInt(1)
        sourceContext.collect(i)
      }
      // Pause between polls so the repeated query does not hammer MySQL in a tight loop.
      Thread.sleep(1000)
    }
  }

  override def cancel(): Unit = isRun = false

//Always establish the connection in open(); otherwise you will hit a "too many connections" error.
  override def open(parameters: Configuration): Unit = {
    val url = "jdbc:mysql://192.168.30.102:3306/flink?characterEncoding=utf8&useSSL=false"
    Class.forName("com.mysql.cj.jdbc.Driver")
    connection = DriverManager.getConnection(url, "root", "123")
    statement = connection.createStatement()
  }

  override def close(): Unit = {
    // Close the statement before the connection it belongs to.
    if (statement != null) {
      statement.close()
    }
    if (connection != null) {
      connection.close()
    }
  }
}
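Once the source is defined, it plugs into a job like any other source. A minimal usage sketch (the print sink and the single-parallelism setting are just for illustration):

// Usage sketch: read the blacklist ids and print them.
import org.apache.flink.streaming.api.scala._

object MysqlSourceDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1) // one polling connection is enough for a demo
    env.addSource(new MysqlSource1).print()
    env.execute("mysql-source-demo")
  }
}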
//MySQL Sink
import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}

class MysqlSink extends RichSinkFunction[(String, Long)] {

  var connect: Connection = _
  var insertStmt: PreparedStatement = _
  var update: PreparedStatement = _
  var select: PreparedStatement = _

  val url = "jdbc:mysql://192.168.30.102:3306/flink?characterEncoding=utf8&useSSL=false"


  override def invoke(value: (String, Long), context: SinkFunction.Context[_]): Unit = {
    // value is the element coming from the stream.
    // Put the actual MySQL sink logic here (see the sketch after this class).
  }

  override def open(parameters: Configuration): Unit = {
    Class.forName("com.mysql.cj.jdbc.Driver")
    connect = DriverManager.getConnection(url, "root", "123")
    insertStmt = connect.prepareStatement("write your SQL here, with ? placeholders")
    select = connect.prepareStatement("write your SQL here, with ? placeholders")
    update = connect.prepareStatement("write your SQL here, with ? placeholders")
  }

  override def close(): Unit = {
    insertStmt.close()
    update.close()
    select.close()
    connect.close()
  }
}
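A common way to fill in invoke() is the check-then-update-or-insert (upsert) pattern. The sketch below is only an illustration and assumes a hypothetical table flink.word_count(word VARCHAR, cnt BIGINT); adapt the SQL to your own schema. It replaces the two methods inside the MysqlSink class above:

// Sketch: upsert a (word, count) pair. The table flink.word_count(word, cnt) is hypothetical.
override def open(parameters: Configuration): Unit = {
  Class.forName("com.mysql.cj.jdbc.Driver")
  connect = DriverManager.getConnection(url, "root", "123")
  insertStmt = connect.prepareStatement("insert into flink.word_count (word, cnt) values (?, ?)")
  update     = connect.prepareStatement("update flink.word_count set cnt = ? where word = ?")
}

override def invoke(value: (String, Long), context: SinkFunction.Context[_]): Unit = {
  // Try the update first; if no row was touched, insert a new one.
  update.setLong(1, value._2)
  update.setString(2, value._1)
  if (update.executeUpdate() == 0) {
    insertStmt.setString(1, value._1)
    insertStmt.setLong(2, value._2)
    insertStmt.execute()
  }
}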

Spring Data JPA + HikariCP MySQL data source

<!-- https://mvnrepository.com/artifact/org.springframework.data/spring-data-jpa -->
<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-jpa</artifactId>
    <version>2.2.4.RELEASE</version>
</dependency>
// source
Sections left empty like this are still pending and will be filled in later.
// sink

Spring Data JDBC + HikariCP

<!-- https://mvnrepository.com/artifact/org.springframework.data/spring-data-jdbc -->
<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-jdbc</artifactId>
    <version>1.1.4.RELEASE</version>
</dependency>
// source
// sink

Redis

<!-- redis client -->
<dependency>
    <groupId>io.lettuce</groupId>
    <artifactId>lettuce-core</artifactId>
    <version>5.2.1.RELEASE</version>
</dependency>
//redis Source
//redis sink
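No example is written here yet. As a placeholder, here is a minimal sketch of what a Lettuce-based sink could look like: it assumes a Redis instance at 192.168.30.102:6379 and simply SETs each element under its own key. This is an illustration, not the article's final implementation.

// Sketch: a RichSinkFunction that writes each (key, value) pair to Redis via Lettuce.
import io.lettuce.core.RedisClient
import io.lettuce.core.api.StatefulRedisConnection
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}

class LettuceRedisSink extends RichSinkFunction[(String, String)] {
  private var client: RedisClient = _
  private var connection: StatefulRedisConnection[String, String] = _

  override def open(parameters: Configuration): Unit = {
    // Open the client once per parallel instance, not once per record.
    client = RedisClient.create("redis://192.168.30.102:6379")
    connection = client.connect()
  }

  override def invoke(value: (String, String), context: SinkFunction.Context[_]): Unit = {
    connection.sync().set(value._1, value._2)
  }

  override def close(): Unit = {
    if (connection != null) connection.close()
    if (client != null) client.shutdown()
  }
}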

Spring Data Redis

<!-- https://mvnrepository.com/artifact/org.springframework.data/spring-data-redis -->
<dependency>
    <groupId>org.springframework.data</groupId>
    <artifactId>spring-data-redis</artifactId>
    <version>2.2.4.RELEASE</version>
</dependency>

//source
//sink

Apache Bahir Redis

<!-- https://mvnrepository.com/artifact/org.apache.bahir/flink-connector-redis_2.11 -->
<dependency>
    <groupId>org.apache.bahir</groupId>
    <artifactId>flink-connector-redis_2.11</artifactId>
    <version>1.0</version>
</dependency>
//sink
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}

object bahirDemo {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Create the Jedis connection pool config
    val config = new FlinkJedisPoolConfig.Builder().setHost("192.168.30.102").build()

    // Describe the Redis command and how to extract the key and value from each element
    class myRedisMapper extends RedisMapper[(String, String)] {
      override def getCommandDescription: RedisCommandDescription =
        new RedisCommandDescription(RedisCommand.HSET, "hash_name")

      override def getKeyFromData(t: (String, String)): String = t._1

      override def getValueFromData(t: (String, String)): String = t._2
    }

    val value = env.readTextFile("E:\\ideaproject\\FlinkTemplate\\Stream\\src\\main\\resources\\hello.txt")
    val stringString = value.map(e => (e, "1"))
    stringString.addSink(new RedisSink[(String, String)](config, new myRedisMapper))

    env.execute()
  }
}
// Redis Cluster (use FlinkJedisClusterConfig, not the single-node pool config)
val conf = new FlinkJedisClusterConfig.Builder().setNodes(...).build()
stream.addSink(new RedisSink[(String, String)](conf, new RedisExampleMapper))
// Redis Sentinel
val conf = new FlinkJedisSentinelConfig.Builder().setMasterName("master").setSentinels(...).build()
stream.addSink(new RedisSink[(String, String)](conf, new RedisExampleMapper))

ES 6 and above

For Elasticsearch, Flink ships an official connector, so just use the official one.

<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-elasticsearch6 -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
    <version>1.9.0</version>
</dependency>
<!-- Note: the Scala version here is 2.11; change it if you build with Scala 2.12 -->

// source
// sink
import java.util

import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests

object esSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val value = env.readTextFile("E:\\ideaproject\\FlinkTemplate\\Stream\\src\\main\\resources\\hello.txt")

    val hosts = new util.ArrayList[HttpHost]()
    hosts.add(new HttpHost("192.168.30.102", 9200, "http"))

    val esSink = new ElasticsearchSink.Builder[String](hosts, new ElasticsearchSinkFunction[String] {
      override def process(t: String, runtimeContext: RuntimeContext, requestIndexer: RequestIndexer): Unit = {
        val json = new util.HashMap[String, String]()
        json.put("data", t)
        val request = Requests.indexRequest()
          .index("hello")
          .`type`("helloData")
          .source(json)
        requestIndexer.add(request)
      }
    })
    // Flush after every element so the demo writes to ES immediately (requests are bulk-buffered by default).
    esSink.setBulkFlushMaxActions(1)

    value.addSink(esSink.build())

    env.execute()
  }
}

Kafka

<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.9.2</version>
</dependency>
// source
import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.kafka.clients.consumer.ConsumerConfig

val broker = "192.168.30.102:9092"
val topic = "realAD"
env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime)
val kafkaPro = new Properties()
kafkaPro.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, broker)
kafkaPro.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "adJob")
// Only the legacy 0.8 consumer needs ZooKeeper; the universal FlinkKafkaConsumer ignores this property.
kafkaPro.setProperty("zookeeper.connect", "192.168.30.102:2181")
val KafkaAdData = env.addSource(new FlinkKafkaConsumer(topic, new SimpleStringSchema(), kafkaPro))
// sink
val stream: DataStream[String] = ...

// The dependency above is the universal Kafka connector, so use FlinkKafkaProducer
// (FlinkKafkaProducer011 belongs to the separate 0.11 connector).
val myProducer = new FlinkKafkaProducer[String](
        "localhost:9092",         // broker list
        "my-topic",               // target topic
        new SimpleStringSchema)   // serialization schema

// versions 0.10+ allow attaching the records' event timestamp when writing them to Kafka;
// this method is not available for earlier Kafka versions
myProducer.setWriteTimestampToKafka(true)

stream.addSink(myProducer)

HDFS

// source
// sink
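Nothing is written here yet. As a placeholder, a minimal sketch of an HDFS sink using Flink's built-in StreamingFileSink; the namenode address and output path are illustrative, and the Hadoop filesystem dependencies must be on the classpath:

// Sketch: write a DataStream[String] to HDFS as rolling text files.
import org.apache.flink.api.common.serialization.SimpleStringEncoder
import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink

val hdfsSink = StreamingFileSink
  .forRowFormat(new Path("hdfs://192.168.30.102:9000/flink/output"), new SimpleStringEncoder[String]("UTF-8"))
  .build()

// stream is assumed to be an existing DataStream[String]
stream.addSink(hdfsSink)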

Simplest version

// source
import org.apache.flink.streaming.api.functions.source.SourceFunction

class demo() extends SourceFunction[Int] {
  var running = true

  override def run(sourceContext: SourceFunction.SourceContext[Int]): Unit = {
    while (running) {
      // Emit data of the same type as the source's type parameter
      sourceContext.collect(1)
    }
  }

  override def cancel(): Unit = running = false
}
// Usage
env.addSource(new demo)
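Put together, a complete job using this source could look like the following sketch (the print sink is just for illustration):

import org.apache.flink.streaming.api.scala._

object demoJob {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.addSource(new demo).print()   // the custom source defined above
    env.execute("simple-source-demo")
  }
}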