01.Flink实时数据管理—自定义MysqlSource + 广播变量创建

696 阅读1分钟

引言

需求背景:ETL离线作业,需要实时监控运行状况,由于调度使用Azkaban,故同步获取其后台配置库Mysql;本文记录第一步:获取projects工程码表,并作为广播变量,供后续使用。

功能部件

Flink + Scalikejdbc + Scala

实现逻辑

  1. 使用scalikejdbc 构建jdbc连接池,连接Azkaban元数据库Mysql;
  2. 自定义MysqlSource,继承实现RichSourceFunction方法;
  3. flink,添加数据源,连接Mysql,获取projects工程属性表,并设置广播变量;

实现Demo

  1. 使用scalikejdbc构建,mysql-jdbc连接池

import scalikejdbc._
import scalikejdbc.config.DBs

/**
 * @Title: DBUtil
 * @Author: moun
 * @Desc:  Database access utility backed by scalikejdbc connection pools.
 *
 */

object DBUtils {

  // Initialise every connection pool declared in application.conf
  // (the file must sit on the classpath, e.g. under resources/).
  DBs.setupAll()

  /**
   * Executes a read-only query against the named database pool.
   *
   * @param dbName    symbol matching a pool key configured in application.conf
   * @param selectSQL the SQL statement to run
   * @param resultSet mapper converting each row into an A
   * @return every mapped row, in result-set order
   */
  def select[A](dbName: Symbol, selectSQL: SQL[Nothing, NoExtractor], resultSet: WrappedResultSet => A): List[A] = {
    NamedDB(dbName).readOnly { implicit session =>
      selectSQL.map(resultSet).list().apply()
    }
  }

  /** Closes the pool backing one named database. */
  def closeOne(dbName: Symbol): Unit = DBs.close(dbName)

  /** Closes every pool opened by setupAll(). */
  def close(): Unit = DBs.closeAll()

}

注意事项:

  • application.conf文件,放置在resources目录下;
  • 配置文件,key命名需要和监控对应的库名保持一致;
  2. 自定义MysqlSource,继承RichSourceFunction富函数,实现open、run、close方法

import com.haierubic.bigdata.dataflow.models.Domain.{ProjectsClass, projectsSet}
import com.haierubic.bigdata.dataflow.utils.{ConfigParse, DBUtils}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import scalikejdbc.SQL

/**
 * Flink source that loads the Azkaban `projects` table from MySQL.
 *
 * @param exec_sql     SELECT statement to execute against the configured pool
 * @param hourInterval refresh period in hours; 0 (default) means query once
 *                     and finish, making this a bounded source
 */
class CustomMysqlSource(exec_sql: String, hourInterval: Int = 0) extends RichSourceFunction[ProjectsClass] {

  // volatile so a cancel() issued from another thread is seen by the run() loop
  @volatile var isRunning = true
  var queryDbName: Symbol = _

  override def open(parameters: Configuration): Unit = {
    // Resolve the connection-pool name from config; the pool itself is
    // created/managed by DBUtils (scalikejdbc DBs.setupAll).
    queryDbName = Symbol(ConfigParse.getString("bg.broadcast_dim.database"))
  }

  /**
   * Queries the projects table and emits each row to the stream.
   * With hourInterval > 0 the query repeats every hourInterval hours until
   * cancel() flips isRunning; with the default 0 it runs exactly once,
   * which matches the original single-shot behaviour.
   */
  override def run(sourceContext: SourceFunction.SourceContext[ProjectsClass]): Unit = {
    do {
      val resultList = DBUtils.select[ProjectsClass](queryDbName, SQL(exec_sql), projectsSet)
      resultList.foreach(sourceContext.collect)  // foreach, not map: emission is a side effect
      if (isRunning && hourInterval > 0) Thread.sleep(3600000L * hourInterval)
    } while (isRunning && hourInterval > 0)
  }

  override def close(): Unit = {
    // Release only this source's pool; other pools stay open.
    DBUtils.closeOne(queryDbName)
  }

  override def cancel(): Unit = isRunning = false

}

  3. flink添加数据源,读取数据后并广播出去

import com.haierubic.bigdata.dataflow.models.Sentence
import com.haierubic.bigdata.dataflow.udf.CustomMysqlSource
import com.haierubic.bigdata.dataflow.utils.ConfigParse
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._

/**
 * Broadcasts the projects dimension table: reads it via the custom
 * MysqlSource and republishes the stream as a Flink broadcast stream.
 * created by moun
 */

object QueryProjects extends Sentence {

  def getProjects(env: StreamExecutionEnvironment) = {

    // Resolve which database/table holds the broadcast dimension data,
    // then build the SELECT statement for it.
    val dimDbName = ConfigParse.getString("bg.broadcast_dim.database")
    val dimTableName = ConfigParse.getString("bg.broadcast_dim.tablename")
    val querySql = getBroadCastInfo(dimDbName, dimTableName)

    // Descriptor under which downstream operators access the broadcast state.
    val projectsDimDesc = new MapStateDescriptor(
      "projects",
      BasicTypeInfo.STRING_TYPE_INFO,
      BasicTypeInfo.STRING_TYPE_INFO
    )

    // Attach the MySQL-backed source and broadcast it to every parallel subtask.
    env.addSource(new CustomMysqlSource(querySql)).broadcast(projectsDimDesc)
  }

}