引言
需求背景:ETL离线作业需要实时监控运行状况;由于调度使用Azkaban,故同步读取其后台配置库MySQL。本文记录第一步:获取projects工程码表,并作为广播变量,供后续使用。
功能部件
Flink + Scalikejdbc + Scala
实现逻辑
- 使用scalikejdbc构建JDBC连接池,连接Azkaban元数据库MySQL;
- 自定义MysqlSource,继承RichSourceFunction并实现其方法;
- flink添加数据源,连接MySQL,获取projects工程属性表,并设置广播变量;
实现Demo
- 使用scalikejdbc构建,mysql-jdbc连接池
import scalikejdbc._
import scalikejdbc.config.DBs
/**
 * Database access helper built on ScalikeJDBC.
 *
 * All connection pools declared in application.conf are initialised once,
 * when this object is first referenced.
 *
 * @author moun
 */
object DBUtils {

  // Set up every connection pool configured in application.conf.
  DBs.setupAll()

  /**
   * Execute a read-only query against the named database.
   *
   * @param dbName    symbol matching a db.&lt;name&gt; entry in application.conf
   * @param selectSQL the SQL statement to run
   * @param resultSet row mapper turning a JDBC result row into an A
   * @return every mapped row, in result-set order
   */
  def select[A](dbName: Symbol, selectSQL: SQL[Nothing, NoExtractor], resultSet: WrappedResultSet => A): List[A] = {
    NamedDB(dbName).readOnly { implicit session =>
      selectSQL.map(resultSet).list().apply()
    }
  }

  /** Close the connection pool of a single named database. */
  def closeOne(dbName: Symbol): Unit = DBs.close(dbName)

  /** Close every pool opened by setupAll(). */
  def close(): Unit = DBs.closeAll()
}
注意事项:
- application.conf文件,放置在resources目录下;
- 配置文件,key命名需要和监控对应的库名保持一致;
- 自定义MysqlSource,继承RichSourceFunction富函数,实现open、run、cancel、close方法
import com.haierubic.bigdata.dataflow.models.Domain.{ProjectsClass, projectsSet}
import com.haierubic.bigdata.dataflow.utils.{ConfigParse, DBUtils}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import scalikejdbc.SQL
/**
 * Flink source that reads the Azkaban `projects` table from MySQL and emits
 * one [[ProjectsClass]] per row.
 *
 * @param exec_sql     SQL statement executed against the metadata database
 * @param hourInterval reserved polling interval in hours (0 = emit once and finish)
 */
class CustomMysqlSource(exec_sql: String, hourInterval: Int = 0) extends RichSourceFunction[ProjectsClass] {
  // cancel() is invoked from a different thread than run(); mark volatile
  // so the flag change is visible across threads.
  @volatile var isRunning = true
  var queryDbName: Symbol = _

  override def open(parameters: Configuration): Unit = {
    // Resolve the configured database name; the pool itself is owned by DBUtils.
    queryDbName = Symbol(ConfigParse.getString("bg.broadcast_dim.database"))
  }

  override def run(sourceContext: SourceFunction.SourceContext[ProjectsClass]): Unit = {
    // Respect cancellation: skip the query if the job was cancelled before run() started.
    if (isRunning) {
      // Fetch the whole projects table as a List[ProjectsClass].
      val resultList = DBUtils.select[ProjectsClass](queryDbName, SQL(exec_sql), projectsSet)
      // Side-effecting emission only: foreach, not map (the mapped list was discarded).
      resultList.foreach(sourceContext.collect)
      // TODO: when hourInterval > 0, re-query periodically instead of finishing.
      // Thread.sleep(3600000L * hourInterval)
    }
  }

  override def close(): Unit = {
    // Release this source's named connection pool.
    DBUtils.closeOne(queryDbName)
  }

  override def cancel(): Unit = isRunning = false
}
- flink添加数据源,读取数据后并广播出去
import com.haierubic.bigdata.dataflow.models.Sentence
import com.haierubic.bigdata.dataflow.udf.CustomMysqlSource
import com.haierubic.bigdata.dataflow.utils.ConfigParse
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
/**
 * Broadcasts the Azkaban `projects` dimension table.
 *
 * Builds the query from configuration, reads the table through
 * [[CustomMysqlSource]], and broadcasts the resulting stream so downstream
 * operators can join against it.
 *
 * created by moun
 */
object QueryProjects extends Sentence {

  /**
   * Attach the projects source to `env` and broadcast it.
   *
   * @param env the Flink streaming environment to attach the source to
   * @return the broadcast stream of projects rows
   */
  def getProjects(env: StreamExecutionEnvironment) = {
    // Database / table to broadcast, read from application configuration.
    val broadCastDBName = ConfigParse.getString("bg.broadcast_dim.database")
    val broadCastTbName = ConfigParse.getString("bg.broadcast_dim.tablename")
    val exec_sql = getBroadCastInfo(broadCastDBName, broadCastTbName)

    // Descriptor for the broadcast state. Explicit [String, String] type
    // parameters avoid raw-type inference on MapStateDescriptor.
    val projectsDimDesc = new MapStateDescriptor[String, String](
      "projects",
      BasicTypeInfo.STRING_TYPE_INFO,
      BasicTypeInfo.STRING_TYPE_INFO
    )

    val projects = env.addSource(new CustomMysqlSource(exec_sql))
    val dimProjects = projects.broadcast(projectsDimDesc)
    dimProjects
  }
}