准备 坐标引入
<properties>
<flink.version>1.15.4</flink.version>
<java.version>11</java.version>
</properties>
<dependencies>
<!-- Client-side entry points (LocalEnvironment, job submission) -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Table planner: required when running inside the IDE (e.g. IntelliJ IDEA) -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Table API <-> DataStream bridge dependency -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Needed to create a Source via JdbcInputFormat and for the 'jdbc' SQL connector -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- MySQL JDBC driver (provides com.mysql.cj.jdbc.Driver) -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.30</version>
</dependency>
</dependencies>
体验一 使用JDBC
public class JdbcMysqlJob {

    /**
     * Reads rows from the MySQL table {@code sys_dept} with {@code JdbcInputFormat}
     * and prints them to stdout.
     */
    public static void main(String[] args) throws Exception {
        // Local environment with a single parallel task so the printed output is stable.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironment();
        environment.setParallelism(1);

        // Column types and names of the result set, combined into a RowTypeInfo
        // so Flink knows how to deserialize each JDBC row.
        TypeInformation[] columnTypes = new TypeInformation[]{
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
        };
        String[] columnNames = new String[]{"dept_id", "parent_id", "dept_name"};
        RowTypeInfo resultTypeInfo = new RowTypeInfo(columnTypes, columnNames);

        // Describe how to connect to MySQL and which query to run.
        // NOTE(review): credentials are hard-coded for the demo — externalize in real code.
        JdbcInputFormat inputFormat = new JdbcInputFormat.JdbcInputFormatBuilder()
                .setDrivername("com.mysql.cj.jdbc.Driver")
                .setDBUrl("jdbc:mysql://localhost:3306/flink_test")
                .setUsername("root")
                .setPassword("root")
                .setQuery("select dept_id,parent_id,dept_name from sys_dept")
                .setRowTypeInfo(resultTypeInfo)
                .finish();

        DataStreamSource<Row> rows = environment.createInput(inputFormat);
        rows.print();
        environment.execute();
    }
}
体验二 使用Table API
// Streaming environment with explicit STREAMING mode and parallelism 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

// Schema of the JDBC source table; dept_id is the primary key, so it must be NOT NULL.
Schema tableSchema = Schema.newBuilder()
        .column("dept_id", DataTypes.BIGINT().notNull())
        .column("parent_id", DataTypes.BIGINT())
        .column("dept_name", DataTypes.STRING())
        .primaryKey("dept_id")
        .build();

// Register a temporary table backed by the 'jdbc' connector.
tableEnv.createTemporaryTable("sys_dept", TableDescriptor.forConnector("jdbc")
        .schema(tableSchema)
        .option("table-name", "sys_dept")
        .option("driver", "com.mysql.cj.jdbc.Driver")
        .option("url", "jdbc:mysql://localhost:3306/flink_test")
        .option("username", "root")
        .option("password", "root")
        .build());

tableEnv.from("sys_dept").printSchema();

// FIX: use the Expression-based groupBy ($("parent_id")) to match the $() calls in
// select(); the old String-based expression API is deprecated and no longer
// available in Flink 1.15's Table API.
TableResult tableResult = tableEnv.from("sys_dept").groupBy($("parent_id"))
        .select($("parent_id"), $("dept_id").count().as("cnt")).execute();
tableResult.print();

// "No operators defined in streaming topology. Cannot execute."
// When tableEnv.executeSql(...) or statementSet.execute() has already run the job,
// calling env.execute() again is unnecessary and fails with the error above.
// env.execute();
体验三 SqlCreate 语句
// Same job expressed with a SQL DDL statement instead of the Table API builders.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
env.setParallelism(1);
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

// DDL registering the MySQL table through the 'jdbc' connector.
String sqlStatement = "create table sys_dept( dept_id bigint,parent_id bigint,dept_name String)with("
        + "'connector'='jdbc',"
        + "'url'='jdbc:mysql://localhost:3306/flink_test',"
        + "'driver'='com.mysql.cj.jdbc.Driver',"
        + "'username'='root',"
        + "'password'='root',"
        + "'table-name'='sys_dept'"
        + ")";
tableEnv.executeSql(sqlStatement);

// Aggregate query: count departments per parent and print the result.
tableEnv.executeSql("select parent_id ,count(dept_id) as cnt from sys_dept group by parent_id")
        .print();
总结
通过以上三种方式的对比可以发现,SQL 写法最为简洁。当然也可以自定义 Source,后续接入 Flink CDC,并根据业务需要实现相应的算子计算。