FlinkTable 体验

186 阅读1分钟

准备 坐标引入

<properties>
    <!-- Referenced as ${flink.version} by every org.apache.flink dependency below -->
    <flink.version>1.15.4</flink.version>
    <java.version>11</java.version>
</properties>
 
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
            <!-- Needed when running from within IDEA -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_2.12</artifactId>
        <version>${flink.version}</version>
    </dependency>
<!--    Table API dependency -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge</artifactId>
        <version>${flink.version}</version>
    </dependency>
<!--    Used to create a source with JdbcInputFormat -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.30</version>
    </dependency>
</dependencies>

体验一 使用JDBC

/**
 * Minimal Flink streaming job that reads the {@code sys_dept} table from MySQL through
 * {@link JdbcInputFormat} and prints every row.
 */
public class JdbcMysqlJob {
    /**
     * Builds the JDBC input format, attaches it to a local stream environment as a source,
     * prints the rows and runs the job.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job cannot be built or executed
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);

        // Column types and names, listed in the same order as the columns of the query below.
        // The wildcard avoids a raw-type array while still allowing mixed element types.
        TypeInformation<?>[] fieldTypes = {
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO
        };
        String[] fieldNames = {"dept_id", "parent_id", "dept_name"};
        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes, fieldNames);

        // Obtain the builder through the static factory rather than instantiating it directly.
        JdbcInputFormat jdbc = JdbcInputFormat.buildJdbcInputFormat()
                .setDrivername("com.mysql.cj.jdbc.Driver")
                .setDBUrl("jdbc:mysql://localhost:3306/flink_test")
                .setUsername("root")
                .setPassword("root")
                .setQuery("select dept_id,parent_id,dept_name from sys_dept")
                .setRowTypeInfo(rowTypeInfo)
                .finish();

        DataStreamSource<Row> dsRow = env.createInput(jdbc);
        dsRow.print();

        // DataStream programs are lazy: nothing runs until execute() is called.
        env.execute();
    }
}

体验二 使用Table API

  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
  env.setParallelism(1);

  StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

  // Columns of the sys_dept table; dept_id is declared NOT NULL because it is the primary key.
  Schema tableSchema = Schema.newBuilder()
          .column("dept_id", DataTypes.BIGINT().notNull())
          .column("parent_id", DataTypes.BIGINT())
          .column("dept_name", DataTypes.STRING())
          .primaryKey("dept_id")
          .build();

  // Register sys_dept as a temporary table backed by the JDBC connector.
  tableEnv.createTemporaryTable("sys_dept", TableDescriptor.forConnector("jdbc")
          .schema(tableSchema)
          .option("table-name", "sys_dept")
          .option("driver", "com.mysql.cj.jdbc.Driver")
          .option("url", "jdbc:mysql://localhost:3306/flink_test")
          .option("username", "root")
          .option("password", "root")
          .build());
  tableEnv.from("sys_dept").printSchema();

  // Count the departments under each parent. The grouping key is given as an Expression via
  // $(...), matching the select clause, instead of the deprecated string expression syntax.
  TableResult tableResult = tableEnv.from("sys_dept")
          .groupBy($("parent_id"))
          .select($("parent_id"), $("dept_id").count().as("cnt"))
          .execute();
  tableResult.print();

  // Do not call env.execute() here. Table.execute() above (like tableEnv.executeSql or
  // statementSet.execute()) already submits the job; calling env.execute() afterwards fails
  // with "No operators defined in streaming topology. Cannot execute."

体验三 使用 SQL CREATE 语句

// Stream environment in streaming runtime mode with a parallelism of one.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
env.setParallelism(1);

StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

// DDL that registers sys_dept as a table backed by the JDBC connector.
String createTableDdl = String.join("",
        "create table sys_dept( dept_id bigint,parent_id bigint,dept_name String)with(",
        "'connector'='jdbc',",
        "'url'='jdbc:mysql://localhost:3306/flink_test',",
        "'driver'='com.mysql.cj.jdbc.Driver',",
        "'username'='root',",
        "'password'='root',",
        "'table-name'='sys_dept'",
        ")");
tableEnv.executeSql(createTableDdl);

// Count the departments under each parent and print the result.
String countPerParentQuery =
        "select parent_id ,count(dept_id) as cnt from sys_dept group by parent_id";
tableEnv.executeSql(countPerParentQuery).print();

总结

对比以上三种方式可以发现,SQL 的写法最为简洁。当然,也可以自定义 Source,后续还可以接入 Flink CDC,并根据业务需要做相应的算子计算。