-- Reference: docs.starrocks.io/zh/docs/ — StarRocks external catalog docs (original link truncated)
-- StarRocks: Iceberg external catalog served over a REST catalog (Gravitino),
-- with data files on a self-hosted S3-compatible store (RustFS).
-- FIX: the catalog name must be an identifier, not a string literal —
-- CREATE EXTERNAL CATALOG 'iceberg' is a syntax error in StarRocks.
CREATE EXTERNAL CATALOG iceberg
PROPERTIES
(
    "type" = "iceberg",
    "iceberg.catalog.type" = "rest",
    "iceberg.catalog.uri" = "http://iceberg-gravitino.cyan.com/iceberg/",
    -- SECURITY NOTE(review): plaintext credentials committed to source;
    -- move them to a secrets store and rotate.
    "aws.s3.access_key" = "rustfsadmin",
    "aws.s3.secret_key" = "rustfsadmin",
    "aws.s3.endpoint" = "http://rustfs.cyan.com",
    -- Path-style addressing is typically required by self-hosted
    -- S3-compatible stores (no virtual-host DNS).
    "aws.s3.enable_path_style_access" = "true",
    "client.factory" = "com.starrocks.connector.iceberg.IcebergAwsClientFactory"
);
-- StarRocks: MySQL external catalog via the JDBC connector.
CREATE EXTERNAL CATALOG mysql
PROPERTIES
(
"type"="jdbc",
"user"="root",
-- SECURITY NOTE(review): plaintext password committed to source;
-- move to a credential store and rotate.
"password"="627459859@qq.com",
"jdbc_uri"="jdbc:mysql://10.0.0.2:3306",
-- URL of the MySQL Connector/J jar StarRocks fetches to load the driver.
"driver_url"="https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar",
"driver_class"="com.mysql.cj.jdbc.Driver"
);
-- =============================================
-- Step 1: core Flink SQL Client session configuration
-- (fixes "checkpoint never triggers" / "streaming mode not applied")
-- =============================================
-- FIX: Flink SQL Client requires SET keys and values to be quoted string
-- literals ('key' = 'value'); unquoted hyphenated keys such as
-- execution.runtime-mode and bare values such as 30s fail to parse.
-- 1. Force streaming mode (the client may default to batch, in which case
--    checkpointing never takes effect).
SET 'execution.runtime-mode' = 'streaming';
-- 2. Enable checkpointing — the Iceberg sink only commits data files on a
--    successful checkpoint.
SET 'execution.checkpointing.interval' = '30s'; -- trigger every 30 seconds
SET 'execution.checkpointing.tolerable-failed-checkpoints' = '3'; -- tolerate 3 failures
-- =============================================
-- Step 2: Kafka CDC source table (debezium-json changelog format)
-- =============================================
-- FIX: removed the former physical column `op` — the debezium-json format
-- decodes the Debezium envelope (before/after/op) internally and exposes only
-- the payload fields, so `op` could never be populated from the payload, and
-- the downstream INSERT does not select it.
CREATE TABLE kafka_cdc_goods_category (
    id BIGINT,
    name STRING,
    brand STRING,
    price DECIMAL(10, 2),
    created_at STRING,
    updated_at STRING,
    deleted_at STRING
) WITH (
    'connector' = 'kafka',
    'topic' = 'goods_category_copy_goods_goods_category_copy.goods.goods_category_copy',
    'properties.bootstrap.servers' = '10.0.0.2:9092',
    -- Dedicated consumer group so offsets do not collide with the Java job.
    'properties.group.id' = 'flink-cdc-iceberg-sql-client',
    'scan.startup.mode' = 'earliest-offset',
    'format' = 'debezium-json',
    -- Messages include the Debezium schema envelope.
    'debezium-json.schema-include' = 'true',
    'debezium-json.ignore-parse-errors' = 'true',
    -- Match Debezium's ISO-8601 timestamp encoding to avoid parse failures.
    'debezium-json.timestamp-format.standard' = 'ISO-8601'
);
-- =============================================
-- Step 3: Flink Iceberg REST catalog (kept aligned with the Java job's config)
-- =============================================
CREATE CATALOG iceberg_dw WITH (
'type' = 'iceberg',
'catalog-type' = 'rest',
'uri' = 'http://iceberg-gravitino.cyan.com/iceberg',
-- SECURITY NOTE(review): plaintext credentials committed to source;
-- move to a secrets store.
's3.access-key-id' = 'rustfsadmin',
's3.secret-access-key' = 'rustfsadmin',
's3.endpoint' = 'http://rustfs.cyan.com',
-- NOTE(review): presumably a placeholder region — the self-hosted
-- S3-compatible endpoint likely ignores it, but the AWS SDK wants one set;
-- confirm against the RustFS deployment.
's3.region' = 'us-east-1',
-- Path-style addressing for a self-hosted S3-compatible store.
's3.path-style-access' = 'true'
);
-- Ensure the target database exists before creating tables in it.
CREATE DATABASE IF NOT EXISTS iceberg_dw.ods;
-- =============================================
-- Step 4: Iceberg target table. Created through the iceberg_dw catalog, so no
-- 'connector' property is set here — the catalog supplies the connector.
-- =============================================
CREATE TABLE IF NOT EXISTS iceberg_dw.ods.ods_goods_category_copy (
id BIGINT,
name STRING,
brand STRING,
price DECIMAL(10, 2),
created_at STRING,
updated_at STRING,
deleted_at STRING,
PRIMARY KEY (id) NOT ENFORCED -- primary key drives UPSERT row matching
) WITH (
-- Iceberg format v2 is required for row-level updates/deletes (upsert mode).
'format-version' = '2',
'write.format.default' = 'parquet',
'write.upsert.enabled' = 'true'
);
-- =============================================
-- Step 5: submit the streaming INSERT job. It runs on the cluster, so it
-- keeps running after the SQL Client session exits.
-- =============================================
INSERT INTO iceberg_dw.ods.ods_goods_category_copy
SELECT
    id,
    name,
    brand,
    price,
    created_at,
    updated_at,
    deleted_at
FROM kafka_cdc_goods_category;