flinksql 轻松实现csv to hudi
通过Flink SQL脚本,将CSV数据高效迁移到Hudi,实现实时大数据管理。
关键步骤
配置
首先,设置 Flink 的检查点间隔、JobManager 和 TaskManager 内存配置。
sql
SET execution.checkpointing.interval=60000;
SET jobmanager.memory.process.size=2048m;
SET taskmanager.memory.process.size=6144m;
这些配置参数是为了确保 Flink 任务的可靠性和性能。例如,设置检查点间隔为60秒,可以在任务出错时恢复。
创建源表
创建一个名为 dim_area_csv
的源表,它从 HDFS 上的一个 CSV 文件中读取数据。
sql
CREATE TABLE dim_area_csv (
area_id varchar,
mdf_id varchar,
mdf_name varchar,
sub_bureau_id varchar,
sub_bureau_name varchar,
adm_sub_bureau_id varchar,
adm_sub_bureau_name varchar,
branch_bureau_id varchar,
branch_bureau_name varchar,
bureau_id varchar,
bureau_name varchar,
all_bureau varchar,
area_tp varchar,
lst_upd_tmp varchar,
lst_upb_by varchar,
busi_key varchar,
mdf_code varchar,
sub_bureau_code varchar,
branch_bureau_code varchar,
bureau_code varchar,
created_tmp varchar,
bureau_org_id varchar,
bureau_cd varchar,
adm_sub_bureau_flg varchar,
adm_sub_bureau_cd varchar,
org_id varchar,
mss_num varchar,
area_sub_tp varchar
) WITH (
'connector' = 'filesystem',
'format' = 'csv', -- 选择CSV格式
'path' = 'hdfs://ns1/domain/ns1/prd/hudi/shct/eda/dim_area.csv',
'csv.field-delimiter' = ','
);
创建目标表
创建一个目标表 dim_area
,该表使用了 Hudi 进行存储。这张表支持更新和 ACID 操作。
sql
CREATE TABLE dim_area (
area_id varchar,
mdf_id varchar,
mdf_name varchar,
sub_bureau_id varchar,
sub_bureau_name varchar,
adm_sub_bureau_id varchar,
adm_sub_bureau_name varchar,
branch_bureau_id varchar,
branch_bureau_name varchar,
bureau_id varchar,
bureau_name varchar,
all_bureau varchar,
area_tp varchar,
lst_upd_tmp varchar,
lst_upb_by varchar,
busi_key varchar,
mdf_code varchar,
sub_bureau_code varchar,
branch_bureau_code varchar,
bureau_code varchar,
created_tmp varchar,
bureau_org_id varchar,
bureau_cd varchar,
adm_sub_bureau_flg varchar,
adm_sub_bureau_cd varchar,
org_id varchar,
mss_num varchar,
area_sub_tp varchar
) WITH (
'connector' = 'hudi',
'path' = '/domain/ns1/prd/hudi/shct/eda/db_dim/dim_area',
'hoodie.table.name' = 'dim_area',
'table.type' = 'COPY_ON_WRITE',
'hoodie.datasource.write.recordkey.field' = 'area_id',
'write.precombine.field' = 'lst_upd_tmp',
'index.type' = 'BUCKET',
'write.operation' = 'upsert',
'write.tasks' = '2',
'hive_sync.enabled' = 'true',
'hive_sync.mode' = 'hms',
'hive_sync.matastore.uris' = 'thrift://ddp-js6dn-c027:9083,thrift://ddp-js6dn-c028:9083',
'hive_sync.conf.dir' = '/opt/TDP/hive/conf',
'hive_sync.db' = 'db_dim',
'hive_sync.table' = 'dim_area',
'hive_sync.kerberos.krb5.conf' = '/data/dataos/hadoop-cluster/krb5.conf',
'hive_sync.kerberos.principal' = 'zt_test2@GROUPB.HADOOP.CN',
'hive_sync.kerberos.keytab.file' = '/data/dataos/hadoop-cluster/zt_test2.keytab',
'hoodie.bucket.index.num.buckets' = '2',
'compaction.tasks' = '2',
'compaction.max_memory' = '2048'
);
从源表插入数据到目标表
将 dim_area_csv
表的数据插入到 dim_area
表中。
sql
INSERT INTO dim_area
SELECT *
FROM dim_area_csv;
关键配置解释
- SET execution.checkpointing.interval=60000; :设置 Flink 的检查点间隔为60秒。
- SET jobmanager.memory.process.size=2048m; :设置 JobManager 内存为2048 MB。
- SET taskmanager.memory.process.size=6144m; :设置 TaskManager 内存为6144 MB。
- 'write.operation' = 'upsert' :指定 Hudi 执行的是 upsert 操作。
- 'hive_sync.enabled' = 'true' :开启 Hive 元数据同步。
优化和注意事项
- 数据格式:确保 CSV 文件的格式正确,并且文件路径正确。
- 内存配置:根据数据规模和任务需求调整 JobManager 和 TaskManager 的内存设置,以避免内存不足。
- 并行度:根据集群资源和数据规模,适当调整 write.tasks 和 compaction.tasks 的数量。
- Kerberos 配置:确保 Kerberos 配置文件和 keytab 文件路径正确,并具有正确的访问权限。
完整脚本
SET execution.checkpointing.interval=60000;
SET jobmanager.memory.process.size=2048m;
SET taskmanager.memory.process.size=6144m;
CREATE TABLE dim_area_csv (
area_id varchar,
mdf_id varchar,
mdf_name varchar,
sub_bureau_id varchar,
sub_bureau_name varchar,
adm_sub_bureau_id varchar,
adm_sub_bureau_name varchar,
branch_bureau_id varchar,
branch_bureau_name varchar,
bureau_id varchar,
bureau_name varchar,
all_bureau varchar,
area_tp varchar,
lst_upd_tmp varchar,
lst_upb_by varchar,
busi_key varchar,
mdf_code varchar,
sub_bureau_code varchar,
branch_bureau_code varchar,
bureau_code varchar,
created_tmp varchar,
bureau_org_id varchar,
bureau_cd varchar,
adm_sub_bureau_flg varchar,
adm_sub_bureau_cd varchar,
org_id varchar,
mss_num varchar,
area_sub_tp varchar
) WITH (
'connector' = 'filesystem',
'format' = 'csv', -- 根据实际格式选择如:csv, parquet等
'path' = 'hdfs://ns1/domain/ns1/prd/hudi/shct/eda/dim_area.csv',
'csv.field-delimiter' = ','
);
CREATE TABLE dim_area (
area_id varchar,
mdf_id varchar,
mdf_name varchar,
sub_bureau_id varchar,
sub_bureau_name varchar,
adm_sub_bureau_id varchar,
adm_sub_bureau_name varchar,
branch_bureau_id varchar,
branch_bureau_name varchar,
bureau_id varchar,
bureau_name varchar,
all_bureau varchar,
area_tp varchar,
lst_upd_tmp varchar,
lst_upb_by varchar,
busi_key varchar,
mdf_code varchar,
sub_bureau_code varchar,
branch_bureau_code varchar,
bureau_code varchar,
created_tmp varchar,
bureau_org_id varchar,
bureau_cd varchar,
adm_sub_bureau_flg varchar,
adm_sub_bureau_cd varchar,
org_id varchar,
mss_num varchar,
area_sub_tp varchar
)WITH (
'connector' = 'hudi',
'path' = '/domain/ns1/prd/hudi/shct/eda/db_dim/dim_area',
'hoodie.table.name' = 'dim_area',
'table.type' = 'COPY_ON_WRITE',
'hoodie.datasource.write.recordkey.field' = 'area_id',
'write.precombine.field' = 'lst_upd_tmp',
'index.type' = 'BUCKET',
'write.operation' = 'upsert',
'write.tasks' = '2',
'hive_sync.enabled' = 'true',
'hive_sync.mode' = 'hms',
'hive_sync.matastore.uris' = 'thrift://ddp-js6dn-c027:9083,thrift://ddp-js6dn-c028:9083',
'hive_sync.conf.dir' = '/opt/TDP/hive/conf','hive_sync.db' = 'db_dim',
'hive_sync.table' = 'dim_area',
'hive_sync.kerberos.krb5.conf' = '/data/dataos/hadoop-cluster/krb5.conf',
'hive_sync.kerberos.principal' = 'zt_test2@GROUPB.HADOOP.CN',
'hive_sync.kerberos.keytab.file' = '/data/dataos/hadoop-cluster/zt_test2.keytab',
'hoodie.bucket.index.num.buckets' = '2',
'compaction.tasks' = '2',
'compaction.max_memory' = '2048'
);
insert into dim_area select * from dim_area_csv;
SET execution.checkpointing.interval=60000;
SET jobmanager.memory.process.size=2048m;
SET taskmanager.memory.process.size=6144m;
CREATE TABLE dim_prom_csv (
prom_id varchar(30) comment '销售品ID',
prom_name varchar(2000) comment 'prom_name',
prom_num varchar(120) comment 'prom_num',
busi_key varchar(120) comment 'busi_key',
src_sys varchar(24) comment 'src_sys',
created_by varchar(120) comment 'created_by',
created_tmp varchar(120) comment 'created_tmp',
lst_upd_by varchar(120) comment 'lst_upd_by',
lst_upd_tmp varchar(120) comment 'lst_upd_tmp',
prom_prod_tp varchar(120) comment 'prom_prod_tp',
prom_brand_tp varchar(120) comment 'prom_brand_tp',
prom_pack_series varchar(120) comment 'prom_pack_series',
prom_pack_tp varchar(120) comment 'prom_pack_tp',
prom_pack_grade varchar(120) comment 'prom_pack_grade',
prom_pack_equipment varchar(120) comment 'prom_pack_equipment',
prom_lvl1_cd varchar(60) comment 'prom_lvl1_cd',
prom_lvl1_name varchar(400) comment 'prom_lvl1_name',
prom_lvl2_cd varchar(60) comment 'prom_lvl2_cd',
prom_lvl2_name varchar(400) comment 'prom_lvl2_name',
prom_lvl3_cd varchar(60) comment 'prom_lvl3_cd',
prom_lvl3_name varchar(400) comment 'prom_lvl3_name',
prom_lvl4_cd varchar(60) comment 'prom_lvl4_cd',
prom_lvl4_name varchar(400) comment 'prom_lvl4_name',
prom_lvl5_cd varchar(60) comment 'prom_lvl5_cd',
prom_lvl5_name varchar(400) comment 'prom_lvl5_name',
imp_start_dt varchar(30) comment 'imp_start_dt',
imp_end_dt varchar(30) comment 'imp_end_dt',
prom_brick_tp varchar(120) comment 'prom_brick_tp',
prom_jt_tp varchar(120) comment 'prom_jt_tp',
dest_cost1 varchar(20) comment 'dest_cost1',
dest_cost2 varchar(30) comment 'dest_cost2',
prom_merge_flag varchar(4) comment 'prom_merge_flag',
prom_cust_tp varchar(80) comment 'prom_cust_tp',
prom_sale_grade varchar(80) comment 'prom_sale_grade',
cary_cost varchar(14) comment 'cary_cost',
if_bureau_flag varchar(4) comment 'if_bureau_flag',
if_3g_flag varchar(4) comment 'if_3g_flag',
if_imp_flag varchar(4) comment 'if_imp_flag',
start_dt varchar(30) comment 'start_dt',
end_dt varchar(30) comment 'end_dt',
stat_tp varchar(80) comment 'stat_tp',
duration varchar(10) comment 'duration',
return_amt varchar(14) comment 'return_amt',
prom_schema varchar(400) comment 'prom_schema',
prom_cust_sub_tp varchar(80) comment 'prom_cust_sub_tp',
prom_onstart_tp varchar(30) comment 'prom_onstart_tp',
income_bureau_flag varchar(40) comment 'income_bureau_flag',
hzfc_flag varchar(40) comment 'hzfc_flag',
prom_tp varchar(30) comment 'prom_tp',
prom_sub_tp varchar(30) comment 'prom_sub_tp',
prom_tp_desc varchar(120) comment 'prom_tp_desc',
prom_sub_tp_desc varchar(120) comment 'prom_sub_tp_desc',
prom_group_flg varchar(40) comment 'prom_group_flg',
prom_tc_sub_tp varchar(120) comment 'prom_tc_sub_tp',
restrict_days_cnt varchar(10) comment 'restrict_days_cnt',
restrict_days_unit varchar(30) comment 'restrict_days_unit',
constravarchar_days_cnt varchar(10) comment 'constravarchar_days_cnt',
constravarchar_days_unit varchar(30) comment 'constravarchar_days_unit',
opt_allo_type varchar(120) comment 'opt_allo_type',
group_prod_code varchar(50) comment 'group_prod_code',
limit_mon_cnt varchar(5) comment 'limit_mon_cnt',
ppm_catalog_id varchar(12) comment 'ppm_catalog_id',
prom_vouc_level varchar(30) comment 'prom_vouc_level',
prom_vouc_type varchar(30) comment 'prom_vouc_type',
free_pay_flg varchar(40) comment 'free_pay_flg',
sale_chnl_tp varchar(120) comment 'sale_chnl_tp',
varcharnsv_flg varchar(40) comment 'varcharnsv_flg',
sbsdy_cffcnt varchar(10) comment 'sbsdy_cffcnt',
prom_overlap_tp varchar(10) comment 'prom_overlap_tp',
doublecheck varchar(120) comment 'doublecheck',
prom_stat_class varchar(200) comment 'prom_stat_class',
prom_merge_cd varchar(30) comment 'prom_merge_cd',
prom_merge_type varchar(30) comment 'prom_merge_type',
prom_4glte_flg varchar(40) comment 'prom_4glte_flg',
prom_extnl_name varchar(880) comment 'prom_extnl_name',
ctg_po_cate_cd varchar(30) comment 'ctg_po_cate_cd',
terminal_amt varchar(22) comment 'terminal_amt',
discount_rate varchar(22) comment 'discount_rate',
rmind_flg varchar(4) comment 'rmind_flg',
flow_pckg_stat_type varchar(120) comment 'flow_pckg_stat_type',
zx_growth varchar(30) comment 'zx_growth',
star_prom_flg varchar(40) comment 'star_prom_flg',
alias_name varchar(400) comment 'alias_name',
strategy varchar(1000) comment 'strategy',
sub_status varchar(120) comment 'sub_status',
net_chg_att varchar(120) comment 'net_chg_att',
bill_day_cd varchar(120) comment 'bill_day_cd',
ctg_varcharfc_prom_code varchar(200) comment 'ctg_varcharfc_prom_code',
account_diff_flg varchar(40) comment 'account_diff_flg',
send_ibp_flg varchar(40) comment 'send_ibp_flg',
term_allwnc_flg varchar(40) comment 'term_allwnc_flg',
compound_flg varchar(40) comment 'compound_flg',
msg_accept_flg varchar(40) comment 'msg_accept_flg',
bp_diff_flg varchar(40) comment 'bp_diff_flg',
need_submit_ocs varchar(40) comment 'need_submit_ocs',
movable_flg varchar(40) comment 'movable_flg',
elctrc_chnl_accepr_flg varchar(40) comment 'elctrc_chnl_accepr_flg',
losable_flg varchar(40) comment 'losable_flg',
limit_cncl_flg varchar(40) comment 'limit_cncl_flg',
limit_comp_deal_flg varchar(40) comment 'limit_comp_deal_flg',
net_pack_tp varchar(120) comment 'net_pack_tp',
net_sale_range varchar(120) comment 'net_sale_range',
pay_device_flg varchar(40) comment 'pay_device_flg',
send_msg_flag varchar(40) comment 'send_msg_flag',
td_info varchar(4000) comment 'td_info',
zx_info varchar(4000) comment 'zx_info',
upgd_4g_constr varchar(40) comment 'upgd_4g_constr',
prom_term_sn_flg varchar(40) comment 'prom_term_sn_flg',
restrict_expire_dt varchar(30) comment 'restrict_expire_dt',
eff_flag varchar(50) comment 'eff_flag',
ygfzyhs varchar(20) comment 'ygfzyhs',
ygzsr_aftertax varchar(20) comment 'ygzsr_aftertax',
xspygml_aftertax varchar(20) comment 'xspygml_aftertax',
xspygmll_aftertax varchar(20) comment 'xspygmll_aftertax',
hx_level varchar(120) comment 'hx_level',
free_value_voice varchar(20) comment 'free_value_voice',
free_value_flux varchar(20) comment 'free_value_flux',
fair_value_voice varchar(20) comment 'fair_value_voice',
fair_value_flux varchar(20) comment 'fair_value_flux',
prod_tax_rate varchar(20) comment 'prod_tax_rate',
once_gift_amount varchar(10) comment 'once_gift_amount',
deduct_cycle varchar(120) comment 'deduct_cycle',
present_mthd varchar(120) comment 'present_mthd',
rel_to_main varchar(120) comment 'rel_to_main',
return_mthd varchar(120) comment 'return_mthd',
agree_flag varchar(40) comment 'agree_flag',
auto_con_flg varchar(40) comment 'auto_con_flg',
prom_sale_end_tmp varchar(30) comment 'prom_sale_end_tmp',
ctg_prom_num varchar(30) comment 'ctg_prom_num',
commodity_tp varchar(10) comment 'commodity_tp',
commodity_tp_desc varchar(60) comment 'commodity_tp_desc',
biz_stats_cd varchar(30) comment 'biz_stats_cd',
biz_stats_name varchar(50) comment 'biz_stats_name',
wyj_monthly_basic_fee varchar(19) comment 'wyj_monthly_basic_fee',
value_change varchar(30) comment 'value_change',
if_belong_prom_discount varchar(30) comment 'if_belong_prom_discount',
tn_discount_rate varchar(30) comment 'tn_discount_rate',
if_zdxs varchar(30) comment 'if_zdxs'
) WITH (
'connector' = 'filesystem',
'format' = 'csv', -- 根据实际格式选择如:csv, parquet等
'path' = 'hdfs://ns1/domain/ns1/prd/hudi/shct/eda/dim_prom.csv',
'csv.field-delimiter' = ',',
'csv.ignore-parse-errors' = 'true'
);
CREATE TABLE dim_prom (
prom_id varchar(30) comment '销售品ID',
prom_name varchar(2000) comment 'prom_name',
prom_num varchar(120) comment 'prom_num',
busi_key varchar(120) comment 'busi_key',
src_sys varchar(24) comment 'src_sys',
created_by varchar(120) comment 'created_by',
created_tmp varchar(120) comment 'created_tmp',
lst_upd_by varchar(120) comment 'lst_upd_by',
lst_upd_tmp varchar(120) comment 'lst_upd_tmp',
prom_prod_tp varchar(120) comment 'prom_prod_tp',
prom_brand_tp varchar(120) comment 'prom_brand_tp',
prom_pack_series varchar(120) comment 'prom_pack_series',
prom_pack_tp varchar(120) comment 'prom_pack_tp',
prom_pack_grade varchar(120) comment 'prom_pack_grade',
prom_pack_equipment varchar(120) comment 'prom_pack_equipment',
prom_lvl1_cd varchar(60) comment 'prom_lvl1_cd',
prom_lvl1_name varchar(400) comment 'prom_lvl1_name',
prom_lvl2_cd varchar(60) comment 'prom_lvl2_cd',
prom_lvl2_name varchar(400) comment 'prom_lvl2_name',
prom_lvl3_cd varchar(60) comment 'prom_lvl3_cd',
prom_lvl3_name varchar(400) comment 'prom_lvl3_name',
prom_lvl4_cd varchar(60) comment 'prom_lvl4_cd',
prom_lvl4_name varchar(400) comment 'prom_lvl4_name',
prom_lvl5_cd varchar(60) comment 'prom_lvl5_cd',
prom_lvl5_name varchar(400) comment 'prom_lvl5_name',
imp_start_dt varchar(30) comment 'imp_start_dt',
imp_end_dt varchar(30) comment 'imp_end_dt',
prom_brick_tp varchar(120) comment 'prom_brick_tp',
prom_jt_tp varchar(120) comment 'prom_jt_tp',
dest_cost1 varchar(20) comment 'dest_cost1',
dest_cost2 varchar(30) comment 'dest_cost2',
prom_merge_flag varchar(4) comment 'prom_merge_flag',
prom_cust_tp varchar(80) comment 'prom_cust_tp',
prom_sale_grade varchar(80) comment 'prom_sale_grade',
cary_cost varchar(14) comment 'cary_cost',
if_bureau_flag varchar(4) comment 'if_bureau_flag',
if_3g_flag varchar(4) comment 'if_3g_flag',
if_imp_flag varchar(4) comment 'if_imp_flag',
start_dt varchar(30) comment 'start_dt',
end_dt varchar(30) comment 'end_dt',
stat_tp varchar(80) comment 'stat_tp',
duration varchar(10) comment 'duration',
return_amt varchar(14) comment 'return_amt',
prom_schema varchar(400) comment 'prom_schema',
prom_cust_sub_tp varchar(80) comment 'prom_cust_sub_tp',
prom_onstart_tp varchar(30) comment 'prom_onstart_tp',
income_bureau_flag varchar(40) comment 'income_bureau_flag',
hzfc_flag varchar(40) comment 'hzfc_flag',
prom_tp varchar(30) comment 'prom_tp',
prom_sub_tp varchar(30) comment 'prom_sub_tp',
prom_tp_desc varchar(120) comment 'prom_tp_desc',
prom_sub_tp_desc varchar(120) comment 'prom_sub_tp_desc',
prom_group_flg varchar(40) comment 'prom_group_flg',
prom_tc_sub_tp varchar(120) comment 'prom_tc_sub_tp',
restrict_days_cnt varchar(10) comment 'restrict_days_cnt',
restrict_days_unit varchar(30) comment 'restrict_days_unit',
constravarchar_days_cnt varchar(10) comment 'constravarchar_days_cnt',
constravarchar_days_unit varchar(30) comment 'constravarchar_days_unit',
opt_allo_type varchar(120) comment 'opt_allo_type',
group_prod_code varchar(50) comment 'group_prod_code',
limit_mon_cnt varchar(5) comment 'limit_mon_cnt',
ppm_catalog_id varchar(12) comment 'ppm_catalog_id',
prom_vouc_level varchar(30) comment 'prom_vouc_level',
prom_vouc_type varchar(30) comment 'prom_vouc_type',
free_pay_flg varchar(40) comment 'free_pay_flg',
sale_chnl_tp varchar(120) comment 'sale_chnl_tp',
varcharnsv_flg varchar(40) comment 'varcharnsv_flg',
sbsdy_cffcnt varchar(10) comment 'sbsdy_cffcnt',
prom_overlap_tp varchar(10) comment 'prom_overlap_tp',
doublecheck varchar(120) comment 'doublecheck',
prom_stat_class varchar(200) comment 'prom_stat_class',
prom_merge_cd varchar(30) comment 'prom_merge_cd',
prom_merge_type varchar(30) comment 'prom_merge_type',
prom_4glte_flg varchar(40) comment 'prom_4glte_flg',
prom_extnl_name varchar(880) comment 'prom_extnl_name',
ctg_po_cate_cd varchar(30) comment 'ctg_po_cate_cd',
terminal_amt varchar(22) comment 'terminal_amt',
discount_rate varchar(22) comment 'discount_rate',
rmind_flg varchar(4) comment 'rmind_flg',
flow_pckg_stat_type varchar(120) comment 'flow_pckg_stat_type',
zx_growth varchar(30) comment 'zx_growth',
star_prom_flg varchar(40) comment 'star_prom_flg',
alias_name varchar(400) comment 'alias_name',
strategy varchar(1000) comment 'strategy',
sub_status varchar(120) comment 'sub_status',
net_chg_att varchar(120) comment 'net_chg_att',
bill_day_cd varchar(120) comment 'bill_day_cd',
ctg_varcharfc_prom_code varchar(200) comment 'ctg_varcharfc_prom_code',
account_diff_flg varchar(40) comment 'account_diff_flg',
send_ibp_flg varchar(40) comment 'send_ibp_flg',
term_allwnc_flg varchar(40) comment 'term_allwnc_flg',
compound_flg varchar(40) comment 'compound_flg',
msg_accept_flg varchar(40) comment 'msg_accept_flg',
bp_diff_flg varchar(40) comment 'bp_diff_flg',
need_submit_ocs varchar(40) comment 'need_submit_ocs',
movable_flg varchar(40) comment 'movable_flg',
elctrc_chnl_accepr_flg varchar(40) comment 'elctrc_chnl_accepr_flg',
losable_flg varchar(40) comment 'losable_flg',
limit_cncl_flg varchar(40) comment 'limit_cncl_flg',
limit_comp_deal_flg varchar(40) comment 'limit_comp_deal_flg',
net_pack_tp varchar(120) comment 'net_pack_tp',
net_sale_range varchar(120) comment 'net_sale_range',
pay_device_flg varchar(40) comment 'pay_device_flg',
send_msg_flag varchar(40) comment 'send_msg_flag',
td_info varchar(4000) comment 'td_info',
zx_info varchar(4000) comment 'zx_info',
upgd_4g_constr varchar(40) comment 'upgd_4g_constr',
prom_term_sn_flg varchar(40) comment 'prom_term_sn_flg',
restrict_expire_dt varchar(30) comment 'restrict_expire_dt',
eff_flag varchar(50) comment 'eff_flag',
ygfzyhs varchar(20) comment 'ygfzyhs',
ygzsr_aftertax varchar(20) comment 'ygzsr_aftertax',
xspygml_aftertax varchar(20) comment 'xspygml_aftertax',
xspygmll_aftertax varchar(20) comment 'xspygmll_aftertax',
hx_level varchar(120) comment 'hx_level',
free_value_voice varchar(20) comment 'free_value_voice',
free_value_flux varchar(20) comment 'free_value_flux',
fair_value_voice varchar(20) comment 'fair_value_voice',
fair_value_flux varchar(20) comment 'fair_value_flux',
prod_tax_rate varchar(20) comment 'prod_tax_rate',
once_gift_amount varchar(10) comment 'once_gift_amount',
deduct_cycle varchar(120) comment 'deduct_cycle',
present_mthd varchar(120) comment 'present_mthd',
rel_to_main varchar(120) comment 'rel_to_main',
return_mthd varchar(120) comment 'return_mthd',
agree_flag varchar(40) comment 'agree_flag',
auto_con_flg varchar(40) comment 'auto_con_flg',
prom_sale_end_tmp varchar(30) comment 'prom_sale_end_tmp',
ctg_prom_num varchar(30) comment 'ctg_prom_num',
commodity_tp varchar(10) comment 'commodity_tp',
commodity_tp_desc varchar(60) comment 'commodity_tp_desc',
biz_stats_cd varchar(30) comment 'biz_stats_cd',
biz_stats_name varchar(50) comment 'biz_stats_name',
wyj_monthly_basic_fee varchar(19) comment 'wyj_monthly_basic_fee',
value_change varchar(30) comment 'value_change',
if_belong_prom_discount varchar(30) comment 'if_belong_prom_discount',
tn_discount_rate varchar(30) comment 'tn_discount_rate',
if_zdxs varchar(30) comment 'if_zdxs'
)WITH (
'connector' = 'hudi',
'path' = '/domain/ns1/prd/hudi/shct/eda/db_dim/dim_prom',
'hoodie.table.name' = 'dim_prom',
'table.type' = 'COPY_ON_WRITE',
'hoodie.datasource.write.recordkey.field' = 'prom_id',
'write.precombine.field' = 'lst_upd_tmp',
'index.type' = 'BUCKET',
'write.operation' = 'upsert',
'write.tasks' = '2',
'hive_sync.enabled' = 'true',
'hive_sync.mode' = 'hms',
'hive_sync.matastore.uris' = 'thrift://ddp-js6dn-c027:9083,thrift://ddp-js6dn-c028:9083',
'hive_sync.conf.dir' = '/opt/TDP/hive/conf','hive_sync.db' = 'db_dim',
'hive_sync.table' = 'dim_prom',
'hive_sync.kerberos.krb5.conf' = '/data/dataos/hadoop-cluster/krb5.conf',
'hive_sync.kerberos.principal' = 'zt_test2@GROUPB.HADOOP.CN',
'hive_sync.kerberos.keytab.file' = '/data/dataos/hadoop-cluster/zt_test2.keytab',
'hoodie.bucket.index.num.buckets' = '2',
'compaction.tasks' = '2',
'compaction.max_memory' = '2048'
);
insert into dim_prom select * from dim_prom_csv;