DataX支持postgerSql的update操作

1,697 阅读2分钟

Datax中mysql可以支持插入的数据,如果存在则更新。

但公司业务使用postgreSql,业务需要插入数据存在,则更新的操作,由于数据量太大,用jdbc速度太慢,只能使用datax,因此源码进行修改,使其增加更新操作。

1.修改com.alibaba.datax.plugin.writer.postgresqlwriter

注释不支持writeMode模式的代码

public void init() {
         this.originalConfig = super.getPluginJobConf();

         // warn:not like mysql, PostgreSQL only support insert mode, don't use
//       String writeMode = this.originalConfig.getString(Key.WRITE_MODE);
//       if (null != writeMode) {
//          throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
//             String.format("写入模式(writeMode)配置有误. 因为PostgreSQL不支持配置参数项 writeMode: %s, PostgreSQL仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode));
//       }

         this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
         this.commonRdbmsWriterMaster.init(this.originalConfig);
      }

2.修改com.alibaba.datax.plugin.rdbms.writer.util.WriterUtil

新增else if (dataBaseType == DataBaseType.PostgreSQL)这部分的代码,使其支持更新

public static String getWriteTemplate(List<String> columnHolders, List<String> valueHolders, String writeMode, DataBaseType dataBaseType, boolean forceUseUpdate) {
    boolean isWriteModeLegal = writeMode.trim().toLowerCase().startsWith("insert")
            || writeMode.trim().toLowerCase().startsWith("replace")
            || writeMode.trim().toLowerCase().startsWith("update");

    if (!isWriteModeLegal) {
        throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
                String.format("您所配置的 writeMode:%s 错误. 因为DataX 目前仅支持replace,update 或 insert 方式. 请检查您的配置并作出修改.", writeMode));
    }
    // && writeMode.trim().toLowerCase().startsWith("replace")
    String writeDataSqlTemplate;
    if (forceUseUpdate ||
            ((dataBaseType == DataBaseType.MySql || dataBaseType == DataBaseType.Tddl) && writeMode.trim().toLowerCase().startsWith("update"))
            ) {
        //update只在mysql下使用

        writeDataSqlTemplate = new StringBuilder()
                .append("INSERT INTO %s (").append(StringUtils.join(columnHolders, ","))
                .append(") VALUES(").append(StringUtils.join(valueHolders, ","))
                .append(")")
                .append(onDuplicateKeyUpdateString(columnHolders))
                .toString();
    } else if (dataBaseType == DataBaseType.PostgreSQL) {
        //新增postgreSQL的更新模式,进行增量更新
        writeDataSqlTemplate = new StringBuilder()
                .append("INSERT INTO %s (").append(StringUtils.join(columnHolders, ","))
                .append(") VALUES(").append(StringUtils.join(valueHolders, ","))
                .append(")")
                .append(onDuplicateKeyUpdateString(writeMode.trim().toLowerCase().replace(" ",""), columnHolders))
                .toString();
    } else {

        //这里是保护,如果其他错误的使用了update,需要更换为replace
        if (writeMode.trim().toLowerCase().startsWith("update")) {
            writeMode = "replace";
        }
        writeDataSqlTemplate = new StringBuilder().append(writeMode)
                .append(" INTO %s (").append(StringUtils.join(columnHolders, ","))
                .append(") VALUES(").append(StringUtils.join(valueHolders, ","))
                .append(")").toString();
    }

    return writeDataSqlTemplate;
}

3.编写更新的逻辑

修改上述代码中的onDuplicateKeyUpdateString()方法

private static String onDuplicateKeyUpdateString(String writeMode, List<String> columnHolders) {
    String[] writeModeArr = writeMode.split("!@#", -1);
    int writeModeArrLen = writeModeArr.length;
    writeMode = writeModeArr[0];

    StringBuilder sb = new StringBuilder();
    if ("insert".equals(writeMode) && writeModeArrLen == 2) {
        sb.append(" ON CONFLICT ").append(writeModeArr[1]).append(" DO NOTHING");
    }

    if ("update".equals(writeMode) && writeModeArrLen == 3) {
        sb.append(" ON CONFLICT ").append(writeModeArr[1]);
        String[] updateFieldArr = writeModeArr[2].replace("(","").replace(")","").split(",", -1);

        List<String> updateSqlList = new ArrayList<>();
        for (String updateField : updateFieldArr) {
            if (!columnHolders.contains(updateField)) {
                continue;
            }
            updateSqlList.add(updateField + "=EXCLUDED." + updateField);
        }

        if (updateSqlList.isEmpty()) {
            sb.append(" DO NOTHING");
        } else {
            sb.append(" DO UPDATE SET ").append(StringUtils.join(updateSqlList, ","));
        }
    }

    return sb.toString();
}

4.使用方法

配置文件

{
    "job":{
        "content":[
            {
                "reader":{
                    "name":"txtfilereader",
                    "parameter":{
                        "path":[
                            "D:\\workspace\\IdeaProjects\\DataxDemo\\config\\test.csv"
                        ],
                        "column":[
                            {
                                "index":0,
                                "type":"long"
                            },
                            {
                                "index":1,
                                "type":"string"
                            }
                        ]
                    }
                },
                "writer":{
                    "name":"postgresqlwriter",
                    "parameter":{
                        "writeMode": "update!@#(id)!@#(name)",
                        "column":[
                            "id",
                            "name"
                        ],
                        "connection":[
                            {
                                "jdbcUrl":"jdbc:postgresql://127.0.0.1:5432/postgres",
                                "table":[
                                    "test_datax"
                                ]
                            }
                        ],
                        "password":"313700",
                        "username":"postgres"
                    }
                }
            }
        ],
        "setting":{
            "speed":{
                "channel":6
            }
        }
    }
}

update表示写模式,(id)表示主键或者唯一标识,(name)标识如果主键存在要更新的字段,它们用!@#连接 如果没有主键存在要更新的字段,则是不进行任何操作