Datax中mysql可以支持插入的数据,如果存在则更新。
但公司业务使用postgreSql,业务需要插入数据存在,则更新的操作,由于数据量太大,用jdbc速度太慢,只能使用datax,因此源码进行修改,使其增加更新操作。
1.修改com.alibaba.datax.plugin.writer.postgresqlwriter
注释不支持writeMode模式的代码
public void init() {
this.originalConfig = super.getPluginJobConf();
// warn:not like mysql, PostgreSQL only support insert mode, don't use
// String writeMode = this.originalConfig.getString(Key.WRITE_MODE);
// if (null != writeMode) {
// throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
// String.format("写入模式(writeMode)配置有误. 因为PostgreSQL不支持配置参数项 writeMode: %s, PostgreSQL仅使用insert sql 插入数据. 请检查您的配置并作出修改.", writeMode));
// }
this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
this.commonRdbmsWriterMaster.init(this.originalConfig);
}
2.修改com.alibaba.datax.plugin.rdbms.writer.util.WriterUtil
新增else if (dataBaseType == DataBaseType.PostgreSQL)这部分的代码,使其支持更新
public static String getWriteTemplate(List<String> columnHolders, List<String> valueHolders, String writeMode, DataBaseType dataBaseType, boolean forceUseUpdate) {
boolean isWriteModeLegal = writeMode.trim().toLowerCase().startsWith("insert")
|| writeMode.trim().toLowerCase().startsWith("replace")
|| writeMode.trim().toLowerCase().startsWith("update");
if (!isWriteModeLegal) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
String.format("您所配置的 writeMode:%s 错误. 因为DataX 目前仅支持replace,update 或 insert 方式. 请检查您的配置并作出修改.", writeMode));
}
// && writeMode.trim().toLowerCase().startsWith("replace")
String writeDataSqlTemplate;
if (forceUseUpdate ||
((dataBaseType == DataBaseType.MySql || dataBaseType == DataBaseType.Tddl) && writeMode.trim().toLowerCase().startsWith("update"))
) {
//update只在mysql下使用
writeDataSqlTemplate = new StringBuilder()
.append("INSERT INTO %s (").append(StringUtils.join(columnHolders, ","))
.append(") VALUES(").append(StringUtils.join(valueHolders, ","))
.append(")")
.append(onDuplicateKeyUpdateString(columnHolders))
.toString();
} else if (dataBaseType == DataBaseType.PostgreSQL) {
//新增postgreSQL的更新模式,进行增量更新
writeDataSqlTemplate = new StringBuilder()
.append("INSERT INTO %s (").append(StringUtils.join(columnHolders, ","))
.append(") VALUES(").append(StringUtils.join(valueHolders, ","))
.append(")")
.append(onDuplicateKeyUpdateString(writeMode.trim().toLowerCase().replace(" ",""), columnHolders))
.toString();
} else {
//这里是保护,如果其他错误的使用了update,需要更换为replace
if (writeMode.trim().toLowerCase().startsWith("update")) {
writeMode = "replace";
}
writeDataSqlTemplate = new StringBuilder().append(writeMode)
.append(" INTO %s (").append(StringUtils.join(columnHolders, ","))
.append(") VALUES(").append(StringUtils.join(valueHolders, ","))
.append(")").toString();
}
return writeDataSqlTemplate;
}
3.编写更新的逻辑
修改上述代码中的onDuplicateKeyUpdateString()方法
private static String onDuplicateKeyUpdateString(String writeMode, List<String> columnHolders) {
String[] writeModeArr = writeMode.split("!@#", -1);
int writeModeArrLen = writeModeArr.length;
writeMode = writeModeArr[0];
StringBuilder sb = new StringBuilder();
if ("insert".equals(writeMode) && writeModeArrLen == 2) {
sb.append(" ON CONFLICT ").append(writeModeArr[1]).append(" DO NOTHING");
}
if ("update".equals(writeMode) && writeModeArrLen == 3) {
sb.append(" ON CONFLICT ").append(writeModeArr[1]);
String[] updateFieldArr = writeModeArr[2].replace("(","").replace(")","").split(",", -1);
List<String> updateSqlList = new ArrayList<>();
for (String updateField : updateFieldArr) {
if (!columnHolders.contains(updateField)) {
continue;
}
updateSqlList.add(updateField + "=EXCLUDED." + updateField);
}
if (updateSqlList.isEmpty()) {
sb.append(" DO NOTHING");
} else {
sb.append(" DO UPDATE SET ").append(StringUtils.join(updateSqlList, ","));
}
}
return sb.toString();
}
4.使用方法
配置文件
{
"job":{
"content":[
{
"reader":{
"name":"txtfilereader",
"parameter":{
"path":[
"D:\\workspace\\IdeaProjects\\DataxDemo\\config\\test.csv"
],
"column":[
{
"index":0,
"type":"long"
},
{
"index":1,
"type":"string"
}
]
}
},
"writer":{
"name":"postgresqlwriter",
"parameter":{
"writeMode": "update!@#(id)!@#(name)",
"column":[
"id",
"name"
],
"connection":[
{
"jdbcUrl":"jdbc:postgresql://127.0.0.1:5432/postgres",
"table":[
"test_datax"
]
}
],
"password":"313700",
"username":"postgres"
}
}
}
],
"setting":{
"speed":{
"channel":6
}
}
}
}
update表示写模式,(id)表示主键或者唯一标识,(name)标识如果主键存在要更新的字段,它们用!@#连接 如果没有主键存在要更新的字段,则是不进行任何操作