先说结论
0. Doris两阶段提交的基本步骤
- 第一阶段(Prepare):
  - URL: http://${hostPort}/api/${db}/${table}/_stream_load(不带 _2pc 后缀)。
  - 作用: 将数据发送到 Doris,但标记为未提交状态(类似事务的 Prepare 阶段)。
  - 关键点:
    - Doris 接收数据后,会创建一个临时事务,并把事务 ID(Transaction ID)返回给 Flink,但数据不会立即持久化到最终表。
    - 数据会被写入临时存储(如 WAL 日志),等待后续提交。
    - 开启 2PC: 在 HTTP 请求头中加入参数 two_phase_commit=true。有了这个参数,Doris 会自动将第一阶段写入的数据置为不可见,等到第二阶段提交后才会真正写入数据库。
- 第二阶段(Commit):
  - URL: http://${hostPort}/api/${db}/_stream_load_2pc(带 _2pc 后缀)。
  - 作用: 确认数据是提交还是回滚;提交时 Doris 将临时数据持久化到目标表。
  - 关键点:
    - 请求中必须携带 txn_id 参数(即第一阶段返回的事务 ID)。
    - Doris 收到 Commit 请求后,会将临时数据原子性地写入目标表,确保数据可见性。

注意:
- 数据在 HTTP 请求中被分成一小批一小批发送,也叫分块传输编码(chunked transfer encoding)
- 数据以临时缓冲区的形式存储在数据库的内存或磁盘中,但是不可见
- 所有写入操作绑定到当前的事务 id,Doris 内部是用 enable_http_server_v2 去配置维护事务的
- 当触发 Flink 的检查点的时候,Sink 任务会暂时停止接收新数据(严格 barrier 对齐,会阻塞 invoke 方法),并关闭当前的 HTTP 传输通道;此时,Doris 端临时缓冲区的数据被标记为预提交(PreCommitted),但不可见
- 预提交阶段失败
- 数据还未到达数据库,还在算子内存中: Flink会从ck恢复,重新处理这个数据,需要Source算子支持重置offset (不会导致重复写入相同的数据)
- 数据已经预提交但未持久化到ck: Flink从ck恢复,重新处理写入这个数据 (导致重复写入相同的数据)
- 如果ck失败了,Flink也会发送HTTP的请求,携带事务id和abort指令,请求数据库回滚数据
- 提交阶段失败,此时数据还在预提交状态。此时flink发送了abort请求,数据回滚到原始状态,并重置上游offset
这里只展示核心的代码,其他没用的都不展示
1.DorisSink
/**
 * Sink entry point for writing records to Doris.
 *
 * <p>The class only wires things together: depending on the configured {@code WriteMode} it
 * hands out the matching writer, committer and committable serializer. Only the core members
 * are shown in this excerpt; {@code checkKeyType()} is defined elsewhere.
 *
 * @param <IN> type of the records accepted by the sink
 */
@PublicEvolving
public class DorisSink<IN> implements StatefulSink<IN, DorisWriterState>, TwoPhaseCommittingSink<IN, DorisAbstractCommittable> {
    private static final Logger LOG = LoggerFactory.getLogger(DorisSink.class);

    private final DorisOptions dorisOptions;
    private final DorisReadOptions dorisReadOptions;
    private final DorisExecutionOptions dorisExecutionOptions;
    private final DorisRecordSerializer<IN> serializer;

    /**
     * Stores the supplied options and serializer, then runs {@code checkKeyType()}.
     *
     * @param dorisOptions connection / table options
     * @param dorisReadOptions read-side options (also handed to writers and committers)
     * @param dorisExecutionOptions write-side options, including the write mode
     * @param serializer converts incoming records into Doris records
     */
    public DorisSink(DorisOptions dorisOptions, DorisReadOptions dorisReadOptions, DorisExecutionOptions dorisExecutionOptions, DorisRecordSerializer<IN> serializer) {
        this.dorisOptions = dorisOptions;
        this.dorisReadOptions = dorisReadOptions;
        this.dorisExecutionOptions = dorisExecutionOptions;
        this.serializer = serializer;
        this.checkKeyType();
    }

    /**
     * Creates a fresh writer; delegates to {@link #getDorisAbstractWriter} with no recovered state.
     *
     * @param initContext Flink-provided initialization context
     * @return the writer matching the configured write mode
     * @throws IOException declared by the sink contract
     */
    public DorisAbstractWriter createWriter(Sink.InitContext initContext) throws IOException {
        return this.getDorisAbstractWriter(initContext, Collections.emptyList());
    }

    /**
     * Creates the committer for the configured write mode.
     *
     * @return a {@code DorisCommitter} for both stream-load modes, a {@code DorisCopyCommitter}
     *     for copy mode
     * @throws IOException declared by the sink contract
     * @throws IllegalArgumentException if the write mode is none of the three supported ones
     */
    public Committer createCommitter() throws IOException {
        WriteMode mode = this.dorisExecutionOptions.getWriteMode();
        if (WriteMode.STREAM_LOAD.equals(mode) || WriteMode.STREAM_LOAD_BATCH.equals(mode)) {
            return new DorisCommitter(this.dorisOptions, this.dorisReadOptions, this.dorisExecutionOptions);
        }
        if (WriteMode.COPY.equals(mode)) {
            return new DorisCopyCommitter(this.dorisOptions, this.dorisExecutionOptions.getMaxRetries());
        }
        throw new IllegalArgumentException("Unsupported write mode " + this.dorisExecutionOptions.getWriteMode());
    }

    /**
     * Creates a writer after a restore from checkpoint, passing the recovered writer state along.
     *
     * @param initContext Flink-provided initialization context
     * @param recoveredState writer states read back from the checkpoint
     * @return the writer matching the configured write mode
     * @throws IOException declared by the sink contract
     */
    public DorisAbstractWriter restoreWriter(Sink.InitContext initContext, Collection<DorisWriterState> recoveredState) throws IOException {
        return this.getDorisAbstractWriter(initContext, recoveredState);
    }

    /**
     * Builds the writer for the configured write mode. Only the stream-load writer receives
     * the recovered {@code states}; the batch and copy writers are constructed without them.
     *
     * @param initContext Flink-provided initialization context
     * @param states recovered writer states (may be empty)
     * @return the writer instance
     * @throws IllegalArgumentException if the write mode is none of the three supported ones
     */
    @VisibleForTesting
    public DorisAbstractWriter getDorisAbstractWriter(Sink.InitContext initContext, Collection<DorisWriterState> states) {
        WriteMode mode = this.dorisExecutionOptions.getWriteMode();
        if (WriteMode.STREAM_LOAD.equals(mode)) {
            return new DorisWriter(initContext, states, this.serializer, this.dorisOptions, this.dorisReadOptions, this.dorisExecutionOptions);
        }
        if (WriteMode.STREAM_LOAD_BATCH.equals(mode)) {
            return new DorisBatchWriter(initContext, this.serializer, this.dorisOptions, this.dorisReadOptions, this.dorisExecutionOptions);
        }
        if (WriteMode.COPY.equals(mode)) {
            return new DorisCopyWriter(initContext, this.serializer, this.dorisOptions, this.dorisReadOptions, this.dorisExecutionOptions);
        }
        throw new IllegalArgumentException("Unsupported write mode " + this.dorisExecutionOptions.getWriteMode());
    }

    /** @return a new serializer for {@code DorisWriterState} */
    public SimpleVersionedSerializer<DorisWriterState> getWriterStateSerializer() {
        return new DorisWriterStateSerializer();
    }

    /**
     * Returns the committable serializer for the configured write mode.
     *
     * @return a {@code DorisCommittableSerializer} for both stream-load modes, a
     *     {@code CopyCommittableSerializer} for copy mode
     * @throws IllegalArgumentException if the write mode is none of the three supported ones
     */
    public SimpleVersionedSerializer getCommittableSerializer() {
        WriteMode mode = this.dorisExecutionOptions.getWriteMode();
        if (WriteMode.STREAM_LOAD.equals(mode) || WriteMode.STREAM_LOAD_BATCH.equals(mode)) {
            return new DorisCommittableSerializer();
        }
        if (WriteMode.COPY.equals(mode)) {
            return new CopyCommittableSerializer();
        }
        throw new IllegalArgumentException("Unsupported write mode " + this.dorisExecutionOptions.getWriteMode());
    }

    /**
     * @param <IN> record type of the sink to build
     * @return an empty builder
     */
    public static <IN> Builder<IN> builder() {
        return new Builder<>();
    }

    /**
     * Builder for {@link DorisSink}.
     *
     * @param <IN> record type of the sink to build
     */
    public static class Builder<IN> {
        private DorisOptions dorisOptions;
        private DorisReadOptions dorisReadOptions;
        private DorisExecutionOptions dorisExecutionOptions;
        private DorisRecordSerializer<IN> serializer;

        public Builder() {
        }

        public Builder<IN> setDorisOptions(DorisOptions dorisOptions) {
            this.dorisOptions = dorisOptions;
            return this;
        }

        public Builder<IN> setDorisReadOptions(DorisReadOptions dorisReadOptions) {
            this.dorisReadOptions = dorisReadOptions;
            return this;
        }

        public Builder<IN> setDorisExecutionOptions(DorisExecutionOptions dorisExecutionOptions) {
            this.dorisExecutionOptions = dorisExecutionOptions;
            return this;
        }

        public Builder<IN> setSerializer(DorisRecordSerializer<IN> serializer) {
            this.serializer = serializer;
            return this;
        }

        /**
         * Validates the mandatory settings and creates the sink. Read options are optional:
         * when none were set, a default instance is built and remembered on this builder.
         *
         * @return the configured sink
         */
        public DorisSink<IN> build() {
            Preconditions.checkNotNull(this.dorisOptions);
            Preconditions.checkNotNull(this.dorisExecutionOptions);
            Preconditions.checkNotNull(this.serializer);
            if (this.dorisReadOptions == null) {
                this.dorisReadOptions = DorisReadOptions.builder().build();
            }
            return new DorisSink<>(this.dorisOptions, this.dorisReadOptions, this.dorisExecutionOptions, this.serializer);
        }
    }
}
2.DorisWriter
/**
 * Stream-load writer used by the Doris sink.
 *
 * <p>Records are routed to one {@code DorisStreamLoad} per table key. A load is started lazily
 * on the first record of a table within a checkpoint interval and stopped in
 * {@link #prepareCommit()}, which (when 2PC is enabled) turns each returned transaction id into
 * a {@code DorisCommittable}.
 *
 * <p>Only the core members are shown in this excerpt; {@code initializeLoad},
 * {@code checkLoadException} and {@code registerMetrics} are defined elsewhere.
 *
 * @param <IN> type of the incoming records
 */
public class DorisWriter<IN> implements DorisAbstractWriter<IN, DorisWriterState, DorisCommittable> {
    private static final Logger LOG = LoggerFactory.getLogger(DorisWriter.class);

    private final long lastCheckpointId;
    private long curCheckpointId;
    private Map<String, DorisStreamLoad> dorisStreamLoadMap = new ConcurrentHashMap<>();
    private Map<String, LabelGenerator> labelGeneratorMap = new ConcurrentHashMap<>();
    volatile boolean globalLoading;
    private Map<String, Boolean> loadingMap = new ConcurrentHashMap<>();
    private final DorisOptions dorisOptions;
    private final DorisReadOptions dorisReadOptions;
    private final DorisExecutionOptions executionOptions;
    private String labelPrefix;
    private final int subtaskId;
    private final int intervalTime;
    private final DorisRecordSerializer<IN> serializer;
    private final transient ScheduledExecutorService scheduledExecutorService;
    private transient Thread executorThread;
    private transient volatile Exception loadException = null;
    private BackendUtil backendUtil;
    private SinkWriterMetricGroup sinkMetricGroup;
    private Map<String, DorisWriteMetrics> sinkMetricsMap = new ConcurrentHashMap<>();
    private volatile boolean multiTableLoad = false;

    /**
     * Sets up the writer. The current checkpoint id starts at "restored id + 1" (or 1 when
     * nothing was restored). An empty table identifier switches the writer to multi-table mode.
     *
     * @param initContext Flink-provided initialization context
     * @param state writer states recovered from the last checkpoint (may be empty)
     * @param serializer converts incoming records into Doris records
     * @param dorisOptions connection / table options
     * @param dorisReadOptions read-side options, used here to build the HTTP client
     * @param executionOptions write-side options (label prefix, 2PC flag, check interval)
     */
    public DorisWriter(Sink.InitContext initContext, Collection<DorisWriterState> state, DorisRecordSerializer<IN> serializer, DorisOptions dorisOptions, DorisReadOptions dorisReadOptions, DorisExecutionOptions executionOptions) {
        this.lastCheckpointId = initContext.getRestoredCheckpointId().orElse(0L);
        this.curCheckpointId = this.lastCheckpointId + 1L;
        LOG.info("restore from checkpointId {}", this.lastCheckpointId);
        LOG.info("labelPrefix {}", executionOptions.getLabelPrefix());

        this.labelPrefix = executionOptions.getLabelPrefix();
        this.subtaskId = initContext.getSubtaskId();
        // Single low-priority thread reserved for the periodic stream-load check.
        this.scheduledExecutorService = new ScheduledThreadPoolExecutor(1, runnable -> {
            Thread checker = new Thread(runnable, "stream-load-check-" + this.subtaskId);
            checker.setPriority(Thread.MIN_PRIORITY);
            return checker;
        });
        this.serializer = serializer;

        if (StringUtils.isBlank(dorisOptions.getTableIdentifier())) {
            this.multiTableLoad = true;
            LOG.info("table.identifier is empty, multiple table writes.");
        }

        this.dorisOptions = dorisOptions;
        this.dorisReadOptions = dorisReadOptions;
        this.executionOptions = executionOptions;
        this.intervalTime = executionOptions.checkInterval();
        this.globalLoading = false;
        this.sinkMetricGroup = initContext.metricGroup();

        this.initializeLoad(state);
        serializer.initial();
    }

    /**
     * Aborts pre-committed transactions by label prefix and the current checkpoint id.
     *
     * <p>Every recovered state that carries a database and table is aborted under its own label
     * prefix. Afterwards, if the current label prefix was not covered and both the table
     * identifier and the label prefix are non-empty, one more abort is issued for them.
     *
     * @param recoveredStates writer states recovered from the last checkpoint
     * @throws Exception propagated from the stream loader
     */
    @VisibleForTesting
    public void abortLingeringTransactions(Collection<DorisWriterState> recoveredStates) throws Exception {
        List<String> abortedPrefixes = new ArrayList<>();
        for (DorisWriterState state : recoveredStates) {
            LOG.info("try to abort txn from DorisWriterState {}", state.toString());
            if (!state.getLabelPrefix().equals(this.labelPrefix)) {
                LOG.warn("Label prefix from previous execution {} has changed to {}.", state.getLabelPrefix(), this.executionOptions.getLabelPrefix());
            }
            if (state.getDatabase() == null || state.getTable() == null) {
                LOG.warn("Transactions cannot be aborted when restore because the last used flink-doris-connector version less than 1.5.0.");
                continue;
            }
            String key = state.getDatabase() + "." + state.getTable();
            DorisStreamLoad recoveredLoader = this.getStreamLoader(key);
            recoveredLoader.abortPreCommit(state.getLabelPrefix(), this.curCheckpointId);
            abortedPrefixes.add(state.getLabelPrefix());
        }

        if (!abortedPrefixes.contains(this.labelPrefix) && StringUtils.isNotEmpty(this.dorisOptions.getTableIdentifier()) && StringUtils.isNotEmpty(this.labelPrefix)) {
            DorisStreamLoad currentLoader = this.getStreamLoader(this.dorisOptions.getTableIdentifier());
            currentLoader.abortPreCommit(this.labelPrefix, this.curCheckpointId);
        }
    }

    /**
     * Serializes one incoming element and writes it, after checking for a pending load error.
     *
     * @param in the element to write
     * @param context Flink writer context (unused here)
     */
    public void write(IN in, SinkWriter.Context context) throws IOException, InterruptedException {
        this.checkLoadException();
        this.writeOneDorisRecord(this.serializer.serialize(in));
    }

    /**
     * Writes whatever record the serializer returns from its own {@code flush()}.
     *
     * @param endOfInput unused here
     */
    public void flush(boolean endOfInput) throws IOException, InterruptedException {
        this.writeOneDorisRecord(this.serializer.flush());
    }

    /**
     * Hands one record to the stream loader of its table. Records that are {@code null} or
     * have no row are ignored. The first record of a table since the last
     * {@link #prepareCommit()} starts a new load under a label generated from the current
     * checkpoint id.
     *
     * @param record the serialized record; its table identifier, when set, overrides the
     *     one from the options
     */
    public void writeOneDorisRecord(DorisRecord record) throws IOException, InterruptedException {
        if (record == null || record.getRow() == null) {
            return;
        }

        String tableKey = this.dorisOptions.getTableIdentifier();
        if (record.getTableIdentifier() != null) {
            tableKey = record.getTableIdentifier();
        }

        DorisStreamLoad loader = this.getStreamLoader(tableKey);
        boolean loadAlreadyStarted = this.loadingMap.containsKey(tableKey);
        if (!loadAlreadyStarted) {
            String label = this.getLabelGenerator(tableKey).generateTableLabel(this.curCheckpointId);
            loader.startLoad(label, false);
            this.loadingMap.put(tableKey, true);
            this.globalLoading = true;
            this.registerMetrics(tableKey);
        }
        loader.writeRecord(record.getRow());
    }

    /**
     * Pre-commit phase: stops the running load of every table that received data and, when
     * 2PC is enabled, collects one committable per stopped load.
     *
     * @return the committables (empty when nothing was loading or 2PC is disabled)
     */
    public Collection<DorisCommittable> prepareCommit() throws IOException, InterruptedException {
        boolean nothingLoading = !this.globalLoading && this.loadingMap.values().stream().noneMatch(Boolean::booleanValue);
        if (nothingLoading) {
            return Collections.emptyList();
        }

        this.globalLoading = false;
        List<DorisCommittable> committables = new ArrayList<>();
        for (Map.Entry<String, DorisStreamLoad> entry : this.dorisStreamLoadMap.entrySet()) {
            String tableIdentifier = entry.getKey();
            if (!this.loadingMap.getOrDefault(tableIdentifier, false)) {
                LOG.debug("skip table {}, no data need to load.", tableIdentifier);
                continue;
            }

            DorisStreamLoad loader = entry.getValue();
            RespContent response = loader.stopLoad();
            if (this.sinkMetricsMap.containsKey(tableIdentifier)) {
                this.sinkMetricsMap.get(tableIdentifier).flush(response);
            }
            if (this.executionOptions.enabled2PC()) {
                committables.add(new DorisCommittable(loader.getHostPort(), loader.getDb(), response.getTxnId()));
            }
        }
        this.loadingMap.clear();
        return committables;
    }

    /**
     * Best-effort abort, by current label, of every known stream loader. Only runs with 2PC
     * enabled in multi-table mode; a failed abort is logged and skipped.
     */
    private void abortPossibleSuccessfulTransaction() {
        if (!this.executionOptions.enabled2PC() || !this.multiTableLoad) {
            return;
        }
        LOG.info("Try to abort may have successfully preCommitted label.");
        for (DorisStreamLoad loader : this.dorisStreamLoadMap.values()) {
            try {
                loader.abortTransactionByLabel(loader.getCurrentLabel());
            } catch (Exception abortFailure) {
                LOG.warn("Skip abort transaction failed by label, reason is {}.", abortFailure.getMessage());
            }
        }
    }

    /**
     * Checkpoint phase: re-points every stream loader at an available backend, records one
     * writer state per loader, and advances the current checkpoint id to {@code checkpointId + 1}.
     *
     * @param checkpointId id of the checkpoint being taken
     * @return one state per known stream loader
     */
    public List<DorisWriterState> snapshotState(long checkpointId) throws IOException {
        List<DorisWriterState> snapshot = new ArrayList<>();
        for (DorisStreamLoad loader : this.dorisStreamLoadMap.values()) {
            loader.setHostPort(this.backendUtil.getAvailableBackend(this.subtaskId));
            snapshot.add(new DorisWriterState(this.labelPrefix, loader.getDb(), loader.getTable(), this.subtaskId));
        }
        this.curCheckpointId = checkpointId + 1L;
        return snapshot;
    }

    /** Returns the label generator of a table, creating and caching it on first use. */
    private LabelGenerator getLabelGenerator(String tableKey) {
        return this.labelGeneratorMap.computeIfAbsent(
                tableKey,
                key -> new LabelGenerator(this.labelPrefix, this.executionOptions.enabled2PC(), tableKey, this.subtaskId));
    }

    /**
     * Returns the stream loader of a table, creating and caching it on first use.
     *
     * <p>Note that this also sets the table identifier on the shared options object to
     * {@code tableKey} on every call, before the lookup.
     *
     * @param tableKey table identifier used as the cache key
     * @return the cached or newly created loader
     */
    @VisibleForTesting
    public DorisStreamLoad getStreamLoader(String tableKey) {
        LabelGenerator labelGenerator = this.getLabelGenerator(tableKey);
        this.dorisOptions.setTableIdentifier(tableKey);
        return this.dorisStreamLoadMap.computeIfAbsent(
                tableKey,
                key -> new DorisStreamLoad(this.backendUtil.getAvailableBackend(this.subtaskId), this.dorisOptions, this.executionOptions, labelGenerator, (new HttpUtil(this.dorisReadOptions)).getHttpClient()));
    }

    /**
     * Shuts the writer down: stops the scheduler, attempts the best-effort abort, closes every
     * stream loader and finally closes the serializer.
     */
    public void close() throws Exception {
        LOG.info("Close DorisWriter.");
        if (this.scheduledExecutorService != null) {
            this.scheduledExecutorService.shutdownNow();
        }
        this.abortPossibleSuccessfulTransaction();
        if (this.dorisStreamLoadMap != null) {
            for (DorisStreamLoad loader : this.dorisStreamLoadMap.values()) {
                loader.close();
            }
        }
        this.serializer.close();
    }
}