Flink消费Kafka注意事项

391 阅读1分钟

1、五种启动模式(StartupMode)用于指定消费策略

/**
 * Startup modes for the Kafka consumer.
 *
 * <p>Each mode carries a sentinel offset value. For the modes that resolve concrete offsets
 * themselves ({@link #TIMESTAMP} and {@link #SPECIFIC_OFFSETS}) the sentinel is only a
 * placeholder.
 */
public enum StartupMode {

   /** Start from committed offsets in ZK / Kafka brokers of a specific consumer group (default). */
   GROUP_OFFSETS(KafkaTopicPartitionStateSentinel.GROUP_OFFSET),

   /** Start from the earliest offset possible. */
   EARLIEST(KafkaTopicPartitionStateSentinel.EARLIEST_OFFSET),

   /** Start from the latest offset. */
   LATEST(KafkaTopicPartitionStateSentinel.LATEST_OFFSET),

   /**
    * Start from user-supplied timestamp for each partition.
    * Since this mode will have specific offsets to start with, we do not need a sentinel value;
    * using Long.MIN_VALUE as a placeholder.
    */
   TIMESTAMP(Long.MIN_VALUE),

   /**
    * Start from user-supplied specific offsets for each partition.
    * Since this mode will have specific offsets to start with, we do not need a sentinel value;
    * using Long.MIN_VALUE as a placeholder.
    */
   SPECIFIC_OFFSETS(Long.MIN_VALUE);

   /** The sentinel offset value corresponding to this startup mode. */
   private final long stateSentinel;

   /**
    * Binds a startup mode to its sentinel offset value.
    *
    * @param stateSentinel sentinel (or placeholder) offset associated with the mode
    */
   StartupMode(long stateSentinel) {
      this.stateSentinel = stateSentinel;
   }

   /**
    * Returns the sentinel offset value of this startup mode.
    *
    * @return the value passed to the constant's constructor
    */
   public long getStateSentinel() {
      return stateSentinel;
   }
}

2、用户代码中指定消费策略

 // Build the consumer with the diamond operator so the element type is checked by the compiler;
 // the raw constructor call and the raw DeserializationSchema cast are not needed.
 FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>(topics, new SimpleStringSchema(), props);
 // Ask the consumer to commit its offsets on checkpoints.
 // NOTE(review): per the Flink docs this only takes effect when checkpointing is enabled — confirm for your job.
 consumer.setCommitOffsetsOnCheckpoints(true);
 // The start position chosen here only applies to a fresh start: open() (section 3) prefers
 // restored state over the configured startup mode whenever restored state is present.
 if (first.booleanValue()) {
   consumer.setStartFromEarliest();
 }

3、Flink消费Kafka创建流程

/**
 * Excerpt of the consumer's {@code open} hook showing how the start offset of each partition
 * is chosen. Large parts of the real method are omitted in this excerpt.
 *
 * <p>If {@code restoredState} is non-null, it takes priority: every partition missing from it
 * is seeded with the value {@code -915623761775L}. Otherwise the configured
 * {@code startupMode} decides, and its sentinel value is stored for every partition.
 *
 * @param configuration not read anywhere in the visible excerpt
 * @throws Exception declared by the signature; the throwing statements are not visible here
 */
public void open(Configuration configuration) throws Exception {
    // (some code omitted)
     // Restored state is present (the article describes it as offsets restored from a checkpoint).
    if (this.restoredState != null) {
      for (KafkaTopicPartition partition : allPartitions) {
        if (!this.restoredState.containsKey(partition))
          // The restored state holds no offset for this partition, so seed it with a sentinel.
          // Per the article, -915623761775L is the EARLIEST_OFFSET sentinel, i.e. consume from the earliest offset.
          this.restoredState.put(partition, Long.valueOf(-915623761775L)); 
      } 
     // (code omitted)
     // No restored state: fall back to the configured startup mode.
    } else {
      switch (this.startupMode) {
        case SPECIFIC_OFFSETS: // start from user-specified offsets (branch body omitted here; as written it falls through)
        case TIMESTAMP: // start from a user-specified timestamp (branch body omitted here; as written it falls through)
        default:  // remaining modes: store the mode's sentinel value as the start offset of every partition
          for (KafkaTopicPartition seedPartition : allPartitions)
            this.subscribedPartitionsToStartOffsets.put(seedPartition, Long.valueOf(this.startupMode.getStateSentinel())); 
          break;
      } 
      // At least one partition has a start offset recorded: log where reading will start.
      if (!this.subscribedPartitionsToStartOffsets.isEmpty()) {
        switch (this.startupMode) {
          // The per-mode log statements are omitted in this excerpt; as written every case falls
          // through to the GROUP_OFFSETS log call below.
          case EARLIEST: // startup mode EARLIEST: read from the earliest offset
          case LATEST: // startup mode LATEST: read from the latest offset
          case TIMESTAMP: // startup mode TIMESTAMP: read from the given timestamp
          case SPECIFIC_OFFSETS:// startup mode SPECIFIC_OFFSETS: read from the given offsets
          case GROUP_OFFSETS: // startup mode GROUP_OFFSETS: read from the committed group offsets. Per the article this is the default: StartupMode startupMode = StartupMode.GROUP_OFFSETS;
          
            LOG.info("Consumer subtask {} will start reading the following {} partitions from the committed group offsets in Kafka: {}", new Object[] { Integer.valueOf(getRuntimeContext().getIndexOfThisSubtask()), 
                  Integer.valueOf(this.subscribedPartitionsToStartOffsets.size()), this.subscribedPartitionsToStartOffsets
                  .keySet() });
            break;
        } 
      } else {
        // No partitions recorded for this subtask (body omitted in this excerpt). The article
        // says an error is reported here. NOTE(review): verify against the Flink source.
      } 
    } 
  }

本文由博客一文多发平台 OpenWrite 发布!