Kafka服务端网络层源码剖析

703 阅读2分钟

Kafka网络层使用Java NIO实现了一个NIO Server，采用Reactor模式，以事件驱动的方式处理网络请求。（原文此处为架构示意图，图略。）

数据处理流程

  1. client将请求发送给接收器Acceptor,client包括producer,consumer,broker等
  2. Acceptor将请求转发给Processor处理
  3. Processor将请求封装后写入队列requestQueue
  4. KafkaRequestHandler消费requestQueue,取出请求交给KafkaApis的handle方法,由其执行真正的I/O操作并处理外部请求
  5. handler处理完成后回调response方法,将response写入对应Processor的responseQueue
  6. Processor从responseQueue中取出response,处理返回给Client/Broker

组件解析

Acceptor

  • 接受TCP连接,并将新建立的连接分配给Processor处理。每个监听端口(EndPoint)对应一个Acceptor线程。
/** Acceptor event loop: waits for new connections and hands each one to a Processor. */
def run(): Unit = {
  // Register for OP_ACCEPT so select() wakes up when a client connects.
  serverChannel.register(nioSelector, SelectionKey.OP_ACCEPT)
  startupComplete()
  try {
    var currentProcessorIndex = 0
    while (isRunning) {
      try {

        // Block for at most 500 ms waiting for acceptable keys.
        val ready = nioSelector.select(500)
        if (ready > 0) {
          val keys = nioSelector.selectedKeys()
          val iter = keys.iterator()
          while (iter.hasNext && isRunning) {
            try {
              val key = iter.next
              // Remove the key so it is not re-processed on the next select() pass.
              iter.remove()

              if (key.isAcceptable) {
                accept(key).foreach { socketChannel =>
                  /** Round-robin: assign the new connection to the next Processor.
                   *  If a Processor's new-connection queue is full, retry with the
                   *  following one; only block on the last remaining attempt
                   *  (retriesLeft == 0) so the connection is never dropped. */
                  var retriesLeft = synchronized(processors.length)
                  var processor: Processor = null
                  do {
                    retriesLeft -= 1
                    // synchronized: the processors array may be resized concurrently
                    // by dynamic reconfiguration of num.network.threads.
                    processor = synchronized {
                      currentProcessorIndex = currentProcessorIndex % processors.length
                      processors(currentProcessorIndex)
                    }
                    currentProcessorIndex += 1
                  } while (!assignNewConnection(socketChannel, processor, retriesLeft == 0))
                }
              } else
                // The acceptor only registers OP_ACCEPT; any other ready op is a bug.
                throw new IllegalStateException("Unrecognized key state for acceptor thread.")
            } catch {
              // Log and keep going: one bad connection must not kill the acceptor.
              case e: Throwable => error("Error while accepting connection", e)
            }
          }
        }
      }
      catch {
        // Let control-flow throwables propagate; swallow everything else so
        // the acceptor thread survives unexpected errors.
        case e: ControlThrowable => throw e
        case e: Throwable => error("Error occurred", e)
      }
    }
  } finally {
    // Release the listening socket and selector on shutdown, logging (not
    // propagating) any close failures.
    debug("Closing server socket and selector.")
    CoreUtils.swallow(serverChannel.close(), this, Level.ERROR)
    CoreUtils.swallow(nioSelector.close(), this, Level.ERROR)
    shutdownComplete()
  }
}

Processor

  • 处理单个TCP连接上所有请求的线程。
  • 负责将接收到的请求添加到RequestChannel的requestQueue上,并将response请求发送给request发送方。
  • 拥有独立的Selector(java.nio.channels.Selector)。
  • 线程数由参数num.network.threads控制。推荐设置为CPU核心数*2。
/** Processor event loop: each iteration drains new work in a fixed order.
 *  The step order matters: responses queued by handlers must be staged before
 *  poll() performs the actual network I/O. */
override def run(): Unit = {
  startupComplete()
  try {
    while (isRunning) {
      try {
        /** Take connections handed over by the Acceptor and register their
         *  SocketChannels with this processor's Selector for READ events. */
        configureNewConnections()
        /** Stage outgoing responses for sending and move them into the
         *  inflightResponses temporary queue. */
        processNewResponses()
        /** Perform the actual network I/O on channels that are ready. */
        poll()
        /** Wrap completed receives as requests and add them to requestQueue. */
        processCompletedReceives()
        /** Run the callback logic for responses that finished sending
         *  (entries in the inflight queue). */
        processCompletedSends()
        /** Clean up connections that dropped due to send failures. */
        processDisconnected()
        /** Close connections exceeding the configured quota limit. */
        closeExcessConnections()
      } catch {
        // Catch-all so a single bad iteration cannot kill the processor thread.
        case e: Throwable => processException("Processor got uncaught exception.", e)
      }
    }
  } finally {
    debug(s"Closing selector - processor $id")
    // Close all channels and the selector; errors are logged, not rethrown.
    CoreUtils.swallow(closeAll(), this, Level.ERROR)
    shutdownComplete()
  }
}

KafkaRequestHandlerPool

  • 管理KafkaRequestHandler的线程池
  • 线程池大小由参数num.io.threads决定,推荐磁盘数*2
/**
 * Pool of KafkaRequestHandler threads that drain the request channel.
 * The pool size is num.io.threads and can be resized at runtime via
 * dynamic broker configuration.
 */
class KafkaRequestHandlerPool(val brokerId: Int,
                              val requestChannel: RequestChannel,
                              val apis: KafkaApis,
                              time: Time,
                              numThreads: Int,
                              requestHandlerAvgIdleMetricName: String,
                              logAndThreadNamePrefix : String) extends Logging with KafkaMetricsGroup {

  // Live pool size; shared with every handler so per-thread idle time is
  // averaged over the whole pool.
  private val threadPoolSize: AtomicInteger = new AtomicInteger(numThreads)
  // Meter tracking the aggregate fraction of time handler threads sit idle.
  private val aggregateIdleMeter = newMeter(requestHandlerAvgIdleMetricName, "percent", TimeUnit.NANOSECONDS)

  this.logIdent = s"[$logAndThreadNamePrefix Kafka Request Handler on Broker $brokerId], "
  val runnables = new mutable.ArrayBuffer[KafkaRequestHandler](numThreads)
  // Spin up the initial num.io.threads handler threads.
  (0 until numThreads).foreach(createHandler)

  /** Creates the handler with the given id and starts it on a daemon thread. */
  def createHandler(id: Int): Unit = synchronized {
    val handler = new KafkaRequestHandler(id, brokerId, aggregateIdleMeter, threadPoolSize, requestChannel, apis, time)
    runnables += handler
    KafkaThread.daemon(s"$logAndThreadNamePrefix-kafka-request-handler-$id", handler).start()
  }

  /** Grows or shrinks the pool; used for dynamic num.io.threads updates. */
  def resizeThreadPool(newSize: Int): Unit = synchronized {
    val currentSize = threadPoolSize.get
    info(s"Resizing request handler thread pool size from $currentSize to $newSize")
    if (newSize > currentSize) {
      (currentSize until newSize).foreach(createHandler)
    } else if (newSize < currentSize) {
      // Remove threads from the tail so surviving ids stay aligned with indices.
      for (idx <- (currentSize - 1) to newSize by -1)
        runnables.remove(idx).stop()
    }
    threadPoolSize.set(newSize)
  }

  /** Initiates shutdown on every handler, then waits for each to terminate. */
  def shutdown(): Unit = synchronized {
    info("shutting down")
    runnables.foreach(_.initiateShutdown())
    runnables.foreach(_.awaitShutdown())
    info("shut down completely")
  }
}

KafkaRequestHandler

  • 实际执行kafka处理请求
  • 消费requestQueue中的请求,调用KafkaApis的handle处理实际的IO操作
/**
 * A single request-handler thread: repeatedly takes a request from the
 * RequestChannel's requestQueue and dispatches it to KafkaApis, which
 * performs the actual I/O work.
 */
class KafkaRequestHandler(id: Int,
                          brokerId: Int,
                          val aggregateIdleMeter: Meter,
                          val totalHandlerThreads: AtomicInteger,
                          val requestChannel: RequestChannel,
                          apis: KafkaApis,
                          time: Time) extends Runnable with Logging {
  this.logIdent = "[Kafka Request Handler " + id + " on Broker " + brokerId + "], "
  // Released exactly once, when this thread's run loop has exited.
  private val shutdownComplete = new CountDownLatch(1)
  @volatile private var stopped = false

  def run(): Unit = {
    while (!stopped) {
      val idleStartNanos = time.nanoseconds
      /** Poll the shared requestQueue; returns null after a 300 ms timeout. */
      val nextRequest = requestChannel.receiveRequest(300)
      val dequeueNanos = time.nanoseconds
      // Waiting time counts as idle; divide by the pool size so the meter
      // reflects the average idle ratio across all handler threads.
      aggregateIdleMeter.mark((dequeueNanos - idleStartNanos) / totalHandlerThreads.get)

      nextRequest match {
        case null => // poll timed out; loop and try again

        case RequestChannel.ShutdownRequest =>
          debug(s"Kafka request handler $id on broker $brokerId received shut down command")
          shutdownComplete.countDown()
          return

        case request: RequestChannel.Request =>
          try {
            request.requestDequeueTimeNanos = dequeueNanos
            trace(s"Kafka request handler $id on broker $brokerId handling request $request")
            /** Delegate to KafkaApis for the real request processing. */
            apis.handle(request)
          } catch {
            // FatalExitError terminates the whole process with its status code.
            case e: FatalExitError =>
              shutdownComplete.countDown()
              Exit.exit(e.statusCode)
            case e: Throwable => error("Exception when handling request", e)
          } finally {
            // Always release the request buffer, even on failure.
            request.releaseBuffer()
          }
      }
    }
    shutdownComplete.countDown()
  }

  /** Asks the loop to stop after the current iteration (used when shrinking the pool). */
  def stop(): Unit =
    stopped = true

  /** Enqueues a ShutdownRequest so that one handler thread will exit. */
  def initiateShutdown(): Unit = requestChannel.sendShutdownRequest()

  /** Blocks until this handler's run loop has finished. */
  def awaitShutdown(): Unit = shutdownComplete.await()

}

总结

  1. 整体介绍了kafka server网络层的处理流程
  2. 介绍各个组件的职责和能力