Kubernetes 系列 - 10. apiserver（七、停机流程）

10. apiserver（七、停机流程）

10.1 信号流转

Run方法中，使用停机信号进行优雅停机：

10.2 前置知识

先介绍一下lifecycleSignal：

// lifecycleSignal represents a single named lifecycle event that can be
// signaled once and observed by any number of waiting goroutines.
type lifecycleSignal interface {
    // Signal signals the event, indicating that the event has occurred.
    // Signal is idempotent, once signaled the event stays signaled and
    // it immediately unblocks any goroutine waiting for this event.
    Signal()

    // Signaled returns a channel that is closed when the underlying event
    // has been signaled. Successive calls to Signaled return the same value.
    Signaled() <-chan struct{}

    // Name returns the name of the signal, useful for logging.
    Name() string
}

Signal()：触发信号，唤醒所有等待该事件的协程；该方法是幂等的，触发之后事件一直保持已触发状态；
Signaled()：返回一个通道，事件触发后该通道会被关闭，多次调用返回的是同一个通道；
Name()：返回信号名称，主要用于日志输出。

生命周期信号总共包含8种：

// lifecycleSignals groups the lifecycle events of the apiserver. The first
// six fields describe shutdown (termination) events; the last two
// (HasBeenReady and MuxAndDiscoveryComplete) describe startup events.
type lifecycleSignals struct {
    // ShutdownInitiated event is signaled when an apiserver shutdown has been initiated.
    // It is signaled when the `stopCh` provided by the main goroutine
    // receives a KILL signal and is closed as a consequence.
    ShutdownInitiated lifecycleSignal

    // AfterShutdownDelayDuration event is signaled as soon as ShutdownDelayDuration
    // has elapsed since the ShutdownInitiated event.
    // ShutdownDelayDuration allows the apiserver to delay shutdown for some time.
    AfterShutdownDelayDuration lifecycleSignal

    // PreShutdownHooksStopped event is signaled when all registered
    // preshutdown hook(s) have finished running.
    PreShutdownHooksStopped lifecycleSignal

    // NotAcceptingNewRequest event is signaled when the server is no
    // longer accepting any new request, from this point on any new
    // request will receive an error.
    NotAcceptingNewRequest lifecycleSignal

    // InFlightRequestsDrained event is signaled when the existing requests
    // in flight have completed. This is used as signal to shut down the audit backends
    InFlightRequestsDrained lifecycleSignal

    // HTTPServerStoppedListening termination event is signaled when the
    // HTTP Server has stopped listening to the underlying socket.
    HTTPServerStoppedListening lifecycleSignal

    // HasBeenReady is signaled when the readyz endpoint succeeds for the first time.
    HasBeenReady lifecycleSignal

    // MuxAndDiscoveryComplete is signaled when all known HTTP paths have been installed.
    // It exists primarily to avoid returning a 404 response when a resource actually exists but we haven't installed the path to a handler.
    // The actual logic is implemented by an APIServer using the generic server library.
    MuxAndDiscoveryComplete lifecycleSignal
}

最后两种为启动信号，其他为关机信号。

Run方法的总体流程如下（这里省略了各个子协程以及if块的逻辑，便于更清晰地看到Run方法的逻辑）：

// Run (outline): the bodies of the goroutines and if-blocks are elided as
// {...}. The function first starts the goroutines that forward the lifecycle
// signals, then blocks on stopCh, and finally waits for drainedCh,
// listenerStoppedCh and stoppedCh before returning.
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    delayedStopCh := s.lifecycleSignals.AfterShutdownDelayDuration
    shutdownInitiatedCh := s.lifecycleSignals.ShutdownInitiated

    defer s.Destroy()

    // start the debug socket
    if s.UnprotectedDebugSocket != nil {...}

    go func() {...}()

    go func() {...}()

    // set up shutdownTimeout
    shutdownTimeout := s.ShutdownTimeout
    if s.ShutdownSendRetryAfter {...}

    notAcceptingNewRequestCh := s.lifecycleSignals.NotAcceptingNewRequest
    drainedCh := s.lifecycleSignals.InFlightRequestsDrained
    stopHttpServerCh := make(chan struct{})
    go func() {...}()

    // start the AuditBackend, which watches drainedCh
    if s.AuditBackend != nil {...}

    stoppedCh, listenerStoppedCh, err := s.NonBlockingRun(stopHttpServerCh, shutdownTimeout)

    httpServerStoppedListeningCh := s.lifecycleSignals.HTTPServerStoppedListening
    go func() {...}()

    preShutdownHooksHasStoppedCh := s.lifecycleSignals.PreShutdownHooksStopped
    go func() {...}()

    nonLongRunningRequestDrainedCh := make(chan struct{})
    go func() {...}()

    activeWatchesDrainedCh := make(chan struct{})
    go func() {...}()

    go func() {...}()

    // [Important] everything above only starts goroutines; the actual
    // shutdown handling logic begins here.
    klog.V(1).Info("[graceful-termination] waiting for shutdown to be initiated")
    <-stopCh

    func() {...}()

    <-drainedCh.Signaled()

    if s.AuditBackend != nil {...}

    <-listenerStoppedCh
    <-stoppedCh

    klog.V(1).Info("[graceful-termination] apiserver is exiting")
    return nil
}

1）首先看一下最开始的stopCh信号：

// NewAPIServerCommand (excerpt): the command's RunE callback calls Run and
// passes it the channel returned by genericapiserver.SetupSignalHandler()
// as the stop channel. Unrelated parts are elided as "...".
func NewAPIServerCommand() *cobra.Command {
    s := options.NewServerRunOptions()
    cmd := &cobra.Command{
       ...
       RunE: func(cmd *cobra.Command, args []string) error {
          ...
          return Run(completedOptions, genericapiserver.SetupSignalHandler())
       },
       ...
    }
    ...
}

执行Run方法的时候使用genericapiserver.SetupSignalHandler()传入stopCh信号:

// SetupSignalHandler returns the Done channel of the context created by
// SetupSignalContext; that channel is the stopCh handed to Run.
func SetupSignalHandler() <-chan struct{} {
    signalCtx := SetupSignalContext()
    return signalCtx.Done()
}

使用Done()方法返回cancelCtx的done字段存储的通道，即信号流转图里面的stopCh。

// SetupSignalContext registers shutdownHandler for the shutdown signals and
// returns a context that is cancelled when the first such signal is received.
// A second signal makes the process exit with code 1.
func SetupSignalContext() context.Context {
    close(onlyOneSignalHandler) // panics when called twice

    shutdownHandler = make(chan os.Signal, 2)

    ctx, cancel := context.WithCancel(context.Background())
    // When a shutdown signal arrives (windows: SIGINT; linux: SIGINT, SIGTERM),
    // it is written to shutdownHandler.
    signal.Notify(shutdownHandler, shutdownSignals...)
    go func() {
       <-shutdownHandler
       cancel()  // triggers close(stopCh)
       <-shutdownHandler
       os.Exit(1) // second signal. Exit directly.
    }()

    return ctx
}

小结：当程序接收到停机信号之后，触发close(stopCh)。

2）接收stopCh，触发delayedStopCh和preShutdownHooksHasStoppedCh

// Run (excerpt): shows how the closing of stopCh is turned into the
// ShutdownInitiated, AfterShutdownDelayDuration and PreShutdownHooksStopped
// signals. Unrelated parts are elided as "...".
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    delayedStopCh := s.lifecycleSignals.AfterShutdownDelayDuration
    shutdownInitiatedCh := s.lifecycleSignals.ShutdownInitiated

    ...

    go func() {
       // Deferred calls run in reverse order when the goroutine returns:
       // the log line is emitted first, then delayedStopCh is signaled.
       defer delayedStopCh.Signal()
       defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", delayedStopCh.Name())

       <-stopCh

       // As soon as shutdown is initiated, /readyz should start returning failure.
       // This gives the load balancer a window defined by ShutdownDelayDuration to detect that /readyz is red
       // and stop sending traffic to this server.
       shutdownInitiatedCh.Signal()
       klog.V(1).InfoS("[graceful-termination] shutdown event", "name", shutdownInitiatedCh.Name())

       // delayedStopCh is only signaled after ShutdownDelayDuration has passed.
       time.Sleep(s.ShutdownDelayDuration)
    }()
    ...
    
    klog.V(1).Info("[graceful-termination] waiting for shutdown to be initiated")
    <-stopCh

    // run shutdown hooks directly. This includes deregistering from
    // the kubernetes endpoint in case of kube-apiserver.
    func() {
       // The signal is sent from a deferred function, so it fires once
       // RunPreShutdownHooks has returned.
       defer func() {
          preShutdownHooksHasStoppedCh.Signal()
          klog.V(1).InfoS("[graceful-termination] pre-shutdown hooks completed", "name", preShutdownHooksHasStoppedCh.Name())
       }()
       err = s.RunPreShutdownHooks()
    }()
}

当接收到stopCh之后，立即触发：1）shutdownInitiatedCh(ShutdownInitiated)，再等待ShutdownDelayDuration之后触发delayedStopCh(AfterShutdownDelayDuration)；2）执行shutdownHooks之后触发preShutdownHooksHasStoppedCh(PreShutdownHooksStopped)，需要注意的是其中一个preShutdownHook是取消租约，以便让其他apiserver实例接手流量。

3）接收到delayedStopCh和preShutdownHooksHasStoppedCh，触发notAcceptingNewRequestCh

// Signals notAcceptingNewRequestCh once both delayedStopCh and
// preShutdownHooksHasStoppedCh have been signaled. The deferred calls run in
// reverse order: the signal fires first, then the log line is emitted.
go func() {
    defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", notAcceptingNewRequestCh.Name())
    defer notAcceptingNewRequestCh.Signal()

    // wait for the delayed stopCh before closing the handler chain
    <-delayedStopCh.Signaled()

    // Additionally wait for preshutdown hooks to also be finished, as some of them need
    // to send API calls to clean up after themselves (e.g. lease reconcilers removing
    // itself from the active servers).
    <-preShutdownHooksHasStoppedCh.Signaled()
}()

4）接收到notAcceptingNewRequestCh，触发nonLongRunningRequestDrainedCh和activeWatchesDrainedCh：

// Run (excerpt): shows the three goroutines that react to
// notAcceptingNewRequestCh. Unrelated parts are elided as "...".
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    ...

    // 1. ShutdownSendRetryAfter decides which signal closes stopHttpServerCh:
    //    notAcceptingNewRequestCh when it is false, drainedCh when it is true.
    notAcceptingNewRequestCh := s.lifecycleSignals.NotAcceptingNewRequest
    drainedCh := s.lifecycleSignals.InFlightRequestsDrained
    stopHttpServerCh := make(chan struct{})
    go func() {
       defer close(stopHttpServerCh)

       timeToStopHttpServerCh := notAcceptingNewRequestCh.Signaled()
       if s.ShutdownSendRetryAfter {
          timeToStopHttpServerCh = drainedCh.Signaled()
       }

       <-timeToStopHttpServerCh
    }()

    ...
    
    // 2. nonLongRunningRequestDrainedCh is closed after
    //    NonLongRunningRequestWaitGroup.Wait returns.
    // wait for all in-flight non-long running requests to finish
    nonLongRunningRequestDrainedCh := make(chan struct{})
    go func() {
       defer close(nonLongRunningRequestDrainedCh)
       defer klog.V(1).Info("[graceful-termination] in-flight non long-running request(s) have drained")

       // wait for the delayed stopCh before closing the handler chain (it rejects everything after Wait has been called).
       <-notAcceptingNewRequestCh.Signaled()

       s.NonLongRunningRequestWaitGroup.Wait()
    }()

    // 3. activeWatchesDrainedCh is closed after WatchRequestWaitGroup.Wait
    //    returns, or right away when ShutdownWatchTerminationGracePeriod is
    //    not positive.
    // wait for all in-flight watches to finish
    activeWatchesDrainedCh := make(chan struct{})
    go func() {
       defer close(activeWatchesDrainedCh)
       
       <-notAcceptingNewRequestCh.Signaled()
       if s.ShutdownWatchTerminationGracePeriod <= time.Duration(0) {
          klog.V(1).InfoS("[graceful-termination] not going to wait for active watch request(s) to drain")
          return
       }

       // Wait for all active watches to finish
       grace := s.ShutdownWatchTerminationGracePeriod
       activeBefore, activeAfter, err := s.WatchRequestWaitGroup.Wait(func(count int) (utilwaitgroup.RateLimiter, context.Context, context.CancelFunc) {
          // Spread the active watches evenly over the grace period.
          qps := float64(count) / grace.Seconds()
          // TODO: we don't want the QPS (max requests drained per second) to
          //  get below a certain floor value, since we want the server to
          //  drain the active watch requests as soon as possible.
          //  For now, it's hard coded to 200, and it is subject to change
          //  based on the result from the scale testing.
          if qps < 200 {
             qps = 200
          }

          // The wait is bounded by the grace period.
          ctx, cancel := context.WithTimeout(context.Background(), grace)
          // We don't expect more than one token to be consumed
          // in a single Wait call, so setting burst to 1.
          return rate.NewLimiter(rate.Limit(qps), 1), ctx, cancel
       })
       klog.V(1).InfoS("[graceful-termination] active watch request(s) have drained",
          "duration", grace, "activeWatchesBefore", activeBefore, "activeWatchesAfter", activeAfter, "error", err)
    }()

    ...
}

5）nonLongRunningRequestDrainedCh和activeWatchesDrainedCh共同触发drainedCh，再根据配置触发stopHttpServerCh：

// Run (excerpt): shows how drainedCh is signaled and what depends on it
// (stopHttpServerCh when ShutdownSendRetryAfter is set, the audit backend,
// and the final waits in the main goroutine). Unrelated parts are elided
// as "...".
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    ...

    notAcceptingNewRequestCh := s.lifecycleSignals.NotAcceptingNewRequest
    drainedCh := s.lifecycleSignals.InFlightRequestsDrained
    stopHttpServerCh := make(chan struct{})
    go func() {
       defer close(stopHttpServerCh)

       // With ShutdownSendRetryAfter set, the HTTP server is only stopped
       // after drainedCh has been signaled.
       timeToStopHttpServerCh := notAcceptingNewRequestCh.Signaled()
       if s.ShutdownSendRetryAfter {
          timeToStopHttpServerCh = drainedCh.Signaled()
       }

       <-timeToStopHttpServerCh
    }()

    // Start the audit backend before any request comes in. This means we must call Backend.Run
    // before http server start serving. Otherwise the Backend.ProcessEvents call might block.
    // AuditBackend.Run will stop as soon as all in-flight requests are drained.
    if s.AuditBackend != nil {
       if err := s.AuditBackend.Run(drainedCh.Signaled()); err != nil {
          return fmt.Errorf("failed to run the audit backend: %v", err)
       }
    }

    ...

    // drainedCh is signaled once both the non-long-running requests and the
    // active watches have drained.
    go func() {
       defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", drainedCh.Name())
       defer drainedCh.Signal()

       <-nonLongRunningRequestDrainedCh
       <-activeWatchesDrainedCh
    }()

    ...
    
    <-drainedCh.Signaled()

    if s.AuditBackend != nil {
       s.AuditBackend.Shutdown()
       klog.V(1).InfoS("[graceful-termination] audit backend shutdown completed")
    }

    // wait for stoppedCh that is closed when the graceful termination (server.Shutdown) is finished.
    <-listenerStoppedCh
    <-stoppedCh

    klog.V(1).Info("[graceful-termination] apiserver is exiting")
    return nil
}

drainedCh被触发之后，Run方法的主流程还会调用AuditBackend::Shutdown方法关闭审计后端。

6）接收到stopHttpServerCh之后，server会开始关闭HTTP服务，并通过NonBlockingRun返回的listenerStoppedCh和stoppedCh反馈关闭进度：

// Run (excerpt): stopHttpServerCh is handed to NonBlockingRun, which returns
// the stoppedCh and listenerStoppedCh channels that Run waits on later.
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    ...
    stoppedCh, listenerStoppedCh, err := s.NonBlockingRun(stopHttpServerCh, shutdownTimeout)
    ...
}

这里不再具体阐述（secure_serving.go#RunServer）。

7）接收到listenerStoppedCh之后会触发HTTPServerStoppedListening信号；主流程在AuditBackend::Shutdown执行完之后，等待listenerStoppedCh和stoppedCh都关闭，随后Run方法返回，并通过defer s.Destroy()完成server的销毁。

// Run (excerpt): the tail of the shutdown sequence in the main goroutine.
// Unrelated parts are elided as "...".
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
    ...
    // Clean up resources on shutdown.
    // Being deferred, Destroy runs when Run returns, i.e. after all the
    // waits below have completed.
    defer s.Destroy()
    ...

    // Wait for all requests in flight to drain, bounded by the RequestTimeout variable.
    <-drainedCh.Signaled()

    if s.AuditBackend != nil {
       s.AuditBackend.Shutdown()
       klog.V(1).InfoS("[graceful-termination] audit backend shutdown completed")
    }

    // wait for stoppedCh that is closed when the graceful termination (server.Shutdown) is finished.
    <-listenerStoppedCh
    <-stoppedCh

    klog.V(1).Info("[graceful-termination] apiserver is exiting")
    return nil
}