kubernetes apiserver源码: 利用channel链条优雅停机

190 阅读3分钟

分析kubernetes APIServer如何利用channel信号链完成优雅停机。

获取停止信号

这一部分会分析收到停止信号后,优雅停机操作最终会在哪里实现。kubernetes代码的调用链很长,尤其是涉及到多态的部分。为了最终确定会执行哪个接口实现,需要确切定位结构体中字段的类型。

// NewAPIServerCommand creates a *cobra.Command object with default parameters
// NOTE: this is an abridged excerpt; the code that produces completedOptions
// and the final return statement are not shown here.
func NewAPIServerCommand() *cobra.Command {
	s := options.NewServerRunOptions()
	cmd := &cobra.Command{
		Use: "kube-apiserver",
		RunE: func(cmd *cobra.Command, args []string) error {
                        // The server is started here; genericapiserver.SetupSignalHandler()
                        // supplies the stop channel that is passed to Run.
			return Run(completedOptions, genericapiserver.SetupSignalHandler())
                }
        }
}        
  • genericapiserver.SetupSignalHandler()

其中定义停止Server的信号为:

// shutdownSignals lists the signals that initiate shutdown of the server:
// os.Interrupt and syscall.SIGTERM.
var shutdownSignals = []os.Signal{os.Interrupt, syscall.SIGTERM}

也就是命令行输入Ctrl+C或者收到系统的SIGTERM进程终止信号

// SetupSignalHandler registers for SIGTERM and SIGINT and returns a stop
// channel that is closed when one of these signals is received. If a second
// signal is caught, the program is terminated with exit code 1.
// Only one of SetupSignalContext and SetupSignalHandler should be called, and
// it can only be called once.
func SetupSignalHandler() <-chan struct{} {
	// Obtain the cancellable context from SetupSignalContext and expose its
	// Done channel as the stop channel.
	signalCtx := SetupSignalContext()
	return signalCtx.Done()
}

// SetupSignalContext behaves like SetupSignalHandler but hands back a
// context.Context instead of a channel: the context is cancelled on the first
// shutdown signal, and a second signal exits the process with code 1.
// Only one of SetupSignalContext and SetupSignalHandler should be called, and
// it can only be called once.
func SetupSignalContext() context.Context {
	// Closing an already-closed channel panics, so a second call fails here.
	close(onlyOneSignalHandler)

	// Every signal listed in shutdownSignals is relayed to shutdownHandler.
	shutdownHandler = make(chan os.Signal, 2)
	signal.Notify(shutdownHandler, shutdownSignals...)

	signalCtx, stop := context.WithCancel(context.Background())
	go func() {
		// First signal (e.g. Ctrl+C): cancel the context, which closes the
		// channel returned by Done() and unblocks every goroutine reading it.
		<-shutdownHandler
		stop()

		// Second signal: exit directly.
		<-shutdownHandler
		os.Exit(1)
	}()

	return signalCtx
}
  • NewAPIServerCommand() -> Run(completedOptions, genericapiserver.SetupSignalHandler())
// Run runs the specified APIServer. This should never exit.
// stopCh is the stop channel that NewAPIServerCommand obtains from
// genericapiserver.SetupSignalHandler(), i.e. SetupSignalContext().Done().
func Run(completeOptions completedServerRunOptions, stopCh <-chan struct{}) error {
	// Log the version first to help debugging.
	klog.Infof("Version: %+v", version.Get())

	chain, err := CreateServerChain(completeOptions, stopCh)
	if err != nil {
		return err
	}

	// Expansion point 1: prepare the server chain for running.
	runner, err := chain.PrepareRun()
	if err != nil {
		return err
	}

	// Expansion point 2: run the prepared server, passing stopCh through.
	return runner.Run(stopCh)
}

  • 展开点1: NewAPIServerCommand() -> Run() -> server.PrepareRun()

// preparedGenericAPIServer is a private wrapper that enforces a call of PrepareRun() before Run can be invoked.
// It embeds *GenericAPIServer, so that type's fields and methods are promoted
// onto the wrapper.
type preparedGenericAPIServer struct {
	*GenericAPIServer
}


// PrepareRun prepares the aggregator to run. The code shown here calls the
// generic PrepareRun and wraps its result; the OpenAPI spec setup mentioned in
// the upstream comment is not part of this excerpt.
func (s *APIAggregator) PrepareRun() (preparedAPIAggregator, error) {
	return preparedAPIAggregator{
		APIAggregator: s,
		// The generic server's prepared value becomes the runnable; per the
		// surrounding text it is a preparedGenericAPIServer, whose Run method
		// is the one invoked later through the runnable interface.
		runnable: s.GenericAPIServer.PrepareRun(),
	}, nil
}
  • 展开点2: NewAPIServerCommand() -> Run() -> PrepareRun() -> prepared.Run(stopCh)

Run方法实际上是runnable接口中声明的方法:

// runnable is implemented by anything that can be run with a stop channel.
type runnable interface {
	// Run takes the stop channel and returns an error.
	Run(stopCh <-chan struct{}) error
}

所以这里涉及到"多态":

// Run delegates to the wrapped runnable, forwarding stopCh unchanged.
func (s preparedAPIAggregator) Run(stopCh <-chan struct{}) error {
	// According to the article, runnable holds a preparedGenericAPIServer, so
	// this call dispatches to preparedGenericAPIServer.Run().
	return s.runnable.Run(stopCh)
}
  • 调用链:
 NewAPIServerCommand() -> Run() -> PrepareRun() -> prepared.Run(stopCh) ->  s.runnable.Run(stopCh)

调用preparedGenericAPIServer.Run():


// Run spawns the secure http server. It only returns if stopCh is closed
// or the secure port cannot be listened on initially.
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
	delayedStopCh := make(chan struct{})

	go func() {
		defer close(delayedStopCh)
		// Blocks here until stopCh is closed (e.g. after Ctrl+C).
		<-stopCh

		// As soon as shutdown is initiated, /readyz should start returning failure.
		// This gives the load balancer a window defined by ShutdownDelayDuration to detect that /readyz is red
		// and stop sending traffic to this server.
		close(s.readinessStopCh)

		time.Sleep(s.ShutdownDelayDuration)
	}()

	// close socket after delayed stopCh
	stoppedCh, err := s.NonBlockingRun(delayedStopCh)
	if err != nil {
		return err
	}
	// Blocks here until stopCh is closed (e.g. after Ctrl+C).
	<-stopCh

	// run shutdown hooks directly. This includes deregistering from the kubernetes endpoint in case of kube-apiserver.
	err = s.RunPreShutdownHooks()
	if err != nil {
		return err
	}

	// wait for the delayed stopCh before closing the handler chain (it rejects everything after Wait has been called).
	<-delayedStopCh
	// wait for stoppedCh that is closed when the graceful termination (server.Shutdown) is finished.
	<-stoppedCh

	// Wait for all requests to finish, which are bounded by the RequestTimeout variable.
	s.HandlerChainWaitGroup.Wait()

	return nil
}

channel关闭后,channel读取操作是否还会继续阻塞?

channel关闭后,读取操作是否还会阻塞?

package main

import (
	"fmt"
	"time"
)

// main demonstrates that a receive on a closed channel does not block, even
// when nothing was ever sent on it. A goroutine closes ch after 20 seconds
// while the main loop polls it every 4 seconds; once ch is closed the receive
// yields the element type's zero value with ok == false and the program exits.
func main() {
	ch := make(chan bool, 1)
	go func() {
		time.Sleep(time.Second * 20)
		// Nothing is ever sent on the channel; it is only closed.
		close(ch)
	}()
	for {
		select {
		// Poll the channel.
		case h, ok := <-ch:
			if ok {
				fmt.Println("I received after close", h)
			} else {
				// The channel is closed: h is the zero value (false). Print it
				// so the output shows what the receive actually yielded.
				fmt.Println("not ok after closed", h)
			}
			return
		default:
			fmt.Println("waiting it")
			time.Sleep(time.Second * 4)
		}
	}
}


运行:

waiting it
waiting it
waiting it
waiting it
waiting it
not ok after closed false // 这里读到的是channel元素类型的零值(false),且ok为false;程序停止阻塞并退出

由此可见,只要关闭channel,读channel的操作就会停止阻塞,即使没有往channel里写值。

关闭操作的Channel链条

那么回到刚才的Run函数,经过分析后会发现一条关闭操作导致的channel链:


// Run spawns the secure http server. It only returns if stopCh is closed
// or the secure port cannot be listened on initially.
func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error {
	// Create a local channel.
        delayedStopCh := make(chan struct{})

	go func() {
		// Close the local channel when this goroutine finishes.
		defer close(delayedStopCh)
		// Block until the external stop signal arrives.
		<-stopCh

		// Perform this level's shutdown work, which takes some time.
		// As soon as shutdown is initiated, /readyz should start returning failure.
		// This gives the load balancer a window defined by ShutdownDelayDuration to detect that /readyz is red
		// and stop sending traffic to this server.
		close(s.readinessStopCh)

		time.Sleep(s.ShutdownDelayDuration)
	}()

	// Pass the local channel down and obtain the next level's channel.
	// close socket after delayed stopCh
	stoppedCh, err := s.NonBlockingRun(delayedStopCh)
	if err != nil {
		return err
	}
	// Blocks here until stopCh is closed (e.g. after Ctrl+C).
	<-stopCh

	// run shutdown hooks directly. This includes deregistering from the kubernetes endpoint in case of kube-apiserver.
	err = s.RunPreShutdownHooks()
	if err != nil {
		return err
	}

	// wait for the delayed stopCh before closing the handler chain (it rejects everything after Wait has been called).
	// I.e. wait for this function's own graceful-shutdown step to finish.
	<-delayedStopCh
	// wait for stoppedCh that is closed when the graceful termination (server.Shutdown) is finished.
	// I.e. wait for the innermost channel: the innermost function has finished its graceful shutdown.
	<-stoppedCh

	// Wait for all requests to finish, which are bounded by the RequestTimeout variable.
	s.HandlerChainWaitGroup.Wait()

	return nil
}

  • Run() -> s.NonBlockingRun(delayedStopCh)

下一级的函数:

// NonBlockingRun spawns the secure http server. An error is
// returned if the secure port cannot be listened on.
// The returned channel is closed when the (asynchronous) termination is finished.
// NOTE: abridged excerpt; elided code is marked with "// ..." (auditStopCh, for
// example, is set up in a part that is not shown).
func (s preparedGenericAPIServer) NonBlockingRun(stopCh <-chan struct{}) (<-chan struct{}, error) {
        // ...
        
	// Create a local channel.
	// Use an internal stop channel to allow cleanup of the listeners on error.
	internalStopCh := make(chan struct{})
	var stoppedCh <-chan struct{}
	if s.SecureServingInfo != nil && s.Handler != nil {
		var err error
		// Pass the local channel down and obtain the next level's channel.
		stoppedCh, err = s.SecureServingInfo.Serve(s.Handler, s.ShutdownTimeout, internalStopCh)
		if err != nil {
			close(internalStopCh)
			close(auditStopCh)
			return nil, err
		}
	}

	// Now that listener have bound successfully, it is the
	// responsibility of the caller to close the provided channel to
	// ensure cleanup.
	go func() {
		// Block until the external stop signal arrives.
		<-stopCh
		// Close the local channel to propagate the signal to the next level.
		close(internalStopCh)
                // ...
	}()

	s.RunPostStartHooks(stopCh)

	if _, err := systemd.SdNotify(true, "READY=1\n"); err != nil {
		klog.Errorf("Unable to send systemd daemon successful start message: %v\n", err)
	}
	// Return the innermost channel (nil if Serve was not started above).
	return stoppedCh, nil
}

channel信号链很长,但多深入几层分析,就可以发现最基本的模式如下:

// 返回一个只读channel,用于通知外部调用者,本函数执行完自己耗时操作(优雅退出操作)。
func Run(stopCh channel) (<-chan struct{}, error) {

    // 创建一个本地channel
    localCh:=make(chan struct{}, 1)
    
    go func(){
       // 关闭本地信号
       defer close(localCh)
       // 阻塞获取外部的信号
       <-stopCh
       
       // 执行一些耗时操作
       ...
       
       
    }()
    // 获取下一级的channel,将本地channel传入
    innerCh:=RunNextStep(localCh)
    
    // 返回最最内部的channel
    return innerCh
}

这些模式的函数组成一个链条,外部的调用者(caller)先执行完自己的优雅停机操作,被调用者(callee)收到外部channel的信号后停止阻塞,再执行自己的优雅停机操作。

最后,最外层的调用会获取最最内层的channel而阻塞, 当最最内层函数完成优雅停机,最外层会停止阻塞,整个优雅停机操作完成。