openkruise如何实现镜像预热-源码讲解

2,649 阅读6分钟

官方文档介绍

daemon.png

openkruise.io/zh/docs/cor…

从上图可以看出kruise-daemon实现了真正的下载操作

在k8s安装kruise之后在kruise-system中有一个daemonset,有多少node就会启动多少pod, 每个pod处理对应node上的image相关的操作

k8s.png

pod的启动参数:

  containers:
  - args:
    - --logtostderr=true
    - --v=4
    - --addr=:10221
    - --feature-gates=
    - --socket-file=
    command:
    - /kruise-daemon

程序的两个模块的入口

main.png

kruise/main.go是openkruise中kruise-manager模块的启动入口

daemon/main.go是openkruise中的kruise-daemon模块的启动入口

如何实现镜像预热

简单流程

workflow.png

官方文档描述:它通过 DaemonSet 部署到每个 Node 节点上,提供镜像预热、容器重启等功能;那么daemon是如何能够知道用户创建了一个imagepulljob,需要下载哪些镜像的,下面通过源码进行讲解,最后附上一张方法调用的时序图,更加清晰地展示下载的流程。

初始化

main.go核心逻辑, 初始化daemon对象,然后开始运行

// main is the entry point of kruise-daemon: it builds the Daemon object
// from the parsed config and blocks in Run until the context is canceled
// or a fatal error occurs.
func main() {
...
	// Construct the daemon (informers, controllers, runtime clients).
	d, err := daemon.NewDaemon(cfg, *bindAddr)
	if err != nil {
		klog.Fatalf("Failed to new daemon: %v", err)
	}
	// Run blocks for the lifetime of the process.
	if err := d.Run(ctx); err != nil {
		klog.Fatalf("Failed to start daemon: %v", err)
	}
}

做的几件主要事情

1,添加一些资源的监听事件:

  • Pod

  • 添加对NodeImage资源的监听,当有发生add update的事件把资源放入到队列中

// NewController builds the image-puller controller for this node: it wires up
// a NodeImage informer, an event recorder, a rate-limited work queue, and the
// puller that performs the actual image downloads.
func NewController(opts daemonoptions.Options, secretManager daemonutil.SecretManager) (*Controller, error) {
	genericClient := client.GetGenericClientWithName("kruise-daemon-imagepuller")
  // Create a dedicated informer instance watching the NodeImage resource for this node.
	informer := newNodeImageInformer(genericClient.KruiseClient, opts.NodeName)

	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: genericClient.KubeClient.CoreV1().Events("")})
	recorder := eventBroadcaster.NewRecorder(opts.Scheme, v1.EventSource{Component: "kruise-daemon-imagepuller", Host: opts.NodeName})

	queue := workqueue.NewNamedRateLimitingQueue(
		// Backoff duration from 500ms to 50~55s
		// For nodeimage controller will mark a image:tag task failed (not responded for a long time) if daemon does not report status in 60s.
		workqueue.NewItemExponentialFailureRateLimiter(500*time.Millisecond, 50*time.Second+time.Millisecond*time.Duration(rand.Intn(5000))),
		"imagepuller",
	)

  // React to NodeImage add/update events; currently the handler just enqueues the object.
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			nodeImage, ok := obj.(*appsv1alpha1.NodeImage)
			if ok {
				enqueue(queue, nodeImage)
			}
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			oldNodeImage, oldOK := oldObj.(*appsv1alpha1.NodeImage)
			newNodeImage, newOK := newObj.(*appsv1alpha1.NodeImage)
			if !oldOK || !newOK {
				return
			}
			// Skip resyncs that did not change the spec (status-only updates).
			if reflect.DeepEqual(oldNodeImage.Spec, newNodeImage.Spec) {
				klog.V(5).Infof("Find imagePullNode %s spec has not changed, skip enqueueing.", newNodeImage.Name)
				return
			}
			logNewImages(oldNodeImage, newNodeImage)
			enqueue(queue, newNodeImage)
		},
	})

	// The puller talks to the container runtime's image service to pull images.
	puller, err := newRealPuller(opts.RuntimeFactory.GetImageService(), secretManager, recorder)
	if err != nil {
		return nil, fmt.Errorf("failed to new puller: %v", err)
	}

	// Health check: report unhealthy until the informer cache has synced.
	opts.Healthz.RegisterFunc("nodeImageInformerSynced", func(_ *http.Request) error {
		if !informer.HasSynced() {
			return fmt.Errorf("not synced")
		}
		return nil
	})

	return &Controller{
		scheme:                opts.Scheme,
		queue:                 queue,
		puller:                puller,
		imagePullNodeInformer: informer,
		imagePullNodeLister:   listersalpha1.NewNodeImageLister(informer.GetIndexer()),
		statusUpdater:         newStatusUpdater(genericClient.KruiseClient.AppsV1alpha1().NodeImages()),
	}, nil
}
  • ContainerRecreateRequest (目前还没具体看,后续再补充)

2,创建所需要的client

  • 获取image的client

// NewFactory detects the container runtimes available under varRunPath and
// builds a runtime Factory holding one runtimeImpl (image service + runtime
// service) per usable runtime. It returns an error when no runtime works.
func NewFactory(varRunPath string, accountManager daemonutil.ImagePullAccountManager) (Factory, error) {
  /**
  Detect the runtime configs of the supported types:
  ContainerRuntimeDocker     = "docker"
	ContainerRuntimeContainerd = "containerd"
	ContainerRuntimePouch      = "pouch"
	ContainerRuntimeCommonCRI  = "common-cri"

	*/
	cfgs := detectRuntime(varRunPath)
	if len(cfgs) == 0 {
		return nil, fmt.Errorf("not found container runtime sock")
	}

	var err error
	f := &factory{}

	var cfg runtimeConfig
	for i := range cfgs {
		cfg = cfgs[i]
		var imageService runtimeimage.ImageService
		var runtimeService criapi.RuntimeService
		var typedVersion *runtimeapi.VersionResponse

    /**
    	Obtain an imageService matching the runtime type; each type provides
    	its own implementation of the ImageService interface.
    */
		switch cfg.runtimeType {
		case ContainerRuntimeDocker: // dockerImageService: NewDockerImageService wraps a docker client inside the returned service
			imageService, err = runtimeimage.NewDockerImageService(cfg.runtimeURI, accountManager)
			if err != nil {
				klog.Warningf("Failed to new image service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
				continue
			}
		case ContainerRuntimeContainerd, ContainerRuntimeCommonCRI, ContainerRuntimePouch:
			addr, _, err := kubeletutil.GetAddressAndDialer(cfg.runtimeRemoteURI)
			if err != nil {
				klog.Warningf("Failed to get address for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
				continue
			}
			imageService, err = runtimeimage.NewCRIImageService(addr, accountManager)
			if err != nil {
				klog.Warningf("Failed to new image service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
				continue
			}
		}
   /**
    Smoke-test the imageService obtained above (dockerImageService in the docker case):
    this only verifies that images can be listed; there is no other processing here.
   */
		if _, err = imageService.ListImages(context.TODO()); err != nil {
			klog.Warningf("Failed to list images for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
			continue
		}

		runtimeService, err = criremote.NewRemoteRuntimeService(cfg.runtimeRemoteURI, time.Second*5)
		if err != nil {
			klog.Warningf("Failed to new runtime service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
			continue
		}
		typedVersion, err = runtimeService.Version(kubeRuntimeAPIVersion)
		if err != nil {
			klog.Warningf("Failed to get runtime typed version for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
			continue
		}

		klog.V(2).Infof("Add runtime impl %v, URI: (%s, %s)", typedVersion.RuntimeName, cfg.runtimeURI, cfg.runtimeRemoteURI)
		f.impls = append(f.impls, &runtimeImpl{
			cfg:            cfg,
			runtimeName:    typedVersion.RuntimeName,
			imageService:   imageService,
			runtimeService: runtimeService,
		})
	}
	// No runtime was usable: surface the last error encountered (may be nil
	// if every candidate failed inside the switch with a shadowed err).
	if len(f.impls) == 0 {
		return nil, err
	}

	return f, nil
}

实际的下载逻辑

// daemon.go — each daemon sub-controller implements this interface.
type Runnable interface {
	// Run starts running the component. The component will stop running
	// when the channel is closed. Run blocks until the channel is closed or
	// an error occurs.
	Run(<-chan struct{})
}

// imagepuller_controller.go — implements the Run interface declared in daemon.go.
// Run starts the NodeImage informer, waits for its cache to sync, then launches
// a single worker goroutine that drains the work queue until stop is closed.
func (c *Controller) Run(stop <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer c.queue.ShutDown()

	klog.Info("Starting informer for NodeImage")
	go c.imagePullNodeInformer.Run(stop)
	if !cache.WaitForCacheSync(stop, c.imagePullNodeInformer.HasSynced) {
		return
	}

	klog.Infof("Starting puller controller")
	// Launch one workers to process resources, for there is only one NodeImage per Node
  // processNextWorkItem holds the real processing logic; the inner loop never
  // ends on its own, so the worker keeps pulling items from the queue and
  // processing them until stop is closed.
	go wait.Until(func() {
		for c.processNextWorkItem() {
		}
	}, time.Second, stop)

	klog.Info("Started puller controller successfully")
	<-stop
}

imagepuller_controller.go # processNextWorkItem (从队列中获取对应的数据) -> imagepuller_controller.go # func (c *Controller) sync (根据对应的key获取到对象的数据,并更新nodeImage中镜像下载的状态) -> imagepuller_worker.go # (p *realPuller) Sync (清理内存中的数据,同时获取当前nodeImage中需要下载的image) -> imagepuller_worker.go # (w *realWorkerPool) Sync -> imagepuller_worker.go # (w *pullWorker) Run 真正的下载逻辑以及更新状态的逻辑

Imagepuller_controller.go->sync

// sync reconciles one NodeImage (identified by key): it aggregates per-image
// pull status from the puller into newStatus, writes it back to the NodeImage
// object, and re-enqueues the key with a short or long delay.
// NOTE(review): this excerpt elides code with `...` and the inner loop body,
// so the braces shown here do not balance; see the upstream source for the
// full function.
func (c *Controller) sync(key string) (retErr error) {
	...

	for imageName, imageSpec := range nodeImage.Spec.Images {
		newStatus.Desired += int32(len(imageSpec.Tags))

    // Fetch this image's status from the in-memory map; it is written there
    // after the image finishes downloading — see Note 1.
		imageStatus := c.puller.GetStatus(imageName)
		if klog.V(9).Enabled() {
			klog.V(9).Infof("get image %v status %#v", imageName, imageStatus)
		}
		if imageStatus == nil {
			continue
		}
		utilimagejob.SortStatusImageTags(imageStatus)
		newStatus.ImageStatuses[imageName] = *imageStatus
		for _, tagStatus := range imageStatus.Tags {
		// Count how many images are in each phase (elided).
	}
	if len(newStatus.ImageStatuses) == 0 {
		newStatus.ImageStatuses = nil
	}

	var limited bool
   // Push the aggregated status back onto the NodeImage object.
	limited, retErr = c.statusUpdater.updateStatus(nodeImage, &newStatus)
	if retErr != nil {
		return retErr
	}

	// Resync quickly while pulls are in flight (or we were rate-limited),
	// otherwise fall back to a slow periodic resync.
	if limited || isImageInPulling(&nodeImage.Spec, &newStatus) {
		// 3~5s
		c.queue.AddAfter(key, 3*time.Second+time.Millisecond*time.Duration(rand.Intn(2000)))
	} else {
		// 20~30m
		c.queue.AddAfter(key, 20*time.Minute+time.Millisecond*time.Duration(rand.Intn(600000)))
	}
	return nil
}

Imagepuller_worker.go中的sync核心处理下载的逻辑以及对应的状态的变更



// Run drives the pull of one image tag: it retries doPullImage with
// exponential backoff up to backoffLimit times, honoring the per-attempt
// timeout and the overall activeDeadlineSeconds, and records the final
// phase (Succeeded/Failed) plus Kubernetes events.
func (w *pullWorker) Run() {
	klog.V(3).Infof("starting worker %v version %v", w.ImageRef(), w.tagSpec.Version)

	tag := w.tagSpec.Tag
	startTime := metav1.Now()
	newStatus := &appsv1alpha1.ImageTagStatus{
		Tag:       tag,
		Phase:     appsv1alpha1.ImagePhasePulling,
		StartTime: &startTime,
		Version:   w.tagSpec.Version,
	}
	// On exit, log the outcome and publish the final status (only while the
	// worker is still the active one for this tag).
	defer func() {
		cost := time.Since(startTime.Time)
		if newStatus.Phase == appsv1alpha1.ImagePhaseFailed {
			klog.Warningf("Worker failed to pull image %s:%s, cost %v, err: %v", w.name, tag, cost, newStatus.Message)
		} else {
			klog.Infof("Successfully pull image %s:%s, cost %vs", w.name, tag, cost)
		}
		if w.IsActive() {
			w.statusUpdater.UpdateStatus(newStatus)
		}
	}()

	// Resolve timeout / backoff limit / deadline from the tag's PullPolicy,
	// falling back to the package defaults.
	timeout := defaultImagePullingTimeout
	if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.TimeoutSeconds != nil {
		timeout = time.Duration(*w.tagSpec.PullPolicy.TimeoutSeconds) * time.Second
	}
	backoffLimit := defaultImagePullingBackoffLimit
	if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.BackoffLimit != nil {
		backoffLimit = int(*w.tagSpec.PullPolicy.BackoffLimit)
	}
	if backoffLimit < 0 {
		backoffLimit = defaultImagePullingBackoffLimit
	}
	var deadline *time.Time
	if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.ActiveDeadlineSeconds != nil {
		d := startTime.Time.Add(time.Duration(*w.tagSpec.PullPolicy.ActiveDeadlineSeconds) * time.Second)
		deadline = &d
	}

	var (
		step       = time.Second
		maxBackoff = 30 * time.Second
	)

	var lastError error
	for i := 0; i <= backoffLimit; i++ {
		// Clamp this attempt's timeout so it never runs past the deadline.
		onceTimeout := timeout
		if deadline != nil {
			if deadlineLeft := time.Since(*deadline); deadlineLeft >= 0 {
				lastError = fmt.Errorf("pulling exceeds the activeDeadlineSeconds")
				break
			} else if (-deadlineLeft) < onceTimeout {
				onceTimeout = -deadlineLeft
			}
		}

		pullContext, cancel := context.WithTimeout(context.Background(), onceTimeout)
    // doPullImage performs the actual download through the runtime client
    // (e.g. the docker client).
		lastError = w.doPullImage(pullContext, newStatus)
		if lastError != nil {
			cancel()
			if !w.IsActive() {
				break
			}

			// Exponential backoff before the next attempt, capped at maxBackoff.
			klog.Warningf("Pulling image %s:%s backoff %d, error %v", w.name, tag, i+1, lastError)
			time.Sleep(step)
			step = minDuration(2*step, maxBackoff)
			continue
		}

		// Success: record the image ID, mark Succeeded and emit an event.
		if imageInfo, err := w.getImageInfo(pullContext); err == nil {
			newStatus.ImageID = fmt.Sprintf("%v@%v", w.name, imageInfo.ID)
		}
		w.finishPulling(newStatus, appsv1alpha1.ImagePhaseSucceeded, "")
		if w.ref != nil && w.eventRecorder != nil {
			w.eventRecorder.Eventf(w.ref, v1.EventTypeNormal, PullImageSucceed, "Image %v:%v, ecalpsedTime %v", w.name, w.tagSpec.Tag, time.Since(startTime.Time))
		}
		cancel()
		return
	}
	// All retries exhausted (or deadline hit): mark Failed and emit events.
	w.finishPulling(newStatus, appsv1alpha1.ImagePhaseFailed, lastError.Error())

	if w.eventRecorder != nil {
		for _, owner := range w.tagSpec.OwnerReferences {
			w.eventRecorder.Eventf(&owner, v1.EventTypeWarning, PullImageFailed, "Image %v:%v %v", w.name, w.tagSpec.Tag, lastError.Error())
		}
		if w.ref != nil {
			w.eventRecorder.Eventf(w.ref, v1.EventTypeWarning, PullImageFailed, "Image %v:%v %v", w.name, w.tagSpec.Tag, lastError.Error())
		}
	}
}



imagepuller_worker.go # func (w *pullWorker) doPullImage 下载镜像 & 状态更新

// doPullImage performs one pull attempt: it short-circuits when the image is
// already present, starts the runtime pull asynchronously, then loops reading
// progress updates from the status reader until completion, cancellation, or
// stop.
func (w *pullWorker) doPullImage(ctx context.Context, newStatus *appsv1alpha1.ImageTagStatus) (err error) {
	tag := w.tagSpec.Tag
	startTime := metav1.Now()

	klog.Infof("Worker is starting to pull image %s:%s version %v", w.name, tag, w.tagSpec.Version)

	// Image already present locally: report 100% and skip the pull.
	if _, e := w.getImageInfo(ctx); e == nil {
		klog.Infof("Image %s:%s is already exists", w.name, tag)
		newStatus.Progress = 100
		return nil
	}

	// make it asynchronous for CRI runtime will block in pulling image
	var statusReader runtimeimage.ImagePullStatusReader
	pullChan := make(chan struct{})
	go func() {
    // Kick off the actual image download via the runtime client.
		statusReader, err = w.runtime.PullImage(ctx, w.name, tag, w.secrets, w.sandboxConfig)
		close(pullChan)
	}()

	// closeStatusReader waits for PullImage to return and then releases the
	// reader; run in a goroutine when abandoning the pull early.
	closeStatusReader := func() {
		select {
		case <-pullChan:
		}
		if statusReader != nil {
			statusReader.Close()
		}
	}

	select {
	case <-w.stopCh:
		go closeStatusReader()
		klog.V(2).Infof("Pulling image %v:%v is stopped.", w.name, tag)
		return fmt.Errorf("pulling image %s:%s is stopped", w.name, tag)
	case <-ctx.Done():
		go closeStatusReader()
		klog.V(2).Infof("Pulling image %s:%s is canceled", w.name, tag)
		return fmt.Errorf("pulling image %s:%s is canceled", w.name, tag)
	case <-pullChan:
		if err != nil {
			return err
		}
	}
	defer statusReader.Close()

	progress := 0
	var progressInfo string
	logTicker := time.NewTicker(defaultImagePullingProgressLogInterval)
	defer logTicker.Stop()

  // Consume progress updates from the reader's channel and mirror them
  // into newStatus.
	for {
		select {
		case <-w.stopCh:
			klog.V(2).Infof("Pulling image %v:%v is stopped.", w.name, tag)
			return fmt.Errorf("pulling image %s:%s is stopped", w.name, tag)
		case <-ctx.Done():
			klog.V(2).Infof("Pulling image %s:%s is canceled", w.name, tag)
			return fmt.Errorf("pulling image %s:%s is canceled", w.name, tag)
		case <-logTicker.C:
			klog.V(2).Infof("Pulling image %s:%s, cost: %v, progress: %v%%, detail: %v", w.name, tag, time.Since(startTime.Time), progress, progressInfo)
		case progressStatus, ok := <-statusReader.C():
			if !ok {
				return fmt.Errorf("pulling image %s:%s internal error", w.name, tag)
			}
			progress = progressStatus.Process
			progressInfo = progressStatus.DetailInfo
			newStatus.Progress = int32(progressStatus.Process)
			klog.V(5).Infof("Pulling image %s:%s, cost: %v, progress: %v%%, detail: %v", w.name, tag, time.Since(startTime.Time), progress, progressInfo)
      // Download finished: break out of the for loop (success or error).
			if progressStatus.Finish {
				if progressStatus.Err == nil {
					return nil
				}
				return fmt.Errorf("pulling image %s:%s error %v", w.name, tag, progressStatus.Err)
			}
      // Publish the intermediate progress — this updates the in-memory
      // status map only. (Note 1)
			w.statusUpdater.UpdateStatus(newStatus)
		}
	}
}

imageruntime/docker.go # func (d *dockerImageService) PullImage



// PullImage starts pulling imageName:tag through the docker client and wraps
// the returned stream in an ImagePullStatusReader so the caller can watch
// progress over a channel. (Excerpt; the secret/credential handling is elided.)
func (d *dockerImageService) PullImage(ctx context.Context, imageName, tag string, pullSecrets []v1.Secret, _ *appsv1alpha1.SandboxConfig) (reader ImagePullStatusReader, err error) {
...


  // Kick off the image download via the docker client.
	ioReader, err = d.client.ImagePull(ctx, fullName, dockertypes.ImagePullOptions{})
	if err != nil {
		d.handleRuntimeError(err)
		return nil, err
	}
  // Wrap the response stream so download progress is reported over a channel.
	return newImagePullStatusReader(ioReader), nil
}

imageruntime/helper.go # mainloop() 使用管道的方式把镜像下载的情况发送出去

// mainloop decodes the docker pull response stream as JSON messages, keeps a
// running per-layer progress model, and forwards the aggregated status over
// the reader's channel until EOF, an error, or done.
func (r *imagePullStatusReader) mainloop() {
	defer r.reader.Close()
	decoder := json.NewDecoder(r.reader)
	progress := newPullingProgress()
	// ticker := time.NewTicker(10 * time.Millisecond)
	// defer ticker.Stop()
	for {
		select {
		case <-r.done:
			return
		default:
			var jm dockermessage.JSONMessage
			err := decoder.Decode(&jm)
			if err == io.EOF {
				klog.V(5).Info("runtime read eof")
        // EOF means the pull completed: send the final (100%) status.
				r.seedPullStatus(ImagePullStatus{Process: 100, Finish: true})
				return
			}
			if err != nil {
				klog.V(5).Infof("runtime read err %v", err)
         // Decode failure: report it as a terminal status.
				r.seedPullStatus(ImagePullStatus{Err: err, Finish: true})
				return
			}
			if jm.Error != nil {
				klog.V(5).Infof("runtime read err %v", jm.Error)
         // The daemon reported an in-band error: terminal status.
				r.seedPullStatus(ImagePullStatus{Err: fmt.Errorf("get error in pull response: %+v", jm.Error), Finish: true})
				return
			}

			// Messages with an ID describe a layer; bare statuses apply to the
			// whole pull.
			klog.V(5).Infof("runtime read progress %v", util.DumpJSON(jm))
			if jm.ID != "" {
				progress.Layers[jm.ID] = layerProgress{
					JSONProgress: jm.Progress,
					Status:       jm.Status,
				}
			} else if jm.Status != "" {
				progress.TotalStatuses = append(progress.TotalStatuses, jm.Status)
			}
			currentProgress := progress.getProgressPercent()
       // Emit the aggregated, non-terminal progress status.
			r.seedPullStatus(ImagePullStatus{Process: int(currentProgress), DetailInfo: util.DumpJSON(progress)})
		}
	}
}

方法调用时序图

workflow1.png