Official documentation overview
As the diagram above shows, it is kruise-daemon that performs the actual image downloads.
After Kruise is installed in a Kubernetes cluster, a DaemonSet runs in the kruise-system namespace: one pod is started per node, and each pod handles the image-related operations on its own node.
The startup arguments of the pod:
containers:
- args:
- --logtostderr=true
- --v=4
- --addr=:10221
- --feature-gates=
- --socket-file=
command:
- /kruise-daemon
Entry points of the two modules:
kruise/main.go is the startup entry point of the kruise-manager module in OpenKruise.
daemon/main.go is the startup entry point of the kruise-daemon module in OpenKruise.
How image pre-pulling is implemented
High-level flow
The official documentation describes it as: deployed to every node through a DaemonSet, providing image pre-pulling, container restart, and other features. So how does the daemon learn that a user has created an ImagePullJob, and which images it needs to download? In short, kruise-manager reconciles each ImagePullJob and writes the images to pull into per-node NodeImage custom resources, and each kruise-daemon watches the NodeImage belonging to its own node. The rest of this post walks through the source code, and a sequence diagram of the method calls at the end shows the download flow more clearly.
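To make the shape of that custom resource concrete, here is an illustrative Go snippet of a NodeImage as the daemon sees it. The types are the appsv1alpha1 API used throughout the code below; the node name, image name, and tag are invented for illustration.

package main

import (
	appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Illustration only: one NodeImage exists per node, named after the node.
// kruise-manager fills Spec.Images with what this node should pre-pull.
var exampleNodeImage = appsv1alpha1.NodeImage{
	ObjectMeta: metav1.ObjectMeta{Name: "node-1"},
	Spec: appsv1alpha1.NodeImageSpec{
		Images: map[string]appsv1alpha1.ImageSpec{
			"nginx": {
				Tags: []appsv1alpha1.ImageTagSpec{
					{Tag: "1.9.1", Version: 1},
				},
			},
		},
	},
}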
Initialization
The core logic of main.go: construct the daemon object, then run it.
func main() {
...
d, err := daemon.NewDaemon(cfg, *bindAddr)
if err != nil {
klog.Fatalf("Failed to new daemon: %v", err)
}
if err := d.Run(ctx); err != nil {
klog.Fatalf("Failed to start daemon: %v", err)
}
}
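NewDaemon wires the individual controllers together (the image puller among them) and Run starts them all. As a rough mental model only, here is a sketch based on the Runnable interface shown later in this post; the real daemon.Run does more work, such as serving the address passed via --addr.

package main

// Runnable matches the interface the kruise daemon defines for its
// components (shown later in this post).
type Runnable interface {
	Run(stop <-chan struct{})
}

// runAll is a sketch, not kruise's actual daemon.Run: start every
// registered component and block until the stop channel is closed.
func runAll(runnables []Runnable, stop <-chan struct{}) {
	for _, r := range runnables {
		go r.Run(stop)
	}
	<-stop
}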
NewDaemon does a few main things:
1. Register event handlers for several resources:
- Pod
- NodeImage: an informer watches the NodeImage resource, and whenever an add or update event occurs the object is enqueued onto a work queue (a sketch of the node-scoped informer follows below)
func NewController(opts daemonoptions.Options, secretManager daemonutil.SecretManager) (*Controller, error) {
genericClient := client.GetGenericClientWithName("kruise-daemon-imagepuller")
// create a new informer instance for NodeImage
informer := newNodeImageInformer(genericClient.KruiseClient, opts.NodeName)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: genericClient.KubeClient.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(opts.Scheme, v1.EventSource{Component: "kruise-daemon-imagepuller", Host: opts.NodeName})
queue := workqueue.NewNamedRateLimitingQueue(
// Backoff duration from 500ms to 50~55s
// For nodeimage controller will mark a image:tag task failed (not responded for a long time) if daemon does not report status in 60s.
workqueue.NewItemExponentialFailureRateLimiter(500*time.Millisecond, 50*time.Second+time.Millisecond*time.Duration(rand.Intn(5000))),
"imagepuller",
)
// handle the NodeImage events; currently they are simply put onto the queue
informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
nodeImage, ok := obj.(*appsv1alpha1.NodeImage)
if ok {
enqueue(queue, nodeImage)
}
},
UpdateFunc: func(oldObj, newObj interface{}) {
oldNodeImage, oldOK := oldObj.(*appsv1alpha1.NodeImage)
newNodeImage, newOK := newObj.(*appsv1alpha1.NodeImage)
if !oldOK || !newOK {
return
}
if reflect.DeepEqual(oldNodeImage.Spec, newNodeImage.Spec) {
klog.V(5).Infof("Find imagePullNode %s spec has not changed, skip enqueueing.", newNodeImage.Name)
return
}
logNewImages(oldNodeImage, newNodeImage)
enqueue(queue, newNodeImage)
},
})
puller, err := newRealPuller(opts.RuntimeFactory.GetImageService(), secretManager, recorder)
if err != nil {
return nil, fmt.Errorf("failed to new puller: %v", err)
}
opts.Healthz.RegisterFunc("nodeImageInformerSynced", func(_ *http.Request) error {
if !informer.HasSynced() {
return fmt.Errorf("not synced")
}
return nil
})
return &Controller{
scheme: opts.Scheme,
queue: queue,
puller: puller,
imagePullNodeInformer: informer,
imagePullNodeLister: listersalpha1.NewNodeImageLister(informer.GetIndexer()),
statusUpdater: newStatusUpdater(genericClient.KruiseClient.AppsV1alpha1().NodeImages()),
}, nil
}
- ContainerRecreateRequest (not examined in detail yet; to be covered later)
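One detail of NewController above deserves a note: newNodeImageInformer takes opts.NodeName, because each daemon pod only cares about the single NodeImage named after its own node. Below is a minimal sketch of such a node-scoped informer; it is an assumption based on that parameter, not kruise's exact code.

package sketch

import (
	"context"

	appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
	kruiseclientset "github.com/openkruise/kruise-api/client/clientset/versioned"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/tools/cache"
)

// newNodeImageInformerSketch watches only the NodeImage whose name equals
// this node's name, so the daemon never sees other nodes' objects.
func newNodeImageInformerSketch(client kruiseclientset.Interface, nodeName string) cache.SharedIndexInformer {
	fieldSelector := "metadata.name=" + nodeName
	return cache.NewSharedIndexInformer(
		&cache.ListWatch{
			ListFunc: func(opts metav1.ListOptions) (runtime.Object, error) {
				opts.FieldSelector = fieldSelector
				return client.AppsV1alpha1().NodeImages().List(context.TODO(), opts)
			},
			WatchFunc: func(opts metav1.ListOptions) (watch.Interface, error) {
				opts.FieldSelector = fieldSelector
				return client.AppsV1alpha1().NodeImages().Watch(context.TODO(), opts)
			},
		},
		&appsv1alpha1.NodeImage{},
		0, // no periodic resync
		cache.Indexers{},
	)
}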
2. Create the clients it needs:
- obtain the image client:
func NewFactory(varRunPath string, accountManager daemonutil.ImagePullAccountManager) (Factory, error) {
/**
Detect the configuration for each supported container runtime type:
ContainerRuntimeDocker = "docker"
ContainerRuntimeContainerd = "containerd"
ContainerRuntimePouch = "pouch"
ContainerRuntimeCommonCRI = "common-cri"
*/
cfgs := detectRuntime(varRunPath)
if len(cfgs) == 0 {
return nil, fmt.Errorf("not found container runtime sock")
}
var err error
f := &factory{}
var cfg runtimeConfig
for i := range cfgs {
cfg = cfgs[i]
var imageService runtimeimage.ImageService
var runtimeService criapi.RuntimeService
var typedVersion *runtimeapi.VersionResponse
/**
Obtain the imageService for the detected runtime type; each runtime type provides its own implementation of the ImageService interface methods.
*/
switch cfg.runtimeType {
case ContainerRuntimeDocker: // dockerImageService; the value returned by NewDockerImageService wraps a docker client
imageService, err = runtimeimage.NewDockerImageService(cfg.runtimeURI, accountManager)
if err != nil {
klog.Warningf("Failed to new image service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
case ContainerRuntimeContainerd, ContainerRuntimeCommonCRI, ContainerRuntimePouch:
addr, _, err := kubeletutil.GetAddressAndDialer(cfg.runtimeRemoteURI)
if err != nil {
klog.Warningf("Failed to get address for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
imageService, err = runtimeimage.NewCRIImageService(addr, accountManager)
if err != nil {
klog.Warningf("Failed to new image service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
}
/**
Exercise the imageService obtained above for this runtime type (take dockerImageService as the example):
this only verifies that images can be listed normally; there is no other processing here.
*/
if _, err = imageService.ListImages(context.TODO()); err != nil {
klog.Warningf("Failed to list images for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
runtimeService, err = criremote.NewRemoteRuntimeService(cfg.runtimeRemoteURI, time.Second*5)
if err != nil {
klog.Warningf("Failed to new runtime service for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
typedVersion, err = runtimeService.Version(kubeRuntimeAPIVersion)
if err != nil {
klog.Warningf("Failed to get runtime typed version for %v (%s, %s): %v", cfg.runtimeType, cfg.runtimeURI, cfg.runtimeRemoteURI, err)
continue
}
klog.V(2).Infof("Add runtime impl %v, URI: (%s, %s)", typedVersion.RuntimeName, cfg.runtimeURI, cfg.runtimeRemoteURI)
f.impls = append(f.impls, &runtimeImpl{
cfg: cfg,
runtimeName: typedVersion.RuntimeName,
imageService: imageService,
runtimeService: runtimeService,
})
}
if len(f.impls) == 0 {
return nil, err
}
return f, nil
}
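detectRuntime itself is not shown above: it probes varRunPath for well-known runtime sockets and returns one runtimeConfig per runtime it finds. The sketch below illustrates that idea with local stand-in types; the socket paths and the exact checks are assumptions, and the real implementation covers more cases (pouch, common CRI, custom socket files).

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Local stand-ins for the package's types and constants, for illustration.
type runtimeConfig struct {
	runtimeType      string
	runtimeURI       string
	runtimeRemoteURI string
}

const (
	ContainerRuntimeDocker     = "docker"
	ContainerRuntimeContainerd = "containerd"
)

// detectRuntimeSketch probes for docker and containerd sockets under
// varRunPath. An assumption about how detection works, not kruise's code.
func detectRuntimeSketch(varRunPath string) []runtimeConfig {
	var cfgs []runtimeConfig
	if _, err := os.Stat(filepath.Join(varRunPath, "docker.sock")); err == nil {
		cfgs = append(cfgs, runtimeConfig{
			runtimeType: ContainerRuntimeDocker,
			runtimeURI:  fmt.Sprintf("unix://%s/docker.sock", varRunPath),
		})
	}
	if _, err := os.Stat(filepath.Join(varRunPath, "containerd/containerd.sock")); err == nil {
		cfgs = append(cfgs, runtimeConfig{
			runtimeType:      ContainerRuntimeContainerd,
			runtimeRemoteURI: fmt.Sprintf("unix://%s/containerd/containerd.sock", varRunPath),
		})
	}
	return cfgs
}

func main() {
	for _, cfg := range detectRuntimeSketch("/var/run") {
		fmt.Printf("%+v\n", cfg)
	}
}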
The actual download logic
// daemon.go
type Runnable interface {
// Run starts running the component. The component will stop running
// when the channel is closed. Run blocks until the channel is closed or
// an error occurs.
Run(<-chan struct{})
}
// imagepuller_controller.go implements the Runnable interface defined in daemon.go
func (c *Controller) Run(stop <-chan struct{}) {
defer utilruntime.HandleCrash()
defer c.queue.ShutDown()
klog.Info("Starting informer for NodeImage")
go c.imagePullNodeInformer.Run(stop)
if !cache.WaitForCacheSync(stop, c.imagePullNodeInformer.HasSynced) {
return
}
klog.Infof("Starting puller controller")
// Launch one worker to process resources, for there is only one NodeImage per Node
// processNextWorkItem holds the real processing logic; this is an endless loop, so as long as the program does not exit it keeps taking items off the queue and processing them
go wait.Until(func() {
for c.processNextWorkItem() {
}
}, time.Second, stop)
klog.Info("Started puller controller successfully")
<-stop
}
The call chain: imagepuller_controller.go # processNextWorkItem (take the next item off the queue) -> imagepuller_controller.go # func (c *Controller) sync (fetch the object for that key and update the image pull status on the NodeImage) -> imagepuller_worker.go # (p *realPuller) Sync (clean up in-memory state and collect the images the current NodeImage needs to pull) -> imagepuller_worker.go # (w *realWorkerPool) Sync -> imagepuller_worker.go # (w *pullWorker) Run (the actual download logic and the status updates).
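processNextWorkItem follows the standard client-go work-loop pattern; here is a minimal sketch using the Controller fields shown earlier (the kruise version may differ in small details such as error handling):

// Work-loop sketch: take a key off the queue, sync it, and requeue with
// rate limiting on failure.
func (c *Controller) processNextWorkItem() bool {
	key, quit := c.queue.Get()
	if quit {
		return false
	}
	defer c.queue.Done(key)

	if err := c.sync(key.(string)); err != nil {
		c.queue.AddRateLimited(key)
	} else {
		c.queue.Forget(key)
	}
	return true
}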
imagepuller_controller.go # sync
func (c *Controller) sync(key string) (retErr error) {
...
for imageName, imageSpec := range nodeImage.Spec.Images {
newStatus.Desired += int32(len(imageSpec.Tags))
// fetch this image's status from the in-memory map; the status is written after the image pull finishes, see note 1 below
imageStatus := c.puller.GetStatus(imageName)
if klog.V(9).Enabled() {
klog.V(9).Infof("get image %v status %#v", imageName, imageStatus)
}
if imageStatus == nil {
continue
}
utilimagejob.SortStatusImageTags(imageStatus)
newStatus.ImageStatuses[imageName] = *imageStatus
for _, tagStatus := range imageStatus.Tags {
// count how many image tags are in each phase
}
}
if len(newStatus.ImageStatuses) == 0 {
newStatus.ImageStatuses = nil
}
var limited bool
// update the status on the NodeImage
limited, retErr = c.statusUpdater.updateStatus(nodeImage, &newStatus)
if retErr != nil {
return retErr
}
if limited || isImageInPulling(&nodeImage.Spec, &newStatus) {
// 3~5s
c.queue.AddAfter(key, 3*time.Second+time.Millisecond*time.Duration(rand.Intn(2000)))
} else {
// 20~30m
c.queue.AddAfter(key, 20*time.Minute+time.Millisecond*time.Duration(rand.Intn(600000)))
}
return nil
}
The sync path in imagepuller_worker.go holds the core download handling and the corresponding status changes; pullWorker.Run below pulls a single image tag:
func (w *pullWorker) Run() {
klog.V(3).Infof("starting worker %v version %v", w.ImageRef(), w.tagSpec.Version)
tag := w.tagSpec.Tag
startTime := metav1.Now()
newStatus := &appsv1alpha1.ImageTagStatus{
Tag: tag,
Phase: appsv1alpha1.ImagePhasePulling,
StartTime: &startTime,
Version: w.tagSpec.Version,
}
defer func() {
cost := time.Since(startTime.Time)
if newStatus.Phase == appsv1alpha1.ImagePhaseFailed {
klog.Warningf("Worker failed to pull image %s:%s, cost %v, err: %v", w.name, tag, cost, newStatus.Message)
} else {
klog.Infof("Successfully pull image %s:%s, cost %vs", w.name, tag, cost)
}
if w.IsActive() {
w.statusUpdater.UpdateStatus(newStatus)
}
}()
timeout := defaultImagePullingTimeout
if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.TimeoutSeconds != nil {
timeout = time.Duration(*w.tagSpec.PullPolicy.TimeoutSeconds) * time.Second
}
backoffLimit := defaultImagePullingBackoffLimit
if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.BackoffLimit != nil {
backoffLimit = int(*w.tagSpec.PullPolicy.BackoffLimit)
}
if backoffLimit < 0 {
backoffLimit = defaultImagePullingBackoffLimit
}
var deadline *time.Time
if w.tagSpec.PullPolicy != nil && w.tagSpec.PullPolicy.ActiveDeadlineSeconds != nil {
d := startTime.Time.Add(time.Duration(*w.tagSpec.PullPolicy.ActiveDeadlineSeconds) * time.Second)
deadline = &d
}
var (
step = time.Second
maxBackoff = 30 * time.Second
)
var lastError error
for i := 0; i <= backoffLimit; i++ {
onceTimeout := timeout
if deadline != nil {
if deadlineLeft := time.Since(*deadline); deadlineLeft >= 0 {
lastError = fmt.Errorf("pulling exceeds the activeDeadlineSeconds")
break
} else if (-deadlineLeft) < onceTimeout {
onceTimeout = -deadlineLeft
}
}
pullContext, cancel := context.WithTimeout(context.Background(), onceTimeout)
// doPullImage uses the runtime client (here the docker client) to pull the image
lastError = w.doPullImage(pullContext, newStatus)
if lastError != nil {
cancel()
if !w.IsActive() {
break
}
klog.Warningf("Pulling image %s:%s backoff %d, error %v", w.name, tag, i+1, lastError)
time.Sleep(step)
step = minDuration(2*step, maxBackoff)
continue
}
if imageInfo, err := w.getImageInfo(pullContext); err == nil {
newStatus.ImageID = fmt.Sprintf("%v@%v", w.name, imageInfo.ID)
}
w.finishPulling(newStatus, appsv1alpha1.ImagePhaseSucceeded, "")
if w.ref != nil && w.eventRecorder != nil {
w.eventRecorder.Eventf(w.ref, v1.EventTypeNormal, PullImageSucceed, "Image %v:%v, elapsedTime %v", w.name, w.tagSpec.Tag, time.Since(startTime.Time))
}
cancel()
return
}
w.finishPulling(newStatus, appsv1alpha1.ImagePhaseFailed, lastError.Error())
if w.eventRecorder != nil {
for _, owner := range w.tagSpec.OwnerReferences {
w.eventRecorder.Eventf(&owner, v1.EventTypeWarning, PullImageFailed, "Image %v:%v %v", w.name, w.tagSpec.Tag, lastError.Error())
}
if w.ref != nil {
w.eventRecorder.Eventf(w.ref, v1.EventTypeWarning, PullImageFailed, "Image %v:%v %v", w.name, w.tagSpec.Tag, lastError.Error())
}
}
}
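To make the retry timing in Run concrete: the sleep between attempts starts at one second and doubles up to the 30-second cap, exactly as the step/minDuration logic above computes. A tiny self-contained illustration:

package main

import (
	"fmt"
	"time"
)

// minDuration mirrors the helper used in pullWorker.Run.
func minDuration(a, b time.Duration) time.Duration {
	if a < b {
		return a
	}
	return b
}

func main() {
	step := time.Second
	maxBackoff := 30 * time.Second
	for i := 1; i <= 8; i++ {
		fmt.Printf("backoff before retry %d: %v\n", i, step)
		step = minDuration(2*step, maxBackoff)
	}
	// Prints: 1s, 2s, 4s, 8s, 16s, 30s, 30s, 30s
}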
imagepuller_worker.go # func (w *pullWorker) doPullImage: download the image & update the status
func (w *pullWorker) doPullImage(ctx context.Context, newStatus *appsv1alpha1.ImageTagStatus) (err error) {
tag := w.tagSpec.Tag
startTime := metav1.Now()
klog.Infof("Worker is starting to pull image %s:%s version %v", w.name, tag, w.tagSpec.Version)
if _, e := w.getImageInfo(ctx); e == nil {
klog.Infof("Image %s:%s is already exists", w.name, tag)
newStatus.Progress = 100
return nil
}
// make it asynchronous for CRI runtime will block in pulling image
var statusReader runtimeimage.ImagePullStatusReader
pullChan := make(chan struct{})
go func() {
// pull the image
statusReader, err = w.runtime.PullImage(ctx, w.name, tag, w.secrets, w.sandboxConfig)
close(pullChan)
}()
closeStatusReader := func() {
select {
case <-pullChan:
}
if statusReader != nil {
statusReader.Close()
}
}
select {
case <-w.stopCh:
go closeStatusReader()
klog.V(2).Infof("Pulling image %v:%v is stopped.", w.name, tag)
return fmt.Errorf("pulling image %s:%s is stopped", w.name, tag)
case <-ctx.Done():
go closeStatusReader()
klog.V(2).Infof("Pulling image %s:%s is canceled", w.name, tag)
return fmt.Errorf("pulling image %s:%s is canceled", w.name, tag)
case <-pullChan:
if err != nil {
return err
}
}
defer statusReader.Close()
progress := 0
var progressInfo string
logTicker := time.NewTicker(defaultImagePullingProgressLogInterval)
defer logTicker.Stop()
// read progress from the channel and update the status
for {
select {
case <-w.stopCh:
klog.V(2).Infof("Pulling image %v:%v is stopped.", w.name, tag)
return fmt.Errorf("pulling image %s:%s is stopped", w.name, tag)
case <-ctx.Done():
klog.V(2).Infof("Pulling image %s:%s is canceled", w.name, tag)
return fmt.Errorf("pulling image %s:%s is canceled", w.name, tag)
case <-logTicker.C:
klog.V(2).Infof("Pulling image %s:%s, cost: %v, progress: %v%%, detail: %v", w.name, tag, time.Since(startTime.Time), progress, progressInfo)
case progressStatus, ok := <-statusReader.C():
if !ok {
return fmt.Errorf("pulling image %s:%s internal error", w.name, tag)
}
progress = progressStatus.Process
progressInfo = progressStatus.DetailInfo
newStatus.Progress = int32(progressStatus.Process)
klog.V(5).Infof("Pulling image %s:%s, cost: %v, progress: %v%%, detail: %v", w.name, tag, time.Since(startTime.Time), progress, progressInfo)
// pulling finished; exit the loop
if progressStatus.Finish {
if progressStatus.Err == nil {
return nil
}
return fmt.Errorf("pulling image %s:%s error %v", w.name, tag, progressStatus.Err)
}
// the real status update; note that it updates the in-memory status (note 1)
w.statusUpdater.UpdateStatus(newStatus)
}
}
}
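The ImagePullStatusReader used above is a small abstraction whose definition is not quoted in this post. From its usage in doPullImage and in mainloop below, it must look roughly like the following; this is inferred from usage, so see kruise's imageruntime package for the authoritative definition.

// Inferred from usage: the status struct carries the fields read above
// (Process, DetailInfo, Finish, Err), and the reader exposes a channel
// plus a Close method.
type ImagePullStatus struct {
	Process    int    // progress percentage, 0-100
	DetailInfo string // human-readable progress detail
	Finish     bool   // true once pulling has finished
	Err        error  // non-nil if pulling failed
}

type ImagePullStatusReader interface {
	C() <-chan ImagePullStatus
	Close()
}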
imageruntime/docker.go # func (d *dockerImageService) PullImage
func (d *dockerImageService) PullImage(ctx context.Context, imageName, tag string, pullSecrets []v1.Secret, _ *appsv1alpha1.SandboxConfig) (reader ImagePullStatusReader, err error) {
...
// pull the image
ioReader, err = d.client.ImagePull(ctx, fullName, dockertypes.ImagePullOptions{})
if err != nil {
d.handleRuntimeError(err)
return nil, err
}
// the download progress is reported back through a channel
return newImagePullStatusReader(ioReader), nil
}
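For readers unfamiliar with the docker client API: ImagePull only starts the pull and returns an io.ReadCloser that streams JSON progress messages, which is exactly the stream mainloop below decodes. A minimal standalone usage sketch; it assumes a reachable docker daemon, and the image reference is arbitrary.

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"io"

	dockertypes "github.com/docker/docker/api/types"
	dockerclient "github.com/docker/docker/client"
)

func main() {
	cli, err := dockerclient.NewClientWithOpts(dockerclient.FromEnv)
	if err != nil {
		panic(err)
	}
	// ImagePull only initiates the pull; progress arrives as a JSON stream.
	rc, err := cli.ImagePull(context.Background(), "docker.io/library/nginx:1.9.1", dockertypes.ImagePullOptions{})
	if err != nil {
		panic(err)
	}
	defer rc.Close()

	dec := json.NewDecoder(rc)
	for {
		var msg map[string]interface{}
		if err := dec.Decode(&msg); err == io.EOF {
			break
		} else if err != nil {
			panic(err)
		}
		fmt.Println(msg["status"], msg["id"])
	}
}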
imageruntime/helper.go # mainloop(): sends the image pull progress out through a channel
func (r *imagePullStatusReader) mainloop() {
defer r.reader.Close()
decoder := json.NewDecoder(r.reader)
progress := newPullingProgress()
// ticker := time.NewTicker(10 * time.Millisecond)
// defer ticker.Stop()
for {
select {
case <-r.done:
return
default:
var jm dockermessage.JSONMessage
err := decoder.Decode(&jm)
if err == io.EOF {
klog.V(5).Info("runtime read eof")
// send the status
r.seedPullStatus(ImagePullStatus{Process: 100, Finish: true})
return
}
if err != nil {
klog.V(5).Infof("runtime read err %v", err)
// send the status
r.seedPullStatus(ImagePullStatus{Err: err, Finish: true})
return
}
if jm.Error != nil {
klog.V(5).Infof("runtime read err %v", jm.Error)
// send the status
r.seedPullStatus(ImagePullStatus{Err: fmt.Errorf("get error in pull response: %+v", jm.Error), Finish: true})
return
}
klog.V(5).Infof("runtime read progress %v", util.DumpJSON(jm))
if jm.ID != "" {
progress.Layers[jm.ID] = layerProgress{
JSONProgress: jm.Progress,
Status: jm.Status,
}
} else if jm.Status != "" {
progress.TotalStatuses = append(progress.TotalStatuses, jm.Status)
}
currentProgress := progress.getProgressPercent()
// send the status
r.seedPullStatus(ImagePullStatus{Process: int(currentProgress), DetailInfo: util.DumpJSON(progress)})
}
}
}
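getProgressPercent is not quoted above; it folds the per-layer progress into the single percentage that gets sent out. Below is a hedged sketch of one plausible aggregation using the layerProgress fields from the code above; the real kruise weighting differs in detail.

package sketch

import (
	dockermessage "github.com/docker/docker/pkg/jsonmessage"
)

// Local stand-in mirroring the layerProgress struct used in mainloop.
type layerProgress struct {
	JSONProgress *dockermessage.JSONProgress
	Status       string
}

// progressPercentSketch averages per-layer completion, counting finished
// layers as 100%. Illustration only, not kruise's exact formula.
func progressPercentSketch(layers map[string]layerProgress) int {
	if len(layers) == 0 {
		return 0
	}
	var sum float64
	for _, lp := range layers {
		switch {
		case lp.Status == "Pull complete" || lp.Status == "Already exists":
			sum += 100
		case lp.JSONProgress != nil && lp.JSONProgress.Total > 0:
			sum += 100 * float64(lp.JSONProgress.Current) / float64(lp.JSONProgress.Total)
		}
	}
	return int(sum / float64(len(layers)))
}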