《Kubernetes源码剖析》 Reading Notes, Part 2: The Informer Mechanism


1. Reading Goals

  • The informer mechanism

2. The Informer Mechanism

2.1 Starting with Kubernetes Custom Controllers

In Kubernetes, the controller pattern watches for changes to API objects and drives each object toward its desired state. The figure above shows the workflow of a custom controller: the controller learns about changes to custom or built-in objects through the APIServer. Sensing those changes requires an informer for that object; every resource type has its own informer, which means each informer needs to maintain a connection to the APIServer, and the component that maintains this connection is the reflector. The following example from the book illustrates how an informer is used:

package main

import (
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	"log"
	"time"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", "/Users/wangcheng/GolandProjects/kubeconfig/config")
	if err != nil {
		panic(err)
	}
	// the informer talks to the APIServer through the clientset
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	// the informer is a long-running goroutine; notify it to exit before the process exits
	stopCh := make(chan struct{})
	defer close(stopCh)

	sharedInformers := informers.NewSharedInformerFactory(clientset, time.Minute)

	// get the Pod informer
	informer := sharedInformers.Core().V1().Pods().Informer()
	// in the usual setup, the informer pushes resource objects to a WorkQueue through these callbacks;
	// the informer mechanism makes it easy to watch changes to the resources we care about
	informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			mObj := obj.(v1.Object)
			log.Printf("New pod added to store: %s\n", mObj.GetName())
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			mOldObj := oldObj.(v1.Object)
			mNewObj := newObj.(v1.Object)
			log.Printf("old pod:%s update to new pod %s\n", mOldObj.GetName(), mNewObj.GetName())
		},
		DeleteFunc: func(obj interface{}) {
			mObj := obj.(v1.Object)
			log.Printf("Pod deleted from store: %s", mObj.GetName())
		},
	})

	informer.Run(stopCh)
}

The code above uses informers.NewSharedInformerFactory to create the shared factory, which ultimately calls NewSharedInformerFactoryWithOptions:

func NewSharedInformerFactoryWithOptions(client kubernetes.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory {
	factory := &sharedInformerFactory{
		client:           client,
		namespace:        v1.NamespaceAll,
		defaultResync:    defaultResync,
		informers:        make(map[reflect.Type]cache.SharedIndexInformer),
		startedInformers: make(map[reflect.Type]bool),
		customResync:     make(map[reflect.Type]time.Duration),
	}

	// Apply all options
	for _, opt := range options {
		factory = opt(factory)
	}

	return factory
}

type sharedInformerFactory struct {
	client           kubernetes.Interface
	namespace        string
	tweakListOptions internalinterfaces.TweakListOptionsFunc
	lock             sync.Mutex
	defaultResync    time.Duration
	customResync     map[reflect.Type]time.Duration

	informers map[reflect.Type]cache.SharedIndexInformer
	// startedInformers is used for tracking which informers have been started.
	// This allows Start() to be called multiple times safely.
	startedInformers map[reflect.Type]bool
}

2.1.1 SharedInformerFactory

SharedInformerFactory caches the informers for the different Kubernetes resource types in its informers map.

2.1.2 Initializing the PodInformer

The logic for obtaining a Pod informer is simple: look it up in the map first, and create it if it does not exist. The ListWatch is created as part of constructing the PodInformer:

// NewFilteredPodInformer constructs a new informer for Pod type.
// Always prefer using an informer factory to get a shared informer instead of getting an independent
// one. This reduces memory footprint and number of connections to the server.
func NewFilteredPodInformer(client kubernetes.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
	return cache.NewSharedIndexInformer(
		&cache.ListWatch{
			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
				if tweakListOptions != nil {
					tweakListOptions(&options)
				}
				return client.CoreV1().Pods(namespace).List(options)
			},
			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
				if tweakListOptions != nil {
					tweakListOptions(&options)
				}
				return client.CoreV1().Pods(namespace).Watch(options)
			},
		},
		&corev1.Pod{},
		resyncPeriod,
		indexers,
	)
}

Both ListFunc and WatchFunc ultimately use the ClientSet to list and watch the resource objects.

2.1.3 informer.Run()

func (s *sharedIndexInformer) Run(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()

	fifo := NewDeltaFIFO(MetaNamespaceKeyFunc, s.indexer)

	cfg := &Config{
		Queue:            fifo,
		ListerWatcher:    s.listerWatcher,
		ObjectType:       s.objectType,
		FullResyncPeriod: s.resyncCheckPeriod,
		RetryOnError:     false,
		ShouldResync:     s.processor.shouldResync,

		Process: s.HandleDeltas,
	}

	func() {
		s.startedLock.Lock()
		defer s.startedLock.Unlock()

		s.controller = New(cfg)
		s.controller.(*controller).clock = s.clock
		s.started = true
	}()

	// Separate stop channel because Processor should be stopped strictly after controller
	processorStopCh := make(chan struct{})
	var wg wait.Group
	defer wg.Wait()              // Wait for Processor to stop
	defer close(processorStopCh) // Tell Processor to stop
	wg.StartWithChannel(processorStopCh, s.cacheMutationDetector.Run)
	wg.StartWithChannel(processorStopCh, s.processor.run)

	defer func() {
		s.startedLock.Lock()
		defer s.startedLock.Unlock()
		s.stopped = true // Don't want any new listeners
	}()
	s.controller.Run(stopCh)
}

Reading this method answered several questions I had:

When is the DeltaFIFO created? Does every Kubernetes informer maintain its own DeltaFIFO queue?

func (s *sharedIndexInformer) Run(stopCh <-chan struct{}) initializes the DeltaFIFO and the controller right away, and then starts the func (p *sharedProcessor) run(stopCh <-chan struct{}) method via wg.StartWithChannel.

func (p *sharedProcessor) run(stopCh <-chan struct{}) {
	func() {
		p.listenersLock.RLock()
		defer p.listenersLock.RUnlock()
		for _, listener := range p.listeners {
			p.wg.Start(listener.run)
			p.wg.Start(listener.pop)
		}
		p.listenersStarted = true
	}()
	<-stopCh
	p.listenersLock.RLock()
	defer p.listenersLock.RUnlock()
	for _, listener := range p.listeners {
		close(listener.addCh) // Tell .pop() to stop. .pop() will tell .run() to stop
	}
	p.wg.Wait() // Wait for all .pop() and .run() to stop
}

The AddEventHandler added in the informer example code is registered as a listener, and the func (p *processorListener) run() method is invoked for it.

func (p *processorListener) run() {
	// this call blocks until the channel is closed.  When a panic happens during the notification
	// we will catch it, **the offending item will be skipped!**, and after a short delay (one second)
	// the next notification will be attempted.  This is usually better than the alternative of never
	// delivering again.
	stopCh := make(chan struct{})
	wait.Until(func() {
		// this gives us a few quick retries before a long pause and then a few more quick retries
		err := wait.ExponentialBackoff(retry.DefaultRetry, func() (bool, error) {
			for next := range p.nextCh {
				switch notification := next.(type) {
				case updateNotification:
					p.handler.OnUpdate(notification.oldObj, notification.newObj)
				case addNotification:
					p.handler.OnAdd(notification.newObj)
				case deleteNotification:
					p.handler.OnDelete(notification.oldObj)
				default:
					utilruntime.HandleError(fmt.Errorf("unrecognized notification: %T", next))
				}
			}
			// the only way to get here is if the p.nextCh is empty and closed
			return true, nil
		})

		// the only way to get here is if the p.nextCh is empty and closed
		if err == nil {
			close(stopCh)
		}
	}, 1*time.Minute, stopCh)
}

This method invokes a different handler callback depending on the notification type (calling back into the functions registered with AddEventHandler). The Process: s.HandleDeltas function runs whenever the DeltaFIFO queue pops; its main job is to store the popped objects into the indexer.
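The buffering between the distributor and the handler can be sketched as a toy two-goroutine listener (a simplified model of processorListener's pop/run pair; client-go uses a growing ring buffer where this sketch uses a plain slice):

```go
package main

import (
	"fmt"
	"sync"
)

// listener is a toy processorListener: addCh receives notifications from the
// distributor, pop buffers them, and run invokes the handler.
type listener struct {
	addCh  chan string
	nextCh chan string
	handle func(string)
	wg     sync.WaitGroup
}

func newListener(handle func(string)) *listener {
	l := &listener{addCh: make(chan string), nextCh: make(chan string), handle: handle}
	l.wg.Add(2)
	go l.pop()
	go l.run()
	return l
}

// pop moves notifications from addCh to nextCh, buffering in a slice so a
// slow handler never blocks the distributor.
func (l *listener) pop() {
	defer l.wg.Done()
	defer close(l.nextCh)
	var pending []string
	for {
		var out chan string // nil unless something is pending; a nil channel
		var next string     // send is never selected, disabling that case
		if len(pending) > 0 {
			out = l.nextCh
			next = pending[0]
		}
		select {
		case n, ok := <-l.addCh:
			if !ok { // addCh closed: drain the buffer, then stop
				for _, p := range pending {
					l.nextCh <- p
				}
				return
			}
			pending = append(pending, n)
		case out <- next:
			pending = pending[1:]
		}
	}
}

// run consumes nextCh and calls the handler, like processorListener.run.
func (l *listener) run() {
	defer l.wg.Done()
	for n := range l.nextCh {
		l.handle(n)
	}
}

func main() {
	var got []string
	l := newListener(func(n string) { got = append(got, n) })
	l.addCh <- "add pod-a"
	l.addCh <- "update pod-a"
	close(l.addCh) // as in sharedProcessor.run: closing addCh stops pop, which stops run
	l.wg.Wait()
	fmt.Println(got) // prints "[add pod-a update pod-a]"
}
```

The nil-channel trick in the select mirrors the design choice in client-go: when the buffer is empty, the send case is disabled, so pop blocks only on receiving new notifications.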

2.2 informer的几个组件

  • Reflector: senses resource changes through the ListAndWatch mechanism
  • DeltaFIFO: an incremental first-in, first-out queue (solving the producer-consumer problem)
  • Indexer: a local cache, so resource information does not have to be fetched from the APIServer on every access, which lightens the APIServer's load. A key responsibility of the informer is syncing the data in the DeltaFIFO into this local cache, keeping the resource state consistent with etcd.

2.2.1 Reflector

The earlier source-code walkthrough showed that func (s *sharedIndexInformer) Run creates the DeltaFIFO and that func (p *processorListener) run() performs the event callbacks; the callback logic typically pushes the event object into a WorkQueue. Next, let's look at the last line of that Run method, the call into func (c *controller) Run(stopCh <-chan struct{}):

func (c *controller) Run(stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	go func() {
		<-stopCh
		c.config.Queue.Close()
	}()
	r := NewReflector(
		c.config.ListerWatcher,
		c.config.ObjectType,
		c.config.Queue,
		c.config.FullResyncPeriod,
	)
	r.ShouldResync = c.config.ShouldResync
	r.clock = c.clock

	c.reflectorMutex.Lock()
	c.reflector = r
	c.reflectorMutex.Unlock()

	var wg wait.Group
	defer wg.Wait()

	wg.StartWithChannel(stopCh, r.Run)

	wait.Until(c.processLoop, time.Second, stopCh)
}

NewReflector initializes the reflector, and controller.Run then starts the reflector's Run method:

// Run starts a watch and handles watch events. Will restart the watch if it is closed.
// Run will exit when stopCh is closed.
func (r *Reflector) Run(stopCh <-chan struct{}) {
	klog.V(3).Infof("Starting reflector %v (%s) from %s", r.expectedTypeName, r.resyncPeriod, r.name)
	wait.Until(func() {
		if err := r.ListAndWatch(stopCh); err != nil {
			utilruntime.HandleError(err)
		}
	}, r.period, stopCh)
}

The r.ListAndWatch method first lists the resources using the ListFunc defined in the resource's informer, then calls r.syncWith to store the resource objects and their resource version into the DeltaFIFO queue.
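The list-then-watch sequence can be modeled roughly as follows (a toy in-memory sketch; the listAndWatch function, the event type, and the integer resource versions are all simplifications invented for illustration):

```go
package main

import "fmt"

// event models a watch event carrying a resource version.
type event struct {
	kind string // "ADDED", "MODIFIED", "DELETED"
	name string
	rv   int
}

// store is a stand-in for the DeltaFIFO/indexer pair in this sketch.
type store map[string]int

// listAndWatch first replaces the store with the full list result (as
// r.syncWith does after ListFunc), then applies only watch events newer
// than the resource version returned by the list.
func listAndWatch(list map[string]int, listRV int, events []event, s store) int {
	for k := range s { // sync: replace local state with the listed objects
		delete(s, k)
	}
	for name, rv := range list {
		s[name] = rv
	}
	lastRV := listRV
	for _, e := range events { // watch: skip events at or below lastRV
		if e.rv <= lastRV {
			continue
		}
		switch e.kind {
		case "ADDED", "MODIFIED":
			s[e.name] = e.rv
		case "DELETED":
			delete(s, e.name)
		}
		lastRV = e.rv
	}
	return lastRV
}

func main() {
	s := store{}
	rv := listAndWatch(
		map[string]int{"pod-a": 3, "pod-b": 5}, 5,
		[]event{
			{"MODIFIED", "pod-a", 6},
			{"ADDED", "pod-c", 7},
			{"DELETED", "pod-b", 8},
		}, s)
	fmt.Println(rv, s["pod-a"], s["pod-c"]) // prints "8 6 7"
}
```

Tracking the last-seen resource version is what lets the real reflector resume a broken watch without relisting everything.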

2.2.2 DeltaFIFO

The DeltaFIFO struct looks like this:

type DeltaFIFO struct {
	// lock/cond protects access to 'items' and 'queue'.
	lock sync.RWMutex
	cond sync.Cond

	// We depend on the property that items in the set are in
	// the queue and vice versa, and that all Deltas in this
	// map have at least one Delta.
	items map[string]Deltas
	queue []string

	// populated is true if the first batch of items inserted by Replace() has been populated
	// or Delete/Add/Update was called first.
	populated bool
	// initialPopulationCount is the number of items inserted by the first call of Replace()
	initialPopulationCount int

	// keyFunc is used to make the key used for queued item
	// insertion and retrieval, and should be deterministic.
	keyFunc KeyFunc

	// knownObjects list keys that are "known", for the
	// purpose of figuring out which items have been deleted
	// when Replace() or Delete() is called.
	knownObjects KeyListerGetter

	// Indication the queue is closed.
	// Used to indicate a queue is closed so a control loop can exit when a queue is empty.
	// Currently, not used to gate any of CRED operations.
	closed     bool
	closedLock sync.Mutex
}

The key data structures are items and queue; a quick look at the enqueue path shows how DeltaFIFO stores data. Element deletion, addition, and update all eventually go through the following method:

func (f *DeltaFIFO) queueActionLocked(actionType DeltaType, obj interface{}) error {
	//compute the key of the object being operated on
	id, err := f.KeyOf(obj)
	if err != nil {
		return KeyError{obj, err}
	}
	//append to the items map under the key computed by the KeyOf function
	newDeltas := append(f.items[id], Delta{actionType, obj})
	newDeltas = dedupDeltas(newDeltas)

	if len(newDeltas) > 0 {
		if _, exists := f.items[id]; !exists {
			//if the key does not exist in the map yet, append the key to queue
			f.queue = append(f.queue, id)
		}
		f.items[id] = newDeltas
		f.cond.Broadcast()
	} else {
		// We need to remove this from our map (extra items in the queue are
		// ignored if they are not in the map).
		delete(f.items, id)
	}
	return nil
}
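The behavior of queueActionLocked, appending a Delta under the object's key and enqueuing the key only if it is not already queued, can be reproduced in a small sketch (a toy model: the real KeyOf derives a "namespace/name" key, Pop blocks on a condition variable, and deltas are deduplicated):

```go
package main

import "fmt"

type Delta struct {
	Type   string
	Object string
}

// fifo is a toy DeltaFIFO: items accumulates deltas per key, and queue holds
// each key at most once, so an object's deltas are consumed together, in order.
type fifo struct {
	items map[string][]Delta
	queue []string
}

func newFIFO() *fifo {
	return &fifo{items: map[string][]Delta{}}
}

// queueAction mirrors queueActionLocked: append the delta under the key, and
// enqueue the key only if it is not already queued.
func (f *fifo) queueAction(actionType, obj string) {
	id := obj // the real KeyOf computes "namespace/name"; here the object is its own key
	if _, exists := f.items[id]; !exists {
		f.queue = append(f.queue, id)
	}
	f.items[id] = append(f.items[id], Delta{actionType, obj})
}

// pop returns all accumulated deltas for the oldest queued key.
func (f *fifo) pop() []Delta {
	id := f.queue[0]
	f.queue = f.queue[1:]
	deltas := f.items[id]
	delete(f.items, id)
	return deltas
}

func main() {
	f := newFIFO()
	f.queueAction("Added", "pod-a")
	f.queueAction("Added", "pod-b")
	f.queueAction("Updated", "pod-a") // pod-a is already queued: only items grows
	fmt.Println(len(f.queue))         // prints "2": each key is queued once
	fmt.Println(len(f.pop()))         // prints "2": pod-a pops with both of its deltas
}
```

Keeping each key in the queue only once is what makes the FIFO "incremental": a consumer always sees the full, ordered history of changes for an object in a single Pop.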

2.2.3 Indexer

The resource objects that the Reflector pushes into the DeltaFIFO are, once consumed, stored in the Indexer. The data in the Indexer stays consistent with etcd, acting as a local cache that reduces the load on the APIServer. It is worth looking at how func (s *sharedIndexInformer) HandleDeltas(obj interface{}) error updates the popped objects into the indexer:

func (s *sharedIndexInformer) HandleDeltas(obj interface{}) error {
	s.blockDeltas.Lock()
	defer s.blockDeltas.Unlock()

	// from oldest to newest
	for _, d := range obj.(Deltas) {
		switch d.Type {
		case Sync, Added, Updated:
			isSync := d.Type == Sync
			s.cacheMutationDetector.AddObject(d.Object)
			if old, exists, err := s.indexer.Get(d.Object); err == nil && exists {
				if err := s.indexer.Update(d.Object); err != nil {
					return err
				}
				//distribute the event to the matching listeners, i.e. those registered via informer.AddEventHandler
				s.processor.distribute(updateNotification{oldObj: old, newObj: d.Object}, isSync)
			} else {
				if err := s.indexer.Add(d.Object); err != nil {
					return err
				}
				s.processor.distribute(addNotification{newObj: d.Object}, isSync)
			}
		case Deleted:
			if err := s.indexer.Delete(d.Object); err != nil {
				return err
			}
			s.processor.distribute(deleteNotification{oldObj: d.Object}, false)
		}
	}
	return nil
}

This method does two things:

  • updates the indexer
  • distributes the events

The indexer is an indexed local cache; its underlying data structure is threadSafeMap, a concurrency-safe map:
type threadSafeMap struct {
	lock  sync.RWMutex
	items map[string]interface{}

	// indexers maps a name to an IndexFunc
	indexers Indexers
	// indices maps a name to an Index
	indices Indices
}

The fields of threadSafeMap puzzled me at first, so I read the updateIndices method to understand them.

func (c *threadSafeMap) updateIndices(oldObj interface{}, newObj interface{}, key string) {
	// if we got an old object, we need to remove it before we add it again
	if oldObj != nil {
		c.deleteFromIndices(oldObj, key)
	}
	for name, indexFunc := range c.indexers {
		indexValues, err := indexFunc(newObj)
		if err != nil {
			panic(fmt.Errorf("unable to calculate an index entry for key %q on index %q: %v", key, name, err))
		}
		index := c.indices[name]
		if index == nil {
			index = Index{}
			c.indices[name] = index
		}

		for _, indexValue := range indexValues {
			set := index[indexValue]
			if set == nil {
				set = sets.String{}
				index[indexValue] = set
			}
			set.Insert(key)
		}
	}
}

  1. Iterate over Indexers (a map whose key is the index name and whose value is an IndexFunc), and run the IndexFunc to compute indexValues (note that a single IndexFunc can yield multiple index values)
  2. Use the index name to look up the Index in indices (also a map); an Index is a set implemented with a map
  3. Look up each indexValue in the Index and insert the object's key. The figure below may make this clearer:
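The three steps above can also be sketched with a toy namespace index (a simplified, lock-free model; the cache type and the namespace IndexFunc are invented for illustration):

```go
package main

import "fmt"

type Pod struct {
	Namespace, Name string
}

// IndexFunc computes the index values of an object, e.g. its namespace.
type IndexFunc func(obj Pod) []string

// cache is a toy threadSafeMap: Indexers maps an index name to an IndexFunc,
// and indices maps an index name to (index value -> set of object keys).
type cache struct {
	items    map[string]Pod
	indexers map[string]IndexFunc
	indices  map[string]map[string]map[string]struct{}
}

// add mirrors updateIndices: for each IndexFunc, compute the object's index
// values and insert its key under each value.
func (c *cache) add(key string, obj Pod) {
	c.items[key] = obj
	for name, fn := range c.indexers {
		index := c.indices[name]
		if index == nil {
			index = map[string]map[string]struct{}{}
			c.indices[name] = index
		}
		for _, v := range fn(obj) {
			set := index[v]
			if set == nil {
				set = map[string]struct{}{}
				index[v] = set
			}
			set[key] = struct{}{}
		}
	}
}

// byIndex answers queries like "all pod keys in namespace X".
func (c *cache) byIndex(indexName, value string) []string {
	var keys []string
	for k := range c.indices[indexName][value] {
		keys = append(keys, k)
	}
	return keys
}

func main() {
	c := &cache{
		items:    map[string]Pod{},
		indexers: map[string]IndexFunc{"namespace": func(p Pod) []string { return []string{p.Namespace} }},
		indices:  map[string]map[string]map[string]struct{}{},
	}
	c.add("default/pod-a", Pod{"default", "pod-a"})
	c.add("default/pod-b", Pod{"default", "pod-b"})
	c.add("kube-system/dns", Pod{"kube-system", "dns"})
	fmt.Println(len(c.byIndex("namespace", "default"))) // prints "2"
}
```

The extra level of maps is what turns a flat key/value store into a queryable cache: one lookup by index name, one by index value, and the result is the set of matching object keys.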
