k8s之configmap热更新原理

762 阅读7分钟

现象:

pod内挂载configmap,修改configmap,pod内的内容会随之更新,详情请参考这个文章codeantenna.com/a/pf1zJAzHF…

大致逻辑:

kubelet在每个同步周期(默认60s)去syncPod的时候,会通过kubelet.volumeManager.WaitForAttachAndMount(pod)这个方法判断当前pod的volume是否都已经被挂载,这里会通过ReprocessPod()把有volumes的pod标记成未处理。对于configmap, volumeManager的desiredStateOfWorldPopulator会在下次轮询时处理当前pod的volume,通过MarkRemountRequired()把当前pod标记成remountRequired,之后由volume plugin(configmap plugin)去获取最新的configmap。volumeManager会定时(100 * time.Millisecond 即0.1s)reconcile(主要逻辑就是对带configmap volume的pod执行remount操作。注意这里的remount仅仅是从apiserver获取最新configmap然后在node上落盘;起容器的时候会把这个目录挂载到容器里,所以这个目录发生变化就会同步到容器)

注意: configmap以env或subPath的方式使用时,不会进行热更新:env是在起容器的时候写入容器的,容器启动之后环境变量就无法再更新;subPath则是直接绑定了具体文件路径,kubelet更新落盘内容时不会重新绑定。

代码实现:

梳理kubelet关于configmap的处理逻辑: NewMainKubelet 这里摘选出和volume相关的逻辑,初始化configmapManager和secretManager,这俩参数用于初始化podManager,podManager用于初始化volumeManager:

// NewMainKubelet instantiates a Kubelet (excerpt: only the volume-related
// wiring is shown; "..." marks fields/arguments elided from the full source).
// The secret/configmap managers chosen by the change-detection strategy are
// fed into the pod manager, the volume plugin manager and the volume manager.
// (Fixed: the excerpt had a stray extra "}" before the return statement.)
func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,kubeDeps *Dependencies,...) (*Kubelet, error) {
   klet := &Kubelet{
      hostname:                                hostname,
      hostnameOverridden:                      hostnameOverridden,
      nodeName:                                nodeName,
      kubeClient:                              kubeDeps.KubeClient,
      ...
   }

   var secretManager secret.Manager
   var configMapManager configmap.Manager

   // The strategy decides how kubelet tracks configmap/secret changes:
   // Watch (list+watch cache), TTL cache, or a direct GET per access.
   switch kubeCfg.ConfigMapAndSecretChangeDetectionStrategy {
   case kubeletconfiginternal.WatchChangeDetectionStrategy:
      secretManager = secret.NewWatchingSecretManager(kubeDeps.KubeClient)
      configMapManager = configmap.NewWatchingConfigMapManager(kubeDeps.KubeClient)
   case kubeletconfiginternal.TTLCacheChangeDetectionStrategy:
      secretManager = secret.NewCachingSecretManager(
         kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
      configMapManager = configmap.NewCachingConfigMapManager(
         kubeDeps.KubeClient, manager.GetObjectTTLFromNodeFunc(klet.GetNode))
   case kubeletconfiginternal.GetChangeDetectionStrategy:
      secretManager = secret.NewSimpleSecretManager(kubeDeps.KubeClient)
      configMapManager = configmap.NewSimpleConfigMapManager(kubeDeps.KubeClient)
   default:
      return nil, fmt.Errorf("unknown configmap and secret manager mode: %v", kubeCfg.ConfigMapAndSecretChangeDetectionStrategy)
   }

   klet.secretManager = secretManager
   klet.configMapManager = configMapManager
   klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient, secretManager, configMapManager)

   // NewInitializedVolumePluginMgr initializes some storageErrors on the Kubelet runtimeState (in csi_plugin.go init)
   // which affects node ready status. This function must be called before Kubelet is initialized so that the Node
   // ReadyState is accurate with the storage state.
   klet.volumePluginMgr, err =
      NewInitializedVolumePluginMgr(klet, secretManager, configMapManager, tokenManager, kubeDeps.VolumePlugins, kubeDeps.DynamicPluginProber)

   // setup volumeManager
   klet.volumeManager = volumemanager.NewVolumeManager(
      klet.podManager,
      klet.kubeClient,
      klet.volumePluginMgr,
      klet.containerRuntime,
      kubeDeps.Mounter,
      klet.getPodsDir(),
      volumepathhandler.NewBlockVolumePathHandler())
   return klet, nil
}

volumemanager.NewVolumeManager:需要关注desiredStateOfWorldPopulator和reconciler这两部分的逻辑,其中desiredStateOfWorldPopulator负责同步volume的实际状态和期望状态,比较重要的是标记pod是否需要remount;reconciler主要负责从apiserver同步configmap到node,后面会展开讲

// NewVolumeManager wires up the volume manager (excerpt: several parameters
// are elided with "..."; names like kubeClient/recorder/mounter come from the
// elided parameter list). Two components matter for configmap hot update:
// the desiredStateOfWorldPopulator (syncs pod volumes into the desired state
// and marks remount-required pods) and the reconciler (performs the actual
// mount/remount/unmount operations).
func NewVolumeManager(
   controllerAttachDetachEnabled bool,
   nodeName k8stypes.NodeName,
   podManager pod.Manager,...) VolumeManager {
   vm := &volumeManager{
      kubeClient:          kubeClient,
      volumePluginMgr:     volumePluginMgr,
      desiredStateOfWorld: cache.NewDesiredStateOfWorld(volumePluginMgr),
      actualStateOfWorld:  cache.NewActualStateOfWorld(nodeName, volumePluginMgr),
      operationExecutor: operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
         kubeClient,
         volumePluginMgr,
         recorder,
         checkNodeCapabilitiesBeforeMount,
         blockVolumePathHandler)),
   }

   intreeToCSITranslator := csitrans.New()
   csiMigratedPluginManager := csimigration.NewPluginManager(intreeToCSITranslator)

   vm.intreeToCSITranslator = intreeToCSITranslator
   vm.csiMigratedPluginManager = csiMigratedPluginManager
   // Populator: periodically scans podManager's pods and fills the desired
   // state of the world; also marks remount-required pods.
   vm.desiredStateOfWorldPopulator = populator.NewDesiredStateOfWorldPopulator(
      kubeClient,
      desiredStateOfWorldPopulatorLoopSleepPeriod,
      desiredStateOfWorldPopulatorGetPodStatusRetryDuration,
      podManager,
      podStatusProvider,
      vm.desiredStateOfWorld,
      vm.actualStateOfWorld,
      kubeContainerRuntime,
      keepTerminatedPodVolumes,
      csiMigratedPluginManager,
      intreeToCSITranslator)
   // Reconciler: drives actual state toward desired state (mount/unmount).
   vm.reconciler = reconciler.NewReconciler(
      kubeClient,
      controllerAttachDetachEnabled,
      reconcilerLoopSleepPeriod,
      waitForAttachTimeout,
      nodeName,
      vm.desiredStateOfWorld,
      vm.actualStateOfWorld,
      vm.desiredStateOfWorldPopulator.HasAddedPods,
      vm.operationExecutor,
      mounter,
      hostutil,
      volumePluginMgr,
      kubeletPodsDir)

   return vm

}

k.Run:kubelet的启动逻辑,这里仅摘选出跟volume和同步pod相关的部分代码

// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
   ...
   // Start volume manager
   go kl.volumeManager.Run(kl.sourcesReady, wait.NeverStop)
   ...
   
   klet.podWorkers = newPodWorkers(klet.syncPod, kubeDeps.Recorder, klet.workQueue, klet.resyncInterval, backOffPeriod, klet.podCache)

   // Start the pod lifecycle event generator.
   kl.pleg.Start()
   kl.syncLoop(updates, kl)
}

klet.syncPod:同步pod的时候需要关注kl.volumeManager.WaitForAttachAndMount(pod)这个函数,它的主要逻辑是判断pod的所有volume是否都已经attach并且mount;还有一个比较核心的操作,是把有volume的pod标记成未处理。至此podWorker关于volume的逻辑暂时结束,之后的操作由volumeManager接管

// syncPod (excerpt): before (re)starting containers, block until every volume
// of the pod has been attached and mounted by the volume manager.
func (kl *Kubelet) syncPod(o syncPodOptions) error {
   ...
   // Volume manager will not mount volumes for terminated pods
   if !kl.podIsTerminated(pod) {
      // Wait for volumes to attach/mount
      if err := kl.volumeManager.WaitForAttachAndMount(pod); err != nil {
         return err
      }
   }
   ...
}

volume的部分:

kl.volumeManager.Run:这里主要关注两个组件desiredStateOfWorldPopulator和reconciler

// Run starts the two long-running volume loops — the populator (feeds the
// desired state of the world) and the reconciler (executes mount/unmount) —
// then blocks until stopCh is closed.
func (vm *volumeManager) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
   defer runtime.HandleCrash()

   go vm.desiredStateOfWorldPopulator.Run(sourcesReady, stopCh)
   go vm.reconciler.Run(stopCh)

   if vm.kubeClient != nil {
      // start informer for CSIDriver
      vm.volumePluginMgr.Run(stopCh)
   }
   <-stopCh
}

populator.NewDesiredStateOfWorldPopulator.Run:(核心逻辑:维护volume实际和期望的缓存,Run()->findAndAddNewPods()->processPodVolumes()->dswp.actualStateOfWorld.MarkRemountRequired(uniquePodName),这里会判断当前volumePlugin是否需要remount,决定是否要把当前pod标记成remountRequired,configmap和secret是true,其他volumePlugin是false)

// Run loops populatorLoop until all pod config sources are ready, then sets
// hasAddedPods (which lets the reconciler do its initial state sync) and keeps
// running populatorLoop every loopSleepDuration until stopCh closes.
func (dswp *desiredStateOfWorldPopulator) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
   // Wait for the completion of a loop that started after sources are all ready, then set hasAddedPods accordingly
   wait.PollUntil(dswp.loopSleepDuration, func() (bool, error) {
      done := sourcesReady.AllReady()
      dswp.populatorLoop()
      return done, nil

   }, stopCh)

   dswp.hasAddedPodsLock.Lock()
   dswp.hasAddedPods = true
   dswp.hasAddedPodsLock.Unlock()
   wait.Until(dswp.populatorLoop, dswp.loopSleepDuration, stopCh)
}

// populatorLoop is one populator iteration: add new/changed pods' volumes to
// the desired state of the world, then prune volumes of deleted pods.
func (dswp *desiredStateOfWorldPopulator) populatorLoop() {
   dswp.findAndAddNewPods()

   // findAndRemoveDeletedPods() calls out to the container runtime to
   // determine if the containers for a given pod are terminated. This is
   // an expensive operation, therefore we limit the rate that
   // findAndRemoveDeletedPods() is called independently of the main
   // populator loop.
   dswp.findAndRemoveDeletedPods()
}


// Iterate through all pods and add to desired state of world if they don't
// exist but should
func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
   // Map unique pod name to outer volume name to MountedVolume.
   mountedVolumesForPod := make(map[volumetypes.UniquePodName]map[string]cache.MountedVolume)
   if utilfeature.DefaultFeatureGate.Enabled(features.ExpandInUsePersistentVolumes) {
      for _, mountedVolume := range dswp.actualStateOfWorld.GetMountedVolumes() {
         mountedVolumes, exist := mountedVolumesForPod[mountedVolume.PodName]
         if !exist {
            mountedVolumes = make(map[string]cache.MountedVolume)
            mountedVolumesForPod[mountedVolume.PodName] = mountedVolumes
         }
         mountedVolumes[mountedVolume.OuterVolumeSpecName] = mountedVolume
      }
   }

   processedVolumesForFSResize := sets.NewString()
   for _, pod := range dswp.podManager.GetPods() {
      if dswp.isPodTerminated(pod) {
         // Do not (re)add volumes for terminated pods
         continue
      }
      dswp.processPodVolumes(pod, mountedVolumesForPod, processedVolumesForFSResize)
   }
}

// processPodVolumes processes the volumes in the given pod and adds them to the
// desired state of the world.
func (dswp *desiredStateOfWorldPopulator) processPodVolumes(
   pod *v1.Pod,
   mountedVolumesForPod map[volumetypes.UniquePodName]map[string]cache.MountedVolume,
   processedVolumesForFSResize sets.String) {
   allVolumesAdded := true
   mounts, devices := util.GetPodVolumeNames(pod)
   expandInUsePV := utilfeature.DefaultFeatureGate.Enabled(features.ExpandInUsePersistentVolumes)

   // Process volume spec for each volume defined in pod
   for _, podVolume := range pod.Spec.Volumes {
      if !mounts.Has([podVolume.Name](http://podvolume.name/)) && !devices.Has([podVolume.Name](http://podvolume.name/)) {
         // Volume is not used in the pod, ignore it.
         continue
      }

      pvc, volumeSpec, volumeGidValue, err :=
         dswp.createVolumeSpec(podVolume, pod, mounts, devices)
      if err != nil {
         dswp.desiredStateOfWorld.AddErrorToPod(uniquePodName, err.Error())
         allVolumesAdded = false
         continue
      }

      // Add volume to desired state of world
      _, err = dswp.desiredStateOfWorld.AddPodToVolume(
         uniquePodName, pod, volumeSpec, [podVolume.Name](http://podvolume.name/), volumeGidValue)
      if err != nil {
         dswp.desiredStateOfWorld.AddErrorToPod(uniquePodName, err.Error())
         allVolumesAdded = false
      } 

      if expandInUsePV {
         dswp.checkVolumeFSResize(pod, podVolume, pvc, volumeSpec,
            uniquePodName, mountedVolumesForPod, processedVolumesForFSResize)
      }
   }

   // some of the volume additions may have failed, should not mark this pod as fully processed
   if allVolumesAdded {
      dswp.markPodProcessed(uniquePodName)
      // New pod has been synced. Re-mount all volumes that need it
      // (e.g. DownwardAPI)
     dswp.actualStateOfWorld.MarkRemountRequired(uniquePodName)
      // Remove any stored errors for the pod, everything went well in this processPodVolumes
      dswp.desiredStateOfWorld.PopPodErrors(uniquePodName)
   } else if dswp.podHasBeenSeenOnce(uniquePodName) {
      // For the Pod which has been processed at least once, even though some volumes
      // may not have been reprocessed successfully this round, we still mark it as processed to avoid
      // processing it at a very high frequency. The pod will be reprocessed when volume manager calls
      // ReprocessPod() which is triggered by SyncPod.
      dswp.markPodProcessed(uniquePodName)
   }
}

// markPodProcessed records that the volumes for the specified pod have been
// processed by the populator
func (dswp *desiredStateOfWorldPopulator) markPodProcessed(
   podName volumetypes.UniquePodName) {
   dswp.pods.Lock()
   defer dswp.pods.Unlock()

   dswp.pods.processedPods[podName] = true
}

// MarkRemountRequired flags every mounted volume of the given pod whose plugin
// reports RequiresRemount() so the reconciler will run MountVolume again.
// For configmap/secret-style plugins this is what drives hot updates.
func (asw *actualStateOfWorld) MarkRemountRequired(
   podName volumetypes.UniquePodName) {
   asw.Lock()
   defer asw.Unlock()
   for volumeName, volumeObj := range asw.attachedVolumes {
      if podObj, podExists := volumeObj.mountedPods[podName]; podExists {
         volumePlugin, err :=
            asw.volumePluginMgr.FindPluginBySpec(podObj.volumeSpec)
         if err != nil || volumePlugin == nil {
            // Log and continue processing
            continue
         }

         // Only plugins that opt in (RequiresRemount) are flagged; others
         // keep their mounted state untouched.
         if volumePlugin.RequiresRemount() {
            podObj.remountRequired = true
            asw.attachedVolumes[volumeName].mountedPods[podName] = podObj
         }
      }
   }
}

reconciler.NewReconciler:(Run()->rc.reconcile()->rc.mountAttachVolumes()->rc.operationExecutor.MountVolume(),核心逻辑:根据实际和期望的volume情况对volume的mount unmount操作),对于configmap这里的mount操作实际是将etcd中最新的configmap的内容更新到宿主机的emptydir的目录,这里我们可以仅关注挂载的逻辑

// Run drives the reconciliation loop every loopSleepDuration until stopCh
// closes (0.1s per the article's description of the default).
func (rc *reconciler) Run(stopCh <-chan struct{}) {
   wait.Until(rc.reconciliationLoopFunc(), rc.loopSleepDuration, stopCh)
}

// reconciliationLoopFunc returns the closure executed on every reconciler
// tick: reconcile desired vs. actual state, and do a one-time state sync once
// the populator has added all pods.
func (rc *reconciler) reconciliationLoopFunc() func() {
   return func() {
      rc.reconcile()

      // Sync the state with the reality once after all existing pods are added to the desired state from all sources.
      // Otherwise, the reconstruct process may clean up pods' volumes that are still in use because
      // desired state of world does not contain a complete list of pods.
      if rc.populatorHasAddedPods() && !rc.StatesHasBeenSynced() {
         rc.sync()
      }
   }
}


// reconcile performs one pass of unmount → mount/attach → device detach to
// move the actual state of the world toward the desired state.
func (rc *reconciler) reconcile() {
   // Unmounts are triggered before mounts so that a volume that was
   // referenced by a pod that was deleted and is now referenced by another
   // pod is unmounted from the first pod before being mounted to the new
   // pod.
   rc.unmountVolumes()

   // Next we mount required volumes. This function could also trigger
   // attach if kubelet is responsible for attaching volumes.
   // If underlying PVC was resized while in-use then this function also handles volume
   // resizing.
   rc.mountAttachVolumes()

   // Ensure devices that should be detached/unmounted are detached/unmounted.
   rc.unmountDetachDevices()
}

func (rc *reconciler) mountAttachVolumes() {
   // Ensure volumes that should be attached/mounted are attached/mounted.
   for _, volumeToMount := range rc.desiredStateOfWorld.GetVolumesToMount() {
      volMounted, devicePath, err := rc.actualStateOfWorld.PodExistsInVolume(volumeToMount.PodName, volumeToMount.VolumeName)
      volumeToMount.DevicePath = devicePath
      if cache.IsVolumeNotAttachedError(err) {
         if rc.controllerAttachDetachEnabled || !volumeToMount.PluginIsAttachable {
            // Volume is not attached (or doesn't implement attacher), kubelet attach is disabled, wait
            // for controller to finish attaching volume.
            rc.operationExecutor.VerifyControllerAttachedVolume(
               volumeToMount.VolumeToMount,
               rc.nodeName,
               rc.actualStateOfWorld)
         } else {
            // Volume is not attached to node, kubelet attach is enabled, volume implements an attacher,
            // so attach it
            volumeToAttach := operationexecutor.VolumeToAttach{
               VolumeName: volumeToMount.VolumeName,
               VolumeSpec: volumeToMount.VolumeSpec,
            }
            rc.operationExecutor.AttachVolume(volumeToAttach, rc.actualStateOfWorld)
         }
      } else if {
         // Volume is not mounted, or is already mounted, but requires remounting
         isRemount := cache.IsRemountRequiredError(err)
         if isRemount {
            remountingLogStr = "Volume is already mounted to pod, but remount was requested."
         }

         rc.operationExecutor.MountVolume(
            rc.waitForAttachTimeout,
            volumeToMount.VolumeToMount,
            rc.actualStateOfWorld,
            isRemount)
      } else if cache.IsFSResizeRequiredError(err) &&
         utilfeature.DefaultFeatureGate.Enabled(features.ExpandInUsePersistentVolumes) {
         rc.operationExecutor.ExpandInUseVolume(
            volumeToMount.VolumeToMount,
            rc.actualStateOfWorld)
      }
   }
}

mountVolume:

// (Fragment repeated from NewVolumeManager for illustration: the operation
// executor wraps an operation generator that builds mount/unmount closures.)
operationExecutor: operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
   kubeClient,
   volumePluginMgr,
   recorder,
   checkNodeCapabilitiesBeforeMount,
   blockVolumePathHandler))


// MountVolume generates and runs the mount (filesystem mode) or map (block
// mode) operation for a single volume/pod pair via the pending-operations
// queue, which also deduplicates concurrent operations on the same volume.
func (oe *operationExecutor) MountVolume(
   waitForAttachTimeout time.Duration,
   volumeToMount VolumeToMount,
   actualStateOfWorld ActualStateOfWorldMounterUpdater,
   isRemount bool) error {
   // NOTE(review): err is not checked before use in this excerpt — the full
   // source returns early on error; confirm against upstream.
   fsVolume, err := util.CheckVolumeModeFilesystem(volumeToMount.VolumeSpec)
   var generatedOperations volumetypes.GeneratedOperations
   if fsVolume {
      // Filesystem volume case
      // Mount/remount a volume when a volume is attached
      generatedOperations = oe.operationGenerator.GenerateMountVolumeFunc(
         waitForAttachTimeout, volumeToMount, actualStateOfWorld, isRemount)
   } else {
      // Block volume case
      // Creates a map to device if a volume is attached
      generatedOperations, err = oe.operationGenerator.GenerateMapVolumeFunc(
         waitForAttachTimeout, volumeToMount, actualStateOfWorld)
   }

   // Avoid executing mount/map from multiple pods referencing the
   // same volume in parallel
   podName := nestedpendingoperations.EmptyUniquePodName

   // TODO: remove this -- not necessary
   if !volumeToMount.PluginIsAttachable && !volumeToMount.PluginIsDeviceMountable {
      // volume plugins which are Non-attachable and Non-deviceMountable can execute mount for multiple pods
      // referencing the same volume in parallel
      podName = util.GetUniquePodName(volumeToMount.Pod)
   }
   // TODO mount_device
   return oe.pendingOperations.Run(
      volumeToMount.VolumeName, podName, "" /* nodeName */, generatedOperations)

}

GenerateMountVolumeFunc:(根据volumePluginManager拿到volumePlugin,再根据volumePlugin拿到mounter,然后调用mounter的SetUp方法去执行实际的mount操作。对于configmap,实际的mount操作就是获取apiserver最新的configmap,落地到node的某个目录下;因为cri起容器时会挂载宿主机的这个目录,所以configmap发生变化就会同步到宿主机的这个目录,进而同步到容器)

// GenerateMountVolumeFunc builds the closure that performs a volume mount:
// resolve the plugin for the spec, create a mounter, run SetUp (for configmap
// this fetches the latest object and writes it to the node), then record the
// volume as mounted in the actual state of the world.
// NOTE(review): fsGroup, fsGroupChangePolicy and eventRecorderFunc are defined
// in portions of the full source elided from this excerpt.
func (og *operationGenerator) GenerateMountVolumeFunc(
   waitForAttachTimeout time.Duration,
   volumeToMount VolumeToMount,
   actualStateOfWorld ActualStateOfWorldMounterUpdater,
   isRemount bool) volumetypes.GeneratedOperations {
   volumePluginName := unknownVolumePlugin
   volumePlugin, err :=
      og.volumePluginMgr.FindPluginBySpec(volumeToMount.VolumeSpec)
   if err == nil && volumePlugin != nil {
      volumePluginName = volumePlugin.GetPluginName()
   }

   mountVolumeFunc := func() (error, error) {
      // Get mounter plugin
      volumePlugin, err := og.volumePluginMgr.FindPluginBySpec(volumeToMount.VolumeSpec)
      volumeMounter, newMounterErr := volumePlugin.NewMounter(
         volumeToMount.VolumeSpec,
         volumeToMount.Pod,
         volume.VolumeOptions{})

      // Execute mount
      mountErr := volumeMounter.SetUp(volume.MounterArgs{
         FsUser:              ioutil.FsUserFrom(volumeToMount.Pod),
         FsGroup:             fsGroup,
         DesiredSize:         volumeToMount.DesiredSizeLimit,
         FSGroupChangePolicy: fsGroupChangePolicy,
      })

      // Update actual state of world
      markOpts := MarkVolumeOpts{
         PodName:             volumeToMount.PodName,
         PodUID:              volumeToMount.Pod.UID,
         VolumeName:          volumeToMount.VolumeName,
         Mounter:             volumeMounter,
         OuterVolumeSpecName: volumeToMount.OuterVolumeSpecName,
         VolumeGidVolume:     volumeToMount.VolumeGidValue,
         VolumeSpec:          volumeToMount.VolumeSpec,
         VolumeMountState:    VolumeMounted,
      }

      if mountErr != nil {
         og.checkForFailedMount(volumeToMount, mountErr)
         og.markVolumeErrorState(volumeToMount, markOpts, mountErr, actualStateOfWorld)
         // On failure, return error. Caller will log and retry.
         return volumeToMount.GenerateError("MountVolume.SetUp failed", mountErr)
      }
      actualStateOfWorld.MarkVolumeAsMounted(markOpts)
      return nil, nil
   }

   return volumetypes.GeneratedOperations{
      OperationName:     "volume_mount",
      OperationFunc:     mountVolumeFunc,
      EventRecorderFunc: eventRecorderFunc,
      CompleteFunc:      util.OperationCompleteHook(util.GetFullQualifiedPluginNameForVolume(volumePluginName, volumeToMount.VolumeSpec), "volume_mount"),
   }
}


//这里的plugin的newMounter就是走了configmap plugin的逻辑
// NewMounter returns the configmap mounter for the pod. getConfigMap is the
// plugin's fetch function (wired from the kubelet's configMapManager in Init),
// which is how SetUp later obtains the latest ConfigMap contents.
func (plugin *configMapPlugin) NewMounter(spec *volume.Spec, pod *v1.Pod, opts volume.VolumeOptions) (volume.Mounter, error) {
   return &configMapVolumeMounter{
      configMapVolume: &configMapVolume{
         spec.Name(),
         pod.UID,
         plugin,
         plugin.host.GetMounter(plugin.GetPluginName()),
         volume.NewCachedMetrics(volume.NewMetricsDu(getPath(pod.UID, spec.Name(), plugin.host))),
      },
      source:       *spec.Volume.ConfigMap,
      pod:          *pod,
      opts:         &opts,
      getConfigMap: plugin.getConfigMap,
   }, nil
}


// SetUp mounts the configmap volume at its default path (delegates to SetUpAt).
func (b *configMapVolumeMounter) SetUp(mounterArgs volume.MounterArgs) error {
   return b.SetUpAt(b.GetPath(), mounterArgs)
}

func (b *configMapVolumeMounter) SetUpAt(dir string, mounterArgs volume.MounterArgs) error {
    // Wrap EmptyDir, let it do the setup.
   wrapped, err := b.plugin.host.NewWrapperMounter(b.volName, wrappedVolumeSpec(), &b.pod, *b.opts)

   optional := b.source.Optional != nil && *b.source.Optional
   configMap, err := b.getConfigMap(b.pod.Namespace, b.[source.Name](http://source.name/))
   if err != nil {
      if !(errors.IsNotFound(err) && optional) {
         return err
      }

      configMap = &v1.ConfigMap{
         ObjectMeta: metav1.ObjectMeta{
            Namespace: b.pod.Namespace,
            Name:      b.[source.Name](http://source.name/),
         },
      }
   }

   payload, err := MakePayload(b.source.Items, configMap, b.source.DefaultMode, optional)

   setupSuccess := false
   if err := wrapped.SetUpAt(dir, mounterArgs); err != nil {
      return err
   }

   defer func() {
      // Clean up directories if setup fails
      if !setupSuccess {
         unmounter, unmountCreateErr := b.plugin.NewUnmounter(b.volName, b.podUID)
         if unmountCreateErr != nil {
            return
         }
         tearDownErr := unmounter.TearDown()
      }
   }()

   writerContext := fmt.Sprintf("pod %v/%v volume %v", b.pod.Namespace, b.[pod.Name](http://pod.name/), b.volName)
   writer, err := volumeutil.NewAtomicWriter(dir, writerContext)
   writer.Write(payload)
   setupSuccess = true
   return nil
}

到这里configmap的热更新流程大致讲完, 下面主要是kubelet中volumePluginManager的实现,会主要解读configmap plugin的实现 vm.volumePluginMgr.Run(stopCh):(主要逻辑:注册了一堆volume plugin,然后去初始化各个plugin)

// NewInitializedVolumePluginMgr builds the kubeletVolumeHost (carrying the
// secret/configmap/token managers and the optional CSIDriver informer) and
// initializes every registered volume plugin against it.
func NewInitializedVolumePluginMgr(
   kubelet *Kubelet,
   secretManager secret.Manager,
   configMapManager configmap.Manager,
   tokenManager *token.Manager,
   plugins []volume.VolumePlugin,
   prober volume.DynamicPluginProber) (*volume.VolumePluginMgr, error) {

   // Initialize csiDriverLister before calling InitPlugins
   var informerFactory informers.SharedInformerFactory
   var csiDriverLister storagelisters.CSIDriverLister
   var csiDriversSynced cache.InformerSynced
   const resyncPeriod = 0

   // Don't initialize if kubeClient is nil
   if kubelet.kubeClient != nil {
      informerFactory = informers.NewSharedInformerFactory(kubelet.kubeClient, resyncPeriod)
      csiDriverInformer := informerFactory.Storage().V1().CSIDrivers()
      csiDriverLister = csiDriverInformer.Lister()
      csiDriversSynced = csiDriverInformer.Informer().HasSynced
   }

   // kvh is the VolumeHost handed to every plugin's Init(); this is how the
   // configmap plugin later reaches the configMapManager.
   kvh := &kubeletVolumeHost{
      kubelet:          kubelet,
      volumePluginMgr:  volume.VolumePluginMgr{},
      secretManager:    secretManager,
      configMapManager: configMapManager,
      tokenManager:     tokenManager,
      informerFactory:  informerFactory,
      csiDriverLister:  csiDriverLister,
      csiDriversSynced: csiDriversSynced,
      exec:             utilexec.New(),
   }

   if err := kvh.volumePluginMgr.InitPlugins(plugins, prober, kvh); err != nil {
      return nil, fmt.Errorf(
         "could not initialize volume plugins for KubeletVolumePluginMgr: %v",
         err)
   }

return &kvh.volumePluginMgr, nil
}

// InitPlugins initializes each plugin.  All plugins must have unique names.
// This must be called exactly once before any New* methods are called on any
// plugins.
func (pm *VolumePluginMgr) InitPlugins(plugins []VolumePlugin, prober DynamicPluginProber, host VolumeHost) error {
    ...
   for _, plugin := range plugins {
      ...
      plugin.Init(host)
      ...
      pm.plugins[name] = plugin
   }
    ...
}

plugins就是下面注册的各类volume plugin,分别对应不同类型volume处理

// ProbeVolumePlugins collects all volume plugins into an easy to use list.
func ProbeVolumePlugins(featureGate featuregate.FeatureGate) ([]volume.VolumePlugin, error) {
   allPlugins := []volume.VolumePlugin{}

   // The list of plugins to probe is decided by the kubelet binary, not
   // by dynamic linking or other "magic".  Plugins will be analyzed and
   // initialized later.
   // If/when it does, see kube-controller-manager/app/plugins.go for example of using volume.VolumeConfig
   var err error
   allPlugins, err = appendLegacyProviderVolumes(allPlugins, featureGate)
   if err != nil {
      return allPlugins, err
   }

   allPlugins = append(allPlugins, emptydir.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, git_repo.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, hostpath.ProbeVolumePlugins(volume.VolumeConfig{})...)
   allPlugins = append(allPlugins, nfs.ProbeVolumePlugins(volume.VolumeConfig{})...)
   allPlugins = append(allPlugins, secret.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, iscsi.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, glusterfs.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, rbd.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, quobyte.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, cephfs.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, downwardapi.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, fc.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, flocker.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, configmap.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, projected.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, portworx.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, scaleio.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, local.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, storageos.ProbeVolumePlugins()...)
   allPlugins = append(allPlugins, csi.ProbeVolumePlugins()...)
   return allPlugins, nil
}

这里我们仅看下configmap plugin的实现即可

// ProbeVolumePlugins is the entry point for plugin detection in a package.
func ProbeVolumePlugins() []volume.VolumePlugin {
   return []volume.VolumePlugin{&configMapPlugin{}}
}


// Init stores the VolumeHost and captures its configmap fetch function —
// for the kubelet this resolves to configMapManager.GetConfigMap, which is
// how SetUp later reads the latest ConfigMap.
func (plugin *configMapPlugin) Init(host volume.VolumeHost) error {
   plugin.host = host
   plugin.getConfigMap = host.GetConfigMapFunc()
   return nil
}

host.GetConfigMapFunc(): host就是这个struct:

// (Fragment repeated from NewInitializedVolumePluginMgr for illustration:
// the kubeletVolumeHost carries the configMapManager that plugins access.)
kvh := &kubeletVolumeHost{
   kubelet:          kubelet,
   volumePluginMgr:  volume.VolumePluginMgr{},
   secretManager:    secretManager,
   configMapManager: configMapManager,
   tokenManager:     tokenManager,
   informerFactory:  informerFactory,
   csiDriverLister:  csiDriverLister,
   csiDriversSynced: csiDriversSynced,
   exec:             utilexec.New(),
}

它的GetConfigMapFunc():返回configMapManager.GetConfigMap,用于读取最新的configmap

// GetConfigMapFunc exposes the configMapManager's lookup to volume plugins.
func (kvh *kubeletVolumeHost) GetConfigMapFunc() func(namespace, name string) (*v1.ConfigMap, error) {
   return kvh.configMapManager.GetConfigMap
}

configmapManager就是通过下面的方法生成的,configmap.NewWatchingConfigMapManager:

func NewWatchingConfigMapManager(kubeClient clientset.Interface) Manager {
   listConfigMap := func(namespace string, opts metav1.ListOptions) (runtime.Object, error) {
      return kubeClient.CoreV1().ConfigMaps(namespace).List(context.TODO(), opts)
   }

   watchConfigMap := func(namespace string, opts metav1.ListOptions) (watch.Interface, error) {
      return kubeClient.CoreV1().ConfigMaps(namespace).Watch(context.TODO(), opts)
   }

   newConfigMap := func() runtime.Object {
      return &v1.ConfigMap{}
   }

   isImmutable := func(object runtime.Object) bool {
      if configMap, ok := object.(*v1.ConfigMap); ok {
         return configMap.Immutable != nil && *configMap.Immutable
      }
      return false
   }

   gr := corev1.Resource("configmap")
   return &configMapManager{
      manager: manager.NewWatchBasedManager(listConfigMap, watchConfigMap, newConfigMap, isImmutable, gr, getConfigMapNames),
   }
}

configMapManager通过GetConfigMap这个方法获取configmap

func (c *configMapManager) GetConfigMap(namespace, name string) (*v1.ConfigMap, error) {
   object, err := c.manager.GetObject(namespace, name)
   if err != nil {
      return nil, err
   }

   if configmap, ok := object.(*v1.ConfigMap); ok {
      return configmap, nil
   }
   return nil, fmt.Errorf("unexpected object type: %v", object)
}

c.manager.GetObject:

func NewWatchBasedManager(
   listObject listObjectFunc,
   watchObject watchObjectFunc,
   newObject newObjectFunc,
   isImmutable isImmutableFunc,
   groupResource schema.GroupResource,
   getReferencedObjects func(*v1.Pod) sets.String) Manager {
   objectStore := NewObjectCache(listObject, watchObject, newObject, isImmutable, groupResource)
   return NewCacheBasedManager(objectStore, getReferencedObjects)
}


func NewCacheBasedManager(objectStore Store, getReferencedObjects func(*v1.Pod) sets.String) Manager {
   return &cacheBasedManager{
      objectStore:          objectStore,
      getReferencedObjects: getReferencedObjects,
      registeredPods:       make(map[objectKey]*v1.Pod),
   }
}

// GetObject delegates the lookup to the underlying object store.
func (c *cacheBasedManager) GetObject(namespace, name string) (runtime.Object, error) {
   return c.objectStore.Get(namespace, name)
}

NewObjectCache:

// NewObjectCache constructs the watch-backed store: one objectCacheItem
// (reflector + local store) per referenced namespace/name key.
func NewObjectCache(
   listObject listObjectFunc,
   watchObject watchObjectFunc,
   newObject newObjectFunc,
   isImmutable isImmutableFunc,
   groupResource schema.GroupResource) Store {
   return &objectCache{
      listObject:    listObject,
      watchObject:   watchObject,
      newObject:     newObject,
      isImmutable:   isImmutable,
      groupResource: groupResource,
      items:         make(map[objectKey]*objectCacheItem),
   }
}


// Get returns the object for namespace/name from the item's local reflector
// cache, waiting up to 1s for that cache to sync first.
func (c *objectCache) Get(namespace, name string) (runtime.Object, error) {
   key := objectKey{namespace: namespace, name: name}
   c.lock.RLock()
   item, exists := c.items[key]
   c.lock.RUnlock()

   // NOTE(review): `exists` is not checked here — the full source handles the
   // cache-miss case (registering and starting a reflector); this excerpt
   // elides it, so a miss would dereference a nil item. Verify upstream.
   if err := wait.PollImmediate(10*time.Millisecond, time.Second, item.hasSynced); err != nil {
      return nil, fmt.Errorf("failed to sync %s cache: %v", c.groupResource.String(), err)
   }

   // NOTE(review): the `exists` and error results of GetByKey are ignored in
   // this excerpt; the full source handles them — confirm before relying on it.
   obj, exists, err := item.store.GetByKey(c.key(namespace, name))
   if object, ok := obj.(runtime.Object); ok {
      return object, nil
   }

   return nil, fmt.Errorf("unexpected object type: %v", obj)
}

参考文章:

codeantenna.com/a/pf1zJAzHF…

dockone.io/article/863…