kubernetes的csi plugin注册机制的实现分为两个部分,第一部分是sidecar"node-driver-registrar",第二部分是kubelet的pluginManager,本文主要讲后者pluginManager的源码,基于k8s 1.22。
kubelet的注册原理
kubelet会监听特定的目录 /var/lib/kubelet/plugins_registry(其中 /var/lib/kubelet 是k8s工作路径,可以变更), sidecar"node-driver-registrar"实现了特定的接口并创建一个socket放到该目录下,pluginManager会通过该socket获取csi plugin的信息(主要是csi plugin name和 plugin socket的路径),并将其写入k8s node节点的annotations,从而实现csi plugin的注册操作。
kubelet pluginManager源码分析
kubelet启动时,会对pluginManager进行初始化
// pkg/kubelet/kubelet.go:784
klet.pluginManager = pluginmanager.NewPluginManager(
/*
The plugin registration directory watched by the plugin manager.
With the default kubelet root directory this is /var/lib/kubelet/plugins_registry.
*/
klet.getPluginsRegistrationDir(),
kubeDeps.Recorder,
)
pluginManager struct
注意到这里有 actualStateOfWorld、desiredStateOfWorld和 reconciler
// pluginManager implements the PluginManager interface
type pluginManager struct {
// desiredStateOfWorldPopulator (the plugin watcher) runs an asynchronous
// periodic loop to populate the desiredStateOfWorld.
desiredStateOfWorldPopulator *pluginwatcher.Watcher
// reconciler runs an asynchronous periodic loop to reconcile the
// desiredStateOfWorld with the actualStateOfWorld by triggering register
// and unregister operations using the operationExecutor.
reconciler reconciler.Reconciler
// actualStateOfWorld is a data structure containing the actual state of
// the world according to the manager: i.e. which plugins are registered.
// The data structure is populated upon successful completion of register
// and unregister actions triggered by the reconciler.
actualStateOfWorld cache.ActualStateOfWorld
// desiredStateOfWorld is a data structure containing the desired state of
// the world according to the plugin manager: i.e. which plugins should be
// registered.
// The data structure is populated by the desired state of the world
// populator (plugin watcher).
desiredStateOfWorld cache.DesiredStateOfWorld
}
如果没怎么看过其他的k8s代码,可能对k8s的actualStateOfWorld(后文简称asw)和desiredStateOfWorld(后文简称dsw)不熟悉。
在遵循声明式管理的k8s中,dsw一般对应了k8s资源的spec,即期望状态。asw一般对应了k8s资源的status,即实际状态。k8s最常见的一个处理框架,即是监测到外界变化后,先将其写入dsw,然后在reconciler中对dsw和asw进行比较,做相应的处理后,再把dsw中的变化写入asw,让两者同步。
在pluginManager中,也有着相似的处理:
asw中存放的是已在k8s中成功注册的plugin信息,dsw中存放的是期望注册的plugin信息,后续会讲到kubelet是如何监测到plugin的变化并将其存储到dsw,最终同步到asw。
在完成初始化后,直接看pluginManager的主体运行代码以及注释。
pkg/kubelet/pluginmanager/plugin_manager.go:108
// Run starts the desired-state populator (plugin watcher) and the reconciler,
// registers the plugin manager metrics, and then blocks until stopCh is closed.
// sourcesReady is not used in the body shown here.
func (pm *pluginManager) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
defer runtime.HandleCrash()
/*
Starts a watcher on the plugins_registry directory that listens for
create and delete operations (article annotation; the watcher itself is not shown here):
on a create event the socket path is written into the dsw;
on a delete event the socket path is removed from the dsw.
*/
pm.desiredStateOfWorldPopulator.Start(stopCh)
klog.V(2).InfoS("The desired_state_of_world populator (plugin watcher) starts")
klog.InfoS("Starting Kubelet Plugin Manager")
/*
Runs the reconciler in its own goroutine. Per the article, it uses the common
wait.Until helper to call rc.reconcile() periodically, comparing the dsw with
the asw and synchronizing them.
*/
go pm.reconciler.Run(stopCh)
metrics.Register(pm.actualStateOfWorld, pm.desiredStateOfWorld)
// Block until the caller signals shutdown.
<-stopCh
klog.InfoS("Shutting down Kubelet Plugin Manager")
}
reconcile()对dsw和asw进行了同步,完成了plugin的注册/取消注册的操作。整体逻辑如下:
- 遍历asw中的plugin,如果plugin存在于asw中,但dsw中不存在,则需要删除这个plugin。 又或者对于同一个plugin(socket路径相同),asw中plugin的timestamp与dsw中的不同, 则认为plugin已经更新了,同样需要删除操作。
- 遍历dsw,如果asw中没有同路径的socket,或者存在同路径的socket但timestamp不相同,则需要执行注册plugin的操作。
// reconcile performs one synchronization pass between the actual and the
// desired state of world. It first triggers unregistration for every
// registered plugin that is either missing from the desired state or present
// there with a different timestamp, and then triggers registration for every
// desired plugin that is not in the actual state with a matching timestamp.
// Errors reported as goroutinemap "already exists" or exponential backoff are
// expected and ignored; all other errors are logged.
func (rc *reconciler) reconcile() {
// Unregistrations are triggered before registrations
// Ensure plugins that should be unregistered are unregistered.
for _, registeredPlugin := range rc.actualStateOfWorld.GetRegisteredPlugins() {
unregisterPlugin := false
if !rc.desiredStateOfWorld.PluginExists(registeredPlugin.SocketPath) {
unregisterPlugin = true
} else {
// We also need to unregister the plugins that exist in both actual state of world
// and desired state of world cache, but the timestamps don't match.
// Iterate through desired state of world plugins and see if there's any plugin
// with the same socket path but different timestamp.
for _, dswPlugin := range rc.desiredStateOfWorld.GetPluginsToRegister() {
if dswPlugin.SocketPath == registeredPlugin.SocketPath && dswPlugin.Timestamp != registeredPlugin.Timestamp {
klog.V(5).InfoS("An updated version of plugin has been found, unregistering the plugin first before reregistering", "plugin", registeredPlugin)
unregisterPlugin = true
break
}
}
}
if unregisterPlugin {
klog.V(5).InfoS("Starting operationExecutor.UnregisterPlugin", "plugin", registeredPlugin)
err := rc.operationExecutor.UnregisterPlugin(registeredPlugin, rc.actualStateOfWorld)
if err != nil &&
!goroutinemap.IsAlreadyExists(err) &&
!exponentialbackoff.IsExponentialBackoff(err) {
// Ignore goroutinemap.IsAlreadyExists and exponentialbackoff.IsExponentialBackoff errors, they are expected.
// Log all other errors.
klog.ErrorS(err, "OperationExecutor.UnregisterPlugin failed", "plugin", registeredPlugin)
}
if err == nil {
klog.V(1).InfoS("OperationExecutor.UnregisterPlugin started", "plugin", registeredPlugin)
}
}
}
// Ensure plugins that should be registered are registered
for _, pluginToRegister := range rc.desiredStateOfWorld.GetPluginsToRegister() {
if !rc.actualStateOfWorld.PluginExistsWithCorrectTimestamp(pluginToRegister) {
klog.V(5).InfoS("Starting operationExecutor.RegisterPlugin", "plugin", pluginToRegister)
err := rc.operationExecutor.RegisterPlugin(pluginToRegister.SocketPath, pluginToRegister.Timestamp, rc.getHandlers(), rc.actualStateOfWorld)
if err != nil &&
!goroutinemap.IsAlreadyExists(err) &&
!exponentialbackoff.IsExponentialBackoff(err) {
// Ignore goroutinemap.IsAlreadyExists and exponentialbackoff.IsExponentialBackoff errors, they are expected.
// Log all other errors.
klog.ErrorS(err, "OperationExecutor.RegisterPlugin failed", "plugin", pluginToRegister)
}
if err == nil {
klog.V(1).InfoS("OperationExecutor.RegisterPlugin started", "plugin", pluginToRegister)
}
}
}
}
注册plugin(RegisterPlugin)和取消注册(UnregisterPlugin)大同小异,这里讲一下注册plugin的代码。
先说整体逻辑:
- dsw中已经存储了sidecar"node-driver-registrar"的socket路径,调用sidecar中已经实现的GetInfo方法,拿到csi plugin的socket路径(代码中变量名为Endpoint)。
- 将当前plugin写入asw
- 调用csi的NodeGetInfo方法,获取nodeID、maxVolumePerNode等信息。
- 将csi driver的 nodeid 写到 node 的annotation上
- create/update csinode
- dsw与asw已同步,csi注册完成
看下重点代码,注意注释
registerPluginFunc := func() error {
client, conn, err := dial(socketPath, dialTimeoutDuration)
if err != nil {
return fmt.Errorf("RegisterPlugin error -- dial failed at socket %s, err: %v", socketPath, err)
}
defer conn.Close()
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
/*
sidecar里需要实现这个GetInfo方法,返回csi的socket path
*/
infoResp, err := client.GetInfo(ctx, ®isterapi.InfoRequest{})
if err != nil {
return fmt.Errorf("RegisterPlugin error -- failed to get plugin info using RPC GetInfo at socket %s, err: %v", socketPath, err)
}
handler, ok := pluginHandlers[infoResp.Type]
if !ok {
if err := og.notifyPlugin(client, false, fmt.Sprintf("RegisterPlugin error -- no handler registered for plugin type: %s at socket %s", infoResp.Type, socketPath)); err != nil {
return fmt.Errorf("RegisterPlugin error -- failed to send error at socket %s, err: %v", socketPath, err)
}
return fmt.Errorf("RegisterPlugin error -- no handler registered for plugin type: %s at socket %s", infoResp.Type, socketPath)
}
/*
这里 infoResp.Endpoint 的值会是 /var/lib/kubelet/plugins/xxxxx
*/
if infoResp.Endpoint == "" {
infoResp.Endpoint = socketPath
}
if err := handler.ValidatePlugin(infoResp.Name, infoResp.Endpoint, infoResp.SupportedVersions); err != nil {
if err = og.notifyPlugin(client, false, fmt.Sprintf("RegisterPlugin error -- plugin validation failed with err: %v", err)); err != nil {
return fmt.Errorf("RegisterPlugin error -- failed to send error at socket %s, err: %v", socketPath, err)
}
return fmt.Errorf("RegisterPlugin error -- pluginHandler.ValidatePluginFunc failed")
}
// We add the plugin to the actual state of world cache before calling a plugin consumer's Register handle
// so that if we receive a delete event during Register Plugin, we can process it as a DeRegister call.
err = actualStateOfWorldUpdater.AddPlugin(cache.PluginInfo{
SocketPath: socketPath,
Timestamp: timestamp,
Handler: handler,
Name: infoResp.Name,
})
if err != nil {
klog.ErrorS(err, "RegisterPlugin error -- failed to add plugin", "path", socketPath)
}
/*
实际的注册操作,将csi的nodeID写到node的annotation中,并且创建/更新csinode
*/
if err := handler.RegisterPlugin(infoResp.Name, infoResp.Endpoint, infoResp.SupportedVersions); err != nil {
return og.notifyPlugin(client, false, fmt.Sprintf("RegisterPlugin error -- plugin registration failed with err: %v", err))
}
// Notify is called after register to guarantee that even if notify throws an error Register will always be called after validate
if err := og.notifyPlugin(client, true, ""); err != nil {
return fmt.Errorf("RegisterPlugin error -- failed to send registration status at socket %s, err: %v", socketPath, err)
}
return nil
}
注册成功后node的annotation与csinode
遗留问题
metrics.Register(pm.actualStateOfWorld, pm.desiredStateOfWorld)
这个metrics.Register的作用是?