kubelet源码阅读(二)——kubelet对device plugin的ListAndWatch过程以及容器设备信息检出代码

从kubelet运行到plugin注册过程

kubernetes-master/pkg/kubelet/kubelet.go
 1 func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
 2     // ... (earlier part of Run is elided in this excerpt)
 3     go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop) // background goroutine: calls updateRuntimeUp with a 5s period; wait.NeverStop is passed as the stop channel
 4     // ... (rest of Run is elided in this excerpt)
 5 }
 6 
 7 func (kl *Kubelet) updateRuntimeUp() {
 8     // ... (elided in this excerpt; the value s used below is not shown here)
 9     kl.runtimeState.setRuntimeState(nil)
10     kl.runtimeState.setRuntimeHandlers(s.Handlers)
11     kl.runtimeState.setRuntimeFeatures(s.Features)
12     kl.oneTimeInitializer.Do(kl.initializeRuntimeDependentModules) // presumably a sync.Once, so initializeRuntimeDependentModules runs only on the first call — TODO confirm
13     kl.runtimeState.setRuntimeSync(kl.clock.Now())
14 }
15 
16 func (kl *Kubelet) initializeRuntimeDependentModules() {
17     // Add every registration handler returned by the container manager (device plugin, and DRA plugin when enabled) to the plugin manager
18     for name, handler := range kl.containerManager.GetPluginRegistrationHandlers() {
19         kl.pluginManager.AddHandler(name, handler)
20     }
21 
22     // Start the plugin manager in its own goroutine; wait.NeverStop is passed as its stop channel
23     klog.V(4).InfoS("Starting plugin manager")
24     go kl.pluginManager.Run(kl.sourcesReady, wait.NeverStop)
25 }
26 
27 func (cm *containerManagerImpl) GetPluginRegistrationHandlers() map[string]cache.PluginHandler {
28     res := map[string]cache.PluginHandler{
29         pluginwatcherapi.DevicePlugin: cm.deviceManager.GetWatcherHandler(), // the device plugin handler is always included
30     }
31 
32     if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DynamicResourceAllocation) { // the DRA handler is added only when this feature gate is enabled
33         res[pluginwatcherapi.DRAPlugin] = cm.draManager.GetWatcherHandler()
34     }
35 
36     return res // map keyed by plugin type
37 }
38 
39 kubernetes-master/pkg/kubelet/cm/devicemanager/manager.go
40 // GetWatcherHandler returns the plugin handler, which is the manager's server field.
41 func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler {
42     return m.server
43 }

 

启动插件管理

 1 func (kl *Kubelet) initializeRuntimeDependentModules() {
 2     // Add every registration handler returned by the container manager (device plugin, and DRA plugin when enabled) to the plugin manager
 3     for name, handler := range kl.containerManager.GetPluginRegistrationHandlers() {
 4         kl.pluginManager.AddHandler(name, handler)
 5     }
 6 
 7     // Start the plugin manager in its own goroutine; wait.NeverStop is passed as its stop channel
 8     klog.V(4).InfoS("Starting plugin manager")
 9     go kl.pluginManager.Run(kl.sourcesReady, wait.NeverStop)
10 }

详细代码

// 启动kubelet plugin manager
kubernetes-master/pkg/kubelet/pluginmanager/plugin_manager.go
// Run starts the plugin manager and blocks until stopCh is closed.
//
// It first starts the desired-state-of-world populator (the plugin watcher);
// if that fails, the error is logged and Run returns without starting
// anything else. Otherwise the reconciler is launched in its own goroutine,
// the metrics are registered, and Run waits on stopCh.
//
// sourcesReady is accepted but not used by this function body.
func (pm *pluginManager) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
    defer runtime.HandleCrash()

    startErr := pm.desiredStateOfWorldPopulator.Start(stopCh)
    if startErr != nil {
        klog.ErrorS(startErr, "The desired_state_of_world populator (plugin watcher) starts failed!")
        return
    }
    klog.V(2).InfoS("The desired_state_of_world populator (plugin watcher) starts")

    // The reconciler shares the same stop channel as this function.
    klog.InfoS("Starting Kubelet Plugin Manager")
    go pm.reconciler.Run(stopCh)
    metrics.Register(pm.actualStateOfWorld, pm.desiredStateOfWorld)

    // Block here until the caller signals shutdown.
    <-stopCh
    klog.InfoS("Shutting down Kubelet Plugin Manager")
}

调谐device plugin,确保应该注册的插件已注册
kubernetes-master/pkg/kubelet/pluginmanager/reconciler/reconciler.go
// reconcile brings the actual state of world in line with the desired state
// of world in two passes: unregistrations first, then registrations.
//
// A registered plugin is unregistered when its socket path is no longer in
// the desired state, or when the desired state holds a plugin with the same
// socket path but a different UUID. A desired plugin is registered when the
// actual state does not already hold it with the correct UUID.
//
// Errors reported as goroutinemap "already exists" or exponential backoff are
// expected and ignored; every other error is logged.
func (rc *reconciler) reconcile() {
    // Pass 1: unregister plugins that should no longer be registered.
    for _, registered := range rc.actualStateOfWorld.GetRegisteredPlugins() {
        stale := !rc.desiredStateOfWorld.PluginExists(registered.SocketPath)
        if !stale {
            // The socket path is still desired; look for a desired plugin with
            // the same socket path but a different UUID, which means the
            // registered one has to be dropped before it is registered again.
            for _, desired := range rc.desiredStateOfWorld.GetPluginsToRegister() {
                if desired.SocketPath != registered.SocketPath || desired.UUID == registered.UUID {
                    continue
                }
                klog.V(5).InfoS("An updated version of plugin has been found, unregistering the plugin first before reregistering", "plugin", registered)
                stale = true
                break
            }
        }
        if !stale {
            continue
        }

        klog.V(5).InfoS("Starting operationExecutor.UnregisterPlugin", "plugin", registered)
        err := rc.operationExecutor.UnregisterPlugin(registered, rc.actualStateOfWorld)
        switch {
        case err == nil:
            klog.V(1).InfoS("OperationExecutor.UnregisterPlugin started", "plugin", registered)
        case goroutinemap.IsAlreadyExists(err), exponentialbackoff.IsExponentialBackoff(err):
            // Expected errors; nothing to log.
        default:
            klog.ErrorS(err, "OperationExecutor.UnregisterPlugin failed", "plugin", registered)
        }
    }

    // Pass 2: register plugins that should be registered but are not yet.
    for _, desired := range rc.desiredStateOfWorld.GetPluginsToRegister() {
        if rc.actualStateOfWorld.PluginExistsWithCorrectUUID(desired) {
            continue
        }

        klog.V(5).InfoS("Starting operationExecutor.RegisterPlugin", "plugin", desired)
        err := rc.operationExecutor.RegisterPlugin(desired.SocketPath, desired.UUID, rc.getHandlers(), rc.actualStateOfWorld)
        switch {
        case err == nil:
            klog.V(1).InfoS("OperationExecutor.RegisterPlugin started", "plugin", desired)
        case goroutinemap.IsAlreadyExists(err), exponentialbackoff.IsExponentialBackoff(err):
            // Expected errors; nothing to log.
        default:
            klog.ErrorS(err, "OperationExecutor.RegisterPlugin failed", "plugin", desired)
        }
    }
}

kubernetes-master/pkg/kubelet/cm/devicemanager/plugin/v1beta1/handler.go
func (s *server) RegisterPlugin(pluginName string, endpoint string, versions []string, pluginClientTimeout *time.Duration) error {
    klog.V(2).InfoS("Registering plugin at endpoint", "plugin", pluginName, "endpoint", endpoint)
    return s.connectClient(pluginName, endpoint)
}func (s *server) connectClient(name string, socketPath string) error {
    c := NewPluginClient(name, socketPath, s.chandler)

    s.registerClient(name, c)
    if err := c.Connect(); err != nil {
        s.deregisterClient(name)
        klog.ErrorS(err, "Failed to connect to new client", "resource", name)
        return err
    }

    klog.V(2).InfoS("Connected to new client", "resource", name)
    go func() {
        s.runClient(name, c)
    }()

    return nil
}

// runClient runs the client until c.Run returns, then disconnects whichever
// client is currently stored under name. If no client is stored under that
// name any more, nothing is disconnected. A disconnect failure is logged.
func (s *server) runClient(name string, c Client) {
    c.Run()

    // Keep the Client interface type so the nil check behaves exactly as it
    // would when assigning the lookup result back to c.
    var current Client = s.getClient(name)
    if current != nil {
        err := s.disconnectClient(name, current)
        if err != nil {
            klog.ErrorS(err, "Unable to disconnect client", "resource", name, "client", current)
        }
    }
}

 执行ListAndWatch

 1 kubernetes-master/pkg/kubelet/cm/devicemanager/plugin/v1beta1/client.go
 2 // Run opens a ListAndWatch stream to the device plugin and passes every response to the handler until the stream returns an error.
 3 func (c *client) Run() {
 4     stream, err := c.client.ListAndWatch(context.Background(), &api.Empty{})
 5     if err != nil {
 6         klog.ErrorS(err, "ListAndWatch ended unexpectedly for device plugin", "resource", c.resource)
 7         return
 8     }
 9 
10     // Loop forever, receiving the device list pushed by the device plugin.
11     for {
12         response, err := stream.Recv()
13         if err != nil {
14             klog.ErrorS(err, "ListAndWatch ended unexpectedly for device plugin", "resource", c.resource)
15             return // any Recv error ends the loop and Run returns
16         }
17         klog.V(2).InfoS("State pushed for device plugin", "resource", c.resource, "resourceCapacity", len(response.Devices))
18         c.handler.PluginListAndWatchReceiver(c.resource, response) // hand the response to the handler together with the resource name
19     }
20 }

设备接收处理及数据检出

收到设备列表后,kubelet会更新内存中的设备状态,并将podDevices信息写入磁盘上的checkpoint文件。

 1 kubernetes-master/pkg/kubelet/cm/devicemanager/manager.go
 2 // PluginListAndWatchReceiver receives ListAndWatchResponse from a device plugin
 3 // and ensures that an up-to-date state (e.g. number of devices and device health)
 4 // is captured. Also, registered device and device to container allocation
 5 // information is checkpointed to the disk.
 6 func (m *ManagerImpl) PluginListAndWatchReceiver(resourceName string, resp *pluginapi.ListAndWatchResponse) {
 7     var devices []pluginapi.Device
 8     for _, d := range resp.Devices {
 9         devices = append(devices, *d) // copy each device by value; NOTE(review): a nil entry in resp.Devices would panic on this dereference
10     }
11     m.genericDeviceUpdateCallback(resourceName, devices) // the state update and the checkpoint write both happen in this call
12 }
13 // genericDeviceUpdateCallback rebuilds the device sets of resourceName from devices, optionally reports affected pods, and writes the checkpoint.
14 func (m *ManagerImpl) genericDeviceUpdateCallback(resourceName string, devices []pluginapi.Device) {
15     healthyCount := 0
16     m.mutex.Lock()
17     m.healthyDevices[resourceName] = sets.New[string]() // the healthy/unhealthy sets of this resource are rebuilt from scratch on every update
18     m.unhealthyDevices[resourceName] = sets.New[string]()
19     oldDevices := m.allDevices[resourceName] // keep the previous devices for the health comparison below
20     podsToUpdate := sets.New[string]()
21     m.allDevices[resourceName] = make(map[string]pluginapi.Device)
22     for _, dev := range devices {
23         // When the ResourceHealthStatus feature gate is enabled, collect the UIDs of pods that hold a device whose health changed or that is newly reported.
24         if utilfeature.DefaultFeatureGate.Enabled(features.ResourceHealthStatus) {
25             // compare with old device's health and send update to the channel if needed
26             updatePodUIDFn := func(deviceID string) {
27                 podUID, _ := m.podDevices.getPodAndContainerForDevice(deviceID) // second return value is not needed here
28                 if podUID != "" {
29                     podsToUpdate.Insert(podUID)
30                 }
31             }
32             if oldDev, ok := oldDevices[dev.ID]; ok {
33                 if oldDev.Health != dev.Health {
34                     updatePodUIDFn(dev.ID)
35                 }
36             } else {
37                 // if this is a new device, it might have existed before and disappeared for a while
38                 // but still be assigned to a Pod. In this case, we need to send an update to the channel
39                 updatePodUIDFn(dev.ID)
40             }
41         }
42         // Record the device and classify it as healthy or unhealthy.
43         m.allDevices[resourceName][dev.ID] = dev
44         if dev.Health == pluginapi.Healthy {
45             m.healthyDevices[resourceName].Insert(dev.ID)
46             healthyCount++
47         } else {
48             m.unhealthyDevices[resourceName].Insert(dev.ID)
49         }
50     }
51     m.mutex.Unlock()
52     // The notification is sent after the mutex is released; the send is non-blocking and the update is discarded (with an error log) when it cannot proceed.
53     if utilfeature.DefaultFeatureGate.Enabled(features.ResourceHealthStatus) {
54         if len(podsToUpdate) > 0 {
55             select {
56             case m.update <- resourceupdates.Update{PodUIDs: podsToUpdate.UnsortedList()}:
57             default:
58                 klog.ErrorS(goerrors.New("device update channel is full"), "discard pods info", "podsToUpdate", podsToUpdate.UnsortedList())
59             }
60         }
61     }
62     // Write the checkpoint; a failure is only logged and does not stop processing.
63     if err := m.writeCheckpoint(); err != nil {
64         klog.ErrorS(err, "Writing checkpoint encountered")
65     }
66     klog.V(2).InfoS("Processed device updates for resource", "resourceName", resourceName, "totalCount", len(devices), "healthyCount", healthyCount)
67 }
68 
69 // writeCheckpoint snapshots the healthy devices per resource and the pod device entries under the mutex, then writes them as a checkpoint.
70 // Checkpoints device to container allocation information to disk.
71 func (m *ManagerImpl) writeCheckpoint() error {
72     m.mutex.Lock()
73     registeredDevs := make(map[string][]string)
74     for resource, devices := range m.healthyDevices { // only healthy devices are recorded as registered devices
75         registeredDevs[resource] = devices.UnsortedList()
76     }
77     data := checkpoint.New(m.podDevices.toCheckpointData(),
78         registeredDevs)
79     m.mutex.Unlock() // the mutex is released before the checkpoint is created
80     err := m.checkpointManager.CreateCheckpoint(kubeletDeviceManagerCheckpoint, data)
81     if err != nil {
82         err2 := fmt.Errorf("failed to write checkpoint file %q: %v", kubeletDeviceManagerCheckpoint, err) // NOTE(review): %v only formats err as text; errors.Is/As on err2 will not reach err (%w would wrap it)
83         klog.ErrorS(err, "Failed to write checkpoint file") // NOTE(review): the error is logged here and also returned, and genericDeviceUpdateCallback logs the returned error again
84         return err2
85     }
86     klog.V(4).InfoS("Checkpoint file written", "checkpoint", kubeletDeviceManagerCheckpoint)
87     return nil
88 }

检出podDevices信息到磁盘

信息包括:pod、容器、资源信息、设备ID以及allocResp(envs、mounts、devices、annotations、cdi-devices)

 1 // Turns podDevices to checkpointData: one entry per (pod, container, resource) whose allocation response is present and can be marshaled.
 2 func (pdev *podDevices) toCheckpointData() []checkpoint.PodDevicesEntry {
 3     var data []checkpoint.PodDevicesEntry
 4     pdev.RLock()
 5     defer pdev.RUnlock()
 6     for podUID, containerDevices := range pdev.devs {
 7         for conName, resources := range containerDevices {
 8             for resource, devices := range resources {
 9                 if devices.allocResp == nil {
10                     klog.ErrorS(nil, "Can't marshal allocResp, allocation response is missing", "podUID", podUID, "containerName", conName, "resourceName", resource)
11                     continue // this entry is skipped and will be absent from the returned data
12                 }
13 
14                 allocResp, err := devices.allocResp.Marshal()
15                 if err != nil {
16                     klog.ErrorS(err, "Can't marshal allocResp", "podUID", podUID, "containerName", conName, "resourceName", resource)
17                     continue // this entry is skipped and will be absent from the returned data
18                 }
19                 data = append(data, checkpoint.PodDevicesEntry{
20                     PodUID:        podUID,              // pod UID
21                     ContainerName: conName,             // container name
22                     ResourceName:  resource,            // resource name
23                     DeviceIDs:     devices.deviceIds,   // list of device IDs
24                     AllocResp:     allocResp})          // marshaled allocation response (per the article: envs, mounts, devices, annotations, cdi-devices)
25             }
26         }
27     }
28     return data
29 }

kubelet为pod的容器分配未使用的deviceid(例如:gpu 卡 id),并将分配信息记录到内存与磁盘。

即使kubelet重启,可以加载磁盘上的checkpoint,从而得知device分配情况。

checkpoint文件名称

kubelet会在 /var/lib/kubelet/device-plugins/ 目录下创建名为 kubelet_internal_checkpoint 的文件。

1 // kubeletDeviceManagerCheckpoint is the file name of the device plugin checkpoint; writeCheckpoint passes it to CreateCheckpoint.
2 const kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"

 

posted @ 2025-09-25 16:01  xiaoxiongfei  阅读(6)  评论(0)    收藏  举报