kubelet源码阅读(一)

kubelet创建流程

cmd/kubelet/kubelet.go
func main() {
    // 创建kubelet命令,并运行
    command := app.NewKubeletCommand()
    code := cli.Run(command)
    os.Exit(code)
}

cmd/kubelet/app/server.go
// NewKubeletCommand creates a *cobra.Command object with default parameters
func NewKubeletCommand() *cobra.Command {
    cmd := &cobra.Command{
        // ...
        // RunE: Run but returns an error. 
        RunE: func(cmd *cobra.Command, args []string) error {
            // ...
            // run the kubelet (运行kubelet)
            return Run(ctx, kubeletServer, kubeletDeps, utilfeature.DefaultFeatureGate)    
        }
        // ...
    }
}

// Run delegates to run with the same arguments and returns any error it
// reports; it returns nil when run succeeds.
//
// This is an abridged excerpt: every "// ..." marks elided code.
func Run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Dependencies, featureGate featuregate.FeatureGate) error {
    // ...
    // Propagate the failure instead of discarding it.
    if err := run(ctx, s, kubeDeps, featureGate); err != nil {
        return err
    }
    // ...
    return nil
}


// run fills in kubeDeps.ContainerManager when it is nil and then calls
// RunKubelet. It returns the first error from either step, or nil.
//
// This is an abridged excerpt of the original function.
func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Dependencies, featureGate featuregate.FeatureGate) (err error) {
    if kubeDeps.ContainerManager == nil {
        // The constructor arguments are elided in this excerpt.
        kubeDeps.ContainerManager, err = cm.NewContainerManager(...)
        if err != nil {
            // Do not continue with a container manager that failed to build.
            return err
        }
    }

    if err := RunKubelet(ctx, s, kubeDeps); err != nil {
        return err
    }

    // health
    // NOTE(review): presumably the health-check setup, elided in this excerpt.
    return nil
}

// RunKubelet is responsible for setting up and running a kubelet.
//
// It creates the kubelet with createAndInitKubelet, wrapping any failure as
// "failed to create kubelet", then hands it to startKubelet and returns nil.
//
// This is an abridged excerpt: "// ..." marks elided code.
// NOTE(review): hostname, hostnameOverridden, nodeName, nodeIPs and podCfg are
// presumably defined in the elided portion; confirm against the full source.
func RunKubelet(ctx context.Context, kubeServer *options.KubeletServer, kubeDeps *kubelet.Dependencies) error {
    // ...
    // Create the kubelet.
    k, err := createAndInitKubelet(kubeServer,
        kubeDeps,
        hostname,
        hostnameOverridden,
        nodeName,
        nodeIPs)
    if err != nil {
        return fmt.Errorf("failed to create kubelet: %w", err)
    }

    // Run the kubelet.
    startKubelet(k, podCfg, &kubeServer.KubeletConfiguration, kubeDeps, kubeServer.EnableServer)
    return nil
}

// createAndInitKubelet constructs a kubelet with kubelet.NewMainKubelet from
// the fields of kubeServer plus the given hostname, node name and node IPs,
// then calls BirthCry and StartGarbageCollection on it before returning it.
// If construction fails it returns nil and the construction error.
func createAndInitKubelet(kubeServer *options.KubeletServer,
    kubeDeps *kubelet.Dependencies,
    hostname string,
    hostnameOverridden bool,
    nodeName types.NodeName,
    nodeIPs []net.IP) (k kubelet.Bootstrap, err error) {
    // TODO: block until all sources have delivered at least one update to the channel, or break the sync loop
    // up into "per source" synchronizations

    // The arguments are positional, so their order must match the parameter
    // list of NewMainKubelet.
    k, err = kubelet.NewMainKubelet(&kubeServer.KubeletConfiguration,
        kubeDeps,
        &kubeServer.ContainerRuntimeOptions,
        hostname,
        hostnameOverridden,
        nodeName,
        nodeIPs,
        kubeServer.ProviderID,
        kubeServer.CloudProvider,
        kubeServer.CertDirectory,
        kubeServer.RootDirectory,
        kubeServer.PodLogsDir,
        kubeServer.ImageCredentialProviderConfigFile,
        kubeServer.ImageCredentialProviderBinDir,
        kubeServer.RegisterNode,
        kubeServer.RegisterWithTaints,
        kubeServer.AllowedUnsafeSysctls,
        kubeServer.ExperimentalMounterPath,
        kubeServer.KernelMemcgNotification,
        kubeServer.ExperimentalNodeAllocatableIgnoreEvictionThreshold,
        kubeServer.MinimumGCAge,
        kubeServer.MaxPerPodContainerCount,
        kubeServer.MaxContainerCount,
        kubeServer.RegisterSchedulable,
        kubeServer.NodeLabels,
        kubeServer.NodeStatusMaxImages,
        kubeServer.KubeletFlags.SeccompDefault || kubeServer.KubeletConfiguration.SeccompDefault)
    if err != nil {
        // Return a literal nil so the caller never sees a partially built kubelet.
        return nil, err
    }

    k.BirthCry()

    // Start garbage collection.
    k.StartGarbageCollection()

    return k, nil
}

// startKubelet launches the kubelet and its servers, each in its own
// goroutine, and returns without waiting for any of them.
//
// The main server is started only when enableServer is true, and the
// read-only server only when kubeCfg.ReadOnlyPort is greater than zero. The
// pod resources server is always started.
func startKubelet(k kubelet.Bootstrap, podCfg *config.PodConfig, kubeCfg *kubeletconfiginternal.KubeletConfiguration, kubeDeps *kubelet.Dependencies, enableServer bool) {
    // Start the kubelet, feeding it the result of podCfg.Updates().
    go k.Run(podCfg.Updates())

    // Start the kubelet server.
    if enableServer {
        go k.ListenAndServe(kubeCfg, kubeDeps.TLSOptions, kubeDeps.Auth, kubeDeps.TracerProvider)
    }
    // Start the read-only server on kubeCfg.Address when a read-only port is configured.
    if kubeCfg.ReadOnlyPort > 0 {
        go k.ListenAndServeReadOnly(netutils.ParseIPSloppy(kubeCfg.Address), uint(kubeCfg.ReadOnlyPort), kubeDeps.TracerProvider)
    }
    // Start the pod resources server.
    go k.ListenAndServePodResources()
}
View Code

kubelet定义

pkg/kubelet/kubelet.go

// Kubelet is the main kubelet implementation.
// It groups the configuration, API clients, managers, caches and locks
// declared below into a single struct.
type Kubelet struct {
    // kubeletConfiguration holds the kubelet's configuration.
    // NOTE(review): presumably a copy of the configuration passed to NewMainKubelet — confirm.
    kubeletConfiguration kubeletconfiginternal.KubeletConfiguration

    // hostname is the hostname the kubelet detected or was given via flag/config
    hostname string
    // hostnameOverridden indicates the hostname was overridden via flag/config
    hostnameOverridden bool

    nodeName        types.NodeName
    runtimeCache    kubecontainer.RuntimeCache
    kubeClient      clientset.Interface
    heartbeatClient clientset.Interface
    // mirrorPodClient is used to create and delete mirror pods in the API for static
    // pods.
    mirrorPodClient kubepod.MirrorClient

    rootDirectory    string
    podLogsDirectory string

    // NOTE(review): lastObservedNodeAddressesMux presumably guards
    // lastObservedNodeAddresses — confirm against the accessors.
    lastObservedNodeAddressesMux sync.RWMutex
    lastObservedNodeAddresses    []v1.NodeAddress

    // onRepeatedHeartbeatFailure is called when a heartbeat operation fails more than once. optional.
    onRepeatedHeartbeatFailure func()

    // podManager stores the desired set of admitted pods and mirror pods that the kubelet should be
    // running. The actual set of running pods is stored on the podWorkers. The manager is populated
    // by the kubelet config loops which abstracts receiving configuration from many different sources
    // (api for regular pods, local filesystem or http for static pods). The manager may be consulted
    // by other components that need to see the set of desired pods. Note that not all desired pods are
    // running, and not all running pods are in the podManager - for instance, force deleting a pod
    // from the apiserver will remove it from the podManager, but the pod may still be terminating and
    // tracked by the podWorkers. Components that need to know the actual consumed resources of the
    // node or are driven by podWorkers and the sync*Pod methods (status, volume, stats) should also
    // consult the podWorkers when reconciling.
    //
    // TODO: review all kubelet components that need the actual set of pods (vs the desired set)
    // and update them to use podWorkers instead of podManager. This may introduce latency in some
    // methods, but avoids race conditions and correctly accounts for terminating pods that have
    // been force deleted or static pods that have been updated.
    // https://github.com/kubernetes/kubernetes/issues/116970
    podManager kubepod.Manager

    // podWorkers is responsible for driving the lifecycle state machine of each pod. The worker is
    // notified of config changes, updates, periodic reconciliation, container runtime updates, and
    // evictions of all desired pods and will invoke reconciliation methods per pod in separate
    // goroutines. The podWorkers are authoritative in the kubelet for what pods are actually being
    // run and their current state:
    //
    // * syncing: pod should be running (syncPod)
    // * terminating: pod should be stopped (syncTerminatingPod)
    // * terminated: pod should have all resources cleaned up (syncTerminatedPod)
    //
    // and invoke the handler methods that correspond to each state. Components within the
    // kubelet that need to know the phase of the pod in order to correctly set up or tear down
    // resources must consult the podWorkers.
    //
    // Once a pod has been accepted by the pod workers, no other pod with that same UID (and
    // name+namespace, for static pods) will be started until the first pod has fully terminated
    // and been cleaned up by SyncKnownPods. This means a pod may be desired (in API), admitted
    // (in pod manager), and requested (by invoking UpdatePod) but not start for an arbitrarily
    // long interval because a prior pod is still terminating.
    //
    // As an event-driven (by UpdatePod) controller, the podWorkers must periodically be resynced
    // by the kubelet invoking SyncKnownPods with the desired state (admitted pods in podManager).
    // Since the podManager may be unaware of some running pods due to force deletion, the
    // podWorkers are responsible for triggering a sync of pods that are no longer desired but
    // must still run to completion.
    podWorkers PodWorkers

    // evictionManager observes the state of the node for situations that could impact node stability
    // and evicts pods (sets to phase Failed with reason Evicted) to reduce resource pressure. The
    // eviction manager acts on the actual state of the node and considers the podWorker to be
    // authoritative.
    evictionManager eviction.Manager

    // probeManager tracks the set of running pods and ensures any user-defined periodic checks are
    // run to introspect the state of each pod.  The probe manager acts on the actual state of the node
    // and is notified of pods by the podWorker. The probe manager is the authoritative source of the
    // most recent probe status and is responsible for notifying the status manager, which
    // synthesizes them into the overall pod status.
    probeManager prober.Manager

    // secretManager caches the set of secrets used by running pods on this node. The podWorkers
    // notify the secretManager when pods are started and terminated, and the secretManager must
    // then keep the needed secrets up-to-date as they change.
    secretManager secret.Manager

    // configMapManager caches the set of config maps used by running pods on this node. The
    // podWorkers notify the configMapManager when pods are started and terminated, and the
    // configMapManager must then keep the needed config maps up-to-date as they change.
    configMapManager configmap.Manager

    // volumeManager observes the set of running pods and is responsible for attaching, mounting,
    // unmounting, and detaching as those pods move through their lifecycle. It periodically
    // synchronizes the set of known volumes to the set of actually desired volumes and cleans up
    // any orphaned volumes. The volume manager considers the podWorker to be authoritative for
    // which pods are running.
    volumeManager volumemanager.VolumeManager

    // statusManager receives updated pod status updates from the podWorker and updates the API
    // status of those pods to match. The statusManager is authoritative for the synthesized
    // status of the pod from the kubelet's perspective (other components own the individual
    // elements of status) and should be consulted by components in preference to assembling
    // that status themselves. Note that the status manager is downstream of the pod worker
    // and components that need to check whether a pod is still running should instead directly
    // consult the pod worker.
    statusManager status.Manager

    // allocationManager manages allocated resources for pods.
    allocationManager allocation.Manager

    // resyncInterval is the interval between periodic full reconciliations of
    // pods on this node.
    resyncInterval time.Duration

    // sourcesReady records the sources seen by the kubelet, it is thread-safe.
    sourcesReady config.SourcesReady

    // Optional, defaults to /logs/ from /var/log
    logServer http.Handler
    // Optional, defaults to simple Docker implementation
    runner kubecontainer.CommandRunner

    // cAdvisor used for container information.
    cadvisor cadvisor.Interface

    // Set to true to have the node register itself with the apiserver.
    registerNode bool
    // List of taints to add to a node object when the kubelet registers itself.
    registerWithTaints []v1.Taint
    // Set to true to have the node register itself as schedulable.
    registerSchedulable bool
    // for internal book keeping; access only from within registerWithApiserver
    registrationCompleted bool

    // dnsConfigurer is used for setting up DNS resolver configuration when launching pods.
    dnsConfigurer *dns.Configurer

    // serviceLister knows how to list services
    serviceLister serviceLister
    // serviceHasSynced indicates whether services have been sync'd at least once.
    // Check this before trusting a response from the lister.
    serviceHasSynced cache.InformerSynced
    // nodeLister knows how to list nodes
    nodeLister corelisters.NodeLister
    // nodeHasSynced indicates whether nodes have been sync'd at least once.
    // Check this before trusting a response from the node lister.
    nodeHasSynced cache.InformerSynced
    // a list of node labels to register
    nodeLabels map[string]string

    // Last timestamp when runtime responded on ping.
    // Mutex is used to protect this value.
    runtimeState *runtimeState

    // Volume plugins.
    volumePluginMgr *volume.VolumePluginMgr

    // Manages container health check results.
    livenessManager  proberesults.Manager
    readinessManager proberesults.Manager
    startupManager   proberesults.Manager

    // How long to keep idle streaming command execution/port forwarding
    // connections open before terminating them
    streamingConnectionIdleTimeout time.Duration

    // The EventRecorder to use
    recorder record.EventRecorder

    // Policy for handling garbage collection of dead containers.
    containerGC kubecontainer.GC

    // Manager for image garbage collection.
    imageManager images.ImageGCManager

    // Manager for container logs.
    containerLogManager logs.ContainerLogManager

    // Cached MachineInfo returned by cadvisor.
    machineInfoLock sync.RWMutex
    machineInfo     *cadvisorapi.MachineInfo

    // Handles certificate rotations.
    serverCertificateManager certificate.Manager

    // Cloud provider interface.
    cloud cloudprovider.Interface
    // Handles requests to cloud provider with timeout
    cloudResourceSyncManager cloudresource.SyncManager

    // Indicates that the node initialization happens in an external cloud controller
    externalCloudProvider bool
    // Reference to this node.
    nodeRef *v1.ObjectReference

    // Container runtime.
    containerRuntime kubecontainer.Runtime

    // Streaming runtime handles container streaming.
    streamingRuntime kubecontainer.StreamingRuntime

    // Container runtime service (needed by container runtime Start()).
    runtimeService internalapi.RuntimeService

    // reasonCache caches the failure reason of the last creation of all containers, which is
    // used for generating ContainerStatus.
    reasonCache *ReasonCache

    // containerRuntimeReadyExpected indicates whether container runtime being ready is expected
    // so errors are logged without verbosity guard, to avoid excessive error logs at node startup.
    // It's false during the node initialization period of nodeReadyGracePeriod, and after that
    // it's set to true by fastStatusUpdateOnce when it exits.
    containerRuntimeReadyExpected bool

    // nodeStatusUpdateFrequency specifies how often kubelet computes node status. If node lease
    // feature is not enabled, it is also the frequency that kubelet posts node status to master.
    // In that case, be cautious when changing the constant, it must work with nodeMonitorGracePeriod
    // in nodecontroller. There are several constraints:
    // 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
    //    N means number of retries allowed for kubelet to post node status. It is pointless
    //    to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
    //    will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
    //    The constant must be less than podEvictionTimeout.
    // 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate node
    //    status. Kubelet may fail to update node status reliably if the value is too small,
    //    as it takes time to gather all necessary node information.
    nodeStatusUpdateFrequency time.Duration

    // nodeStatusReportFrequency is the frequency that kubelet posts node
    // status to master. It is only used when node lease feature is enabled.
    nodeStatusReportFrequency time.Duration

    // lastStatusReportTime is the time when node status was last reported.
    lastStatusReportTime time.Time

    // syncNodeStatusMux is a lock on updating the node status, because this path is not thread-safe.
    // This lock is used by Kubelet.syncNodeStatus and Kubelet.fastNodeStatusUpdate functions and shouldn't be used anywhere else.
    syncNodeStatusMux sync.Mutex

    // updatePodCIDRMux is a lock on updating pod CIDR, because this path is not thread-safe.
    // This lock is used by Kubelet.updatePodCIDR function and shouldn't be used anywhere else.
    updatePodCIDRMux sync.Mutex

    // updateRuntimeMux is a lock on updating runtime, because this path is not thread-safe.
    // This lock is used by Kubelet.updateRuntimeUp, Kubelet.fastNodeStatusUpdate and
    // Kubelet.HandlerSupportsUserNamespaces functions and shouldn't be used anywhere else.
    updateRuntimeMux sync.Mutex

    // nodeLeaseController claims and renews the node lease for this Kubelet
    nodeLeaseController lease.Controller

    // pleg observes the state of the container runtime and notifies the kubelet of changes to containers, which
    // notifies the podWorkers to reconcile the state of the pod (for instance, if a container dies and needs to
    // be restarted).
    pleg pleg.PodLifecycleEventGenerator

    // eventedPleg supplements the pleg to deliver edge-driven container changes with low-latency.
    eventedPleg pleg.PodLifecycleEventGenerator

    // Store kubecontainer.PodStatus for all pods.
    podCache kubecontainer.Cache

    // os is a facade for various syscalls that need to be mocked during testing.
    os kubecontainer.OSInterface

    // Watcher of out of memory events.
    oomWatcher oomwatcher.Watcher

    // Monitor resource usage
    resourceAnalyzer serverstats.ResourceAnalyzer

    // Whether or not we should have the QOS cgroup hierarchy for resource management
    cgroupsPerQOS bool

    // If non-empty, pass this to the container runtime as the root cgroup.
    cgroupRoot string

    // Mounter to use for volumes.
    mounter mount.Interface

    // hostutil to interact with filesystems
    hostutil hostutil.HostUtils

    // subpather to execute subpath actions
    subpather subpath.Interface

    // Manager of non-Runtime containers.
    containerManager cm.ContainerManager

    // Maximum Number of Pods which can be run by this Kubelet
    maxPods int

    // Monitor Kubelet's sync loop
    syncLoopMonitor atomic.Value

    // Container restart Backoff
    crashLoopBackOff *flowcontrol.Backoff

    // Information about the ports which are opened by daemons on Node running this Kubelet server.
    daemonEndpoints *v1.NodeDaemonEndpoints

    // A queue used to trigger pod workers.
    workQueue queue.WorkQueue

    // oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
    oneTimeInitializer sync.Once

    // If set, use this IP address or addresses for the node
    nodeIPs []net.IP

    // use this function to validate the kubelet nodeIP
    nodeIPValidator func(net.IP) error

    // If non-empty, this is a unique identifier for the node in an external database, eg. cloudprovider
    providerID string

    // clock is an interface that provides time related functionality in a way that makes it
    // easy to test the code.
    clock clock.WithTicker

    // handlers called during the tryUpdateNodeStatus cycle
    setNodeStatusFuncs []func(context.Context, *v1.Node) error

    lastNodeUnschedulableLock sync.Mutex
    // maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
    lastNodeUnschedulable bool

    // the list of handlers to call during pod admission.
    admitHandlers lifecycle.PodAdmitHandlers

    // the list of handlers to call during pod sync loop.
    lifecycle.PodSyncLoopHandlers

    // the list of handlers to call during pod sync.
    lifecycle.PodSyncHandlers

    // the number of allowed pods per core
    podsPerCore int

    // enableControllerAttachDetach indicates the Attach/Detach controller
    // should manage attachment/detachment of volumes scheduled to this node,
    // and disable kubelet from executing any attach/detach operations
    enableControllerAttachDetach bool

    // trigger deleting containers in a pod
    containerDeletor *podContainerDeletor

    // config iptables util rules
    makeIPTablesUtilChains bool

    // The AppArmor validator for checking whether AppArmor is supported.
    appArmorValidator apparmor.Validator

    // StatsProvider provides the node and the container stats.
    StatsProvider *stats.Provider

    // pluginmanager runs a set of asynchronous loops that figure out which
    // plugins need to be registered/unregistered based on this node and makes it so.
    pluginManager pluginmanager.PluginManager

    // This flag sets a maximum number of images to report in the node status.
    nodeStatusMaxImages int32

    // Handles RuntimeClass objects for the Kubelet.
    runtimeClassManager *runtimeclass.Manager

    // Handles node shutdown events for the Node.
    shutdownManager nodeshutdown.Manager

    // Manage user namespaces
    usernsManager *userns.UsernsManager

    // Mutex to serialize new pod admission and existing pod resizing
    podResizeMutex sync.Mutex

    // OpenTelemetry Tracer
    tracer trace.Tracer

    // Track node startup latencies
    nodeStartupLatencyTracker util.NodeStartupLatencyTracker

    // Health check kubelet
    healthChecker watchdog.HealthChecker

    // flagz is the Reader interface to get flags for flagz page.
    flagz flagz.Reader
}
View Code

kubelet依赖

// Dependencies is a bin for things we might consider "injected dependencies" -- objects constructed
// at runtime that are necessary for running the Kubelet. This is a temporary solution for grouping
// these objects while we figure out a more comprehensive dependency injection story for the Kubelet.
type Dependencies struct {
    // Options is a list of Option values.
    // NOTE(review): presumably applied to the Kubelet during construction — confirm.
    Options []Option

    // Injected Dependencies
    Flagz                     flagz.Reader
    Auth                      server.AuthInterface
    CAdvisorInterface         cadvisor.Interface
    Cloud                     cloudprovider.Interface
    ContainerManager          cm.ContainerManager
    EventClient               v1core.EventsGetter
    HeartbeatClient           clientset.Interface
    OnHeartbeatFailure        func()
    KubeClient                clientset.Interface
    Mounter                   mount.Interface
    HostUtil                  hostutil.HostUtils
    OOMAdjuster               *oom.OOMAdjuster
    OSInterface               kubecontainer.OSInterface
    PodConfig                 *config.PodConfig
    ProbeManager              prober.Manager
    Recorder                  record.EventRecorder
    Subpather                 subpath.Interface
    TracerProvider            trace.TracerProvider
    VolumePlugins             []volume.VolumePlugin
    DynamicPluginProber       volume.DynamicPluginProber
    TLSOptions                *server.TLSOptions
    RemoteRuntimeService      internalapi.RuntimeService
    RemoteImageService        internalapi.ImageManagerService
    PodStartupLatencyTracker  util.PodStartupLatencyTracker
    NodeStartupLatencyTracker util.NodeStartupLatencyTracker
    // useLegacyCadvisorStats is the only unexported field in this struct.
    // Remove it after cadvisor.UsingLegacyCadvisorStats is dropped.
    useLegacyCadvisorStats bool
}
View Code

NewMainKubelet

// NewMainKubelet instantiates a new Kubelet object along with all the required internal modules.
// No initialization of Kubelet and its modules should happen here.
//
// The parameters are positional and numerous, so callers must pass them in
// exactly the order declared below. It returns the new *Kubelet, or an error.
func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
    kubeDeps *Dependencies,
    crOptions *config.ContainerRuntimeOptions,
    hostname string,
    hostnameOverridden bool,
    nodeName types.NodeName,
    nodeIPs []net.IP,
    providerID string,
    cloudProvider string,
    certDirectory string,
    rootDirectory string,
    podLogsDirectory string,
    imageCredentialProviderConfigFile string,
    imageCredentialProviderBinDir string,
    registerNode bool,
    registerWithTaints []v1.Taint,
    allowedUnsafeSysctls []string,
    experimentalMounterPath string,
    kernelMemcgNotification bool,
    experimentalNodeAllocatableIgnoreEvictionThreshold bool,
    minimumGCAge metav1.Duration,
    maxPerPodContainerCount int32,
    maxContainerCount int32,
    registerSchedulable bool,
    nodeLabels map[string]string,
    nodeStatusMaxImages int32,
    seccompDefault bool,
) (*Kubelet, error) {
    // ... (function body omitted in this excerpt; only the signature is shown)
}
View Code

 

posted @ 2025-09-11 17:35  xiaoxiongfei  阅读(6)  评论(0)    收藏  举报