cmd\kube-controller-manager\app\apps.go
startDaemonSetController
// startDaemonSetController builds the DaemonSet controller from the shared
// informers and starts it in a separate goroutine.
//
// The returned controller.Interface is always nil and the bool is always true;
// the error is non-nil only when the controller cannot be constructed.
func startDaemonSetController(ctx context.Context, controllerContext ControllerContext) (controller.Interface, bool, error) {
	factory := controllerContext.InformerFactory

	daemonSetInformer := factory.Apps().V1().DaemonSets()
	revisionInformer := factory.Apps().V1().ControllerRevisions()
	podInformer := factory.Core().V1().Pods()
	// Node information is watched as well.
	nodeInformer := factory.Core().V1().Nodes()
	client := controllerContext.ClientBuilder.ClientOrDie("daemon-set-controller")
	backoff := flowcontrol.NewBackOff(1*time.Second, 15*time.Minute)

	dsc, err := daemon.NewDaemonSetsController(
		daemonSetInformer,
		revisionInformer,
		podInformer,
		nodeInformer,
		client,
		backoff,
	)
	if err != nil {
		return nil, true, fmt.Errorf("error creating DaemonSets controller: %v", err)
	}

	// The worker count comes from the component configuration.
	workers := int(controllerContext.ComponentConfig.DaemonSetController.ConcurrentDaemonSetSyncs)
	go dsc.Run(ctx, workers)
	return nil, true, nil
}
pkg\controller\daemon\daemon_controller.go
enqueue
// enqueue derives the work-queue key for ds with controller.KeyFunc and adds
// that key to dsc.queue.
func (dsc *DaemonSetsController) enqueue(ds *apps.DaemonSet) {
key, err := controller.KeyFunc(ds)
// NOTE(review): the lines between the key computation and the queue insert are
// elided in this excerpt; presumably err is handled there — confirm against the
// full source.
...
dsc.queue.Add(key)
}
addDaemonset
// addDaemonset handles a newly added DaemonSet object by enqueueing it.
// obj must be an *apps.DaemonSet; any other type makes the assertion panic.
func (dsc *DaemonSetsController) addDaemonset(obj interface{}) {
	added := obj.(*apps.DaemonSet)
	// No filtering on add: the object goes straight into the queue.
	dsc.enqueueDaemonSet(added)
}
updateDaemonset
// updateDaemonset handles a DaemonSet update by enqueueing the current object.
//
// When the UID of the two objects differs, the old object is first passed to
// deleteDaemonset wrapped in a cache.DeletedFinalStateUnknown tombstone, so it
// is processed as a deletion before the current object is enqueued.
// Both arguments must be *apps.DaemonSet.
func (dsc *DaemonSetsController) updateDaemonset(cur, old interface{}) {
	oldDS := old.(*apps.DaemonSet)
	curDS := cur.(*apps.DaemonSet)

	if curDS.UID != oldDS.UID {
		// A different UID means the two objects are not the same DaemonSet
		// (presumably it was deleted and recreated), so the old one is
		// treated as deleted.
		key, err := controller.KeyFunc(oldDS)
		if err != nil {
			// Without a key the tombstone cannot be built; report and stop.
			klog.Errorf("couldn't get key for object %#v: %v", oldDS, err)
			return
		}
		dsc.deleteDaemonset(cache.DeletedFinalStateUnknown{
			Key: key,
			Obj: oldDS,
		})
	}

	dsc.enqueueDaemonSet(curDS)
}
deleteDaemonset
// deleteDaemonset handles a deleted DaemonSet. obj is either the
// *apps.DaemonSet itself or a cache.DeletedFinalStateUnknown tombstone whose
// Obj field holds one.
func (dsc *DaemonSetsController) deleteDaemonset(obj interface{}) {
ds, ok := obj.(*apps.DaemonSet)
if !ok {
// Not a DaemonSet: try to unwrap a tombstone instead.
// NOTE(review): the := below declares a new ok scoped to this if block, and
// neither assertion result is checked in the visible lines — confirm against
// the full source.
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
ds, ok = tombstone.Obj.(*apps.DaemonSet)
}
// NOTE(review): lines elided in this excerpt; presumably key is computed from
// ds here — confirm.
...
dsc.queue.Add(key) // add the key to the work queue
}
addHistory
// addHistory handles a newly observed ControllerRevision.
//
//   - A revision that already carries a DeletionTimestamp is forwarded to
//     deleteHistory.
//   - A revision with a controller reference enqueues the DaemonSet that the
//     reference resolves to, if any.
//   - A revision without a controller reference (an orphan) enqueues every
//     DaemonSet returned by getDaemonSetsForHistory.
//
// obj must be an *apps.ControllerRevision.
func (dsc *DaemonSetsController) addHistory(obj interface{}) {
	history := obj.(*apps.ControllerRevision)
	if history.DeletionTimestamp != nil {
		// The revision is already being deleted: take the delete path.
		dsc.deleteHistory(history)
		return
	}

	// Owned revision: only its owner needs to be synced.
	if controllerRef := metav1.GetControllerOf(history); controllerRef != nil {
		ds := dsc.resolveControllerRef(history.Namespace, controllerRef)
		if ds == nil {
			return
		}
		// The owner was resolved, so queue it for a sync; returning here
		// without enqueueing would make the lookup above pointless.
		dsc.enqueueDaemonSet(ds)
		return
	}

	// Orphaned revision: queue every candidate DaemonSet so each can decide
	// whether the revision belongs to it.
	for _, ds := range dsc.getDaemonSetsForHistory(history) {
		dsc.enqueueDaemonSet(ds)
	}
}
updateHistory
// updateHistory handles an update to a ControllerRevision.
//
// If the controller reference changed, the DaemonSet referenced by the old
// revision is enqueued. If the current revision has a controller reference,
// the DaemonSet it resolves to is enqueued and nothing else is done.
// Otherwise the revision is an orphan, and every DaemonSet returned by
// getDaemonSetsForHistory is enqueued when either the labels or the
// controller reference changed.
//
// Both arguments must be *apps.ControllerRevision.
func (dsc *DaemonSetsController) updateHistory(old, cur interface{}) {
	curHistory := cur.(*apps.ControllerRevision)
	oldHistory := old.(*apps.ControllerRevision)

	curControllerRef := metav1.GetControllerOf(curHistory)
	oldControllerRef := metav1.GetControllerOf(oldHistory)
	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
	if controllerRefChanged && oldControllerRef != nil {
		// The owner changed, so the previous owner must be synced as well.
		if ds := dsc.resolveControllerRef(oldHistory.Namespace, oldControllerRef); ds != nil {
			dsc.enqueueDaemonSet(ds)
		}
	}

	// An owned revision only concerns its current owner, mirroring updatePod.
	if curControllerRef != nil {
		if ds := dsc.resolveControllerRef(curHistory.Namespace, curControllerRef); ds != nil {
			dsc.enqueueDaemonSet(ds)
		}
		return
	}

	// Orphan: a label or owner change may alter which DaemonSets match it.
	labelChanged := !reflect.DeepEqual(curHistory.Labels, oldHistory.Labels)
	if !labelChanged && !controllerRefChanged {
		return
	}
	for _, ds := range dsc.getDaemonSetsForHistory(curHistory) {
		dsc.enqueueDaemonSet(ds)
	}
}
deleteHistory
// deleteHistory handles a deleted ControllerRevision by enqueueing the
// DaemonSet that its controller reference resolves to.
//
// obj is either an *apps.ControllerRevision or a
// cache.DeletedFinalStateUnknown tombstone whose Obj field holds one. Objects
// of any other type are logged and ignored. Revisions without a controller
// reference, or whose reference does not resolve, are ignored.
func (dsc *DaemonSetsController) deleteHistory(obj interface{}) {
	history, ok := obj.(*apps.ControllerRevision)
	if !ok {
		// Not a revision: try to unwrap a tombstone.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			klog.Errorf("couldn't get object from tombstone %#v", obj)
			return
		}
		history, ok = tombstone.Obj.(*apps.ControllerRevision)
		if !ok {
			klog.Errorf("tombstone contained object that is not a ControllerRevision %#v", obj)
			return
		}
	}

	controllerRef := metav1.GetControllerOf(history)
	if controllerRef == nil {
		// No owner recorded, so there is nothing to sync.
		return
	}
	ds := dsc.resolveControllerRef(history.Namespace, controllerRef)
	if ds == nil {
		return
	}
	dsc.enqueueDaemonSet(ds)
}
addPod
unc (dsc *DaemonSetsController) addPod(obj interface{}) {
pod := obj.(*v1.Pod)
if pod.DeletionTimestamp != nil {
dsc.deletePod(pod) // 已经被删
return
}
// pod可以找到当前版本自己的ds
if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
dsKey, err := controller.KeyFunc(ds)
dsc.expectations.CreationObserved(dsKey)
dsc.enqueueDaemonSet(ds)
return
}
// 孤儿pod查找自己的控制器并处理
dss := dsc.getDaemonSetsForPod(pod)
if len(dss) == 0 {
return
}
for _, ds := range dss {
dsc.enqueueDaemonSet(ds)
}
}
updatePod
// updatePod handles an update to a Pod.
//
// A pod with a DeletionTimestamp is forwarded to deletePod. Otherwise, when
// the controller reference changed, the DaemonSet referenced by the old pod is
// enqueued. A pod that currently has a controller reference enqueues the
// DaemonSet it resolves to, plus a delayed enqueue when the pod just became
// ready and the DaemonSet sets MinReadySeconds. A pod without a controller
// reference enqueues every DaemonSet from getDaemonSetsForPod, but only if its
// labels or controller reference changed.
//
// Both arguments must be *v1.Pod.
func (dsc *DaemonSetsController) updatePod(old, cur interface{}) {
	newPod := cur.(*v1.Pod)
	prevPod := old.(*v1.Pod)

	// A pod being deleted is handled entirely by the delete path.
	if newPod.DeletionTimestamp != nil {
		dsc.deletePod(newPod)
		return
	}

	newRef := metav1.GetControllerOf(newPod)
	prevRef := metav1.GetControllerOf(prevPod)
	refChanged := !reflect.DeepEqual(newRef, prevRef)

	// The previous owner, if any, has to notice that it lost the pod.
	if prevRef != nil && refChanged {
		if prevOwner := dsc.resolveControllerRef(prevPod.Namespace, prevRef); prevOwner != nil {
			dsc.enqueueDaemonSet(prevOwner)
		}
	}

	if newRef == nil {
		// Orphaned pod: look for DaemonSets that might be interested in it.
		candidates := dsc.getDaemonSetsForPod(newPod)
		if len(candidates) == 0 {
			return
		}
		klog.V(4).Infof("Orphan Pod %s updated.", newPod.Name)
		if !refChanged && reflect.DeepEqual(newPod.Labels, prevPod.Labels) {
			// Neither labels nor ownership changed; nothing to re-evaluate.
			return
		}
		for _, candidate := range candidates {
			dsc.enqueueDaemonSet(candidate)
		}
		return
	}

	// Owned pod: only the current owner matters.
	owner := dsc.resolveControllerRef(newPod.Namespace, newRef)
	if owner == nil {
		return
	}
	klog.V(4).Infof("Pod %s updated.", newPod.Name)
	dsc.enqueueDaemonSet(owner)

	becameReady := !podutil.IsPodReady(prevPod) && podutil.IsPodReady(newPod)
	if becameReady && owner.Spec.MinReadySeconds > 0 {
		// Re-sync one second after MinReadySeconds has elapsed; the extra
		// second is presumably a margin for timing skew — confirm.
		delay := time.Duration(owner.Spec.MinReadySeconds)*time.Second + time.Second
		dsc.enqueueDaemonSetAfter(owner, delay)
	}
}
deletePod
// deletePod handles a deleted Pod: it records an observed deletion on the
// expectations of the owning DaemonSet and enqueues that DaemonSet.
//
// obj is either a *v1.Pod or a cache.DeletedFinalStateUnknown tombstone whose
// Obj field holds one. Objects of any other type are logged and ignored. Pods
// without a controller reference, or whose reference does not resolve, are
// ignored.
func (dsc *DaemonSetsController) deletePod(obj interface{}) {
	pod, ok := obj.(*v1.Pod)
	if !ok {
		// Not a pod: try to unwrap a tombstone.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			klog.Errorf("couldn't get object from tombstone %#v", obj)
			return
		}
		pod, ok = tombstone.Obj.(*v1.Pod)
		if !ok {
			klog.Errorf("tombstone contained object that is not a pod %#v", obj)
			return
		}
	}

	controllerRef := metav1.GetControllerOf(pod)
	if controllerRef == nil {
		// No owner recorded, so no DaemonSet is waiting on this deletion.
		return
	}
	ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
	if ds == nil {
		return
	}
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		klog.Errorf("couldn't get key for object %#v: %v", ds, err)
		return
	}
	dsc.expectations.DeletionObserved(dsKey)
	dsc.enqueueDaemonSet(ds)
}
addNode
// addNode handles a newly added Node by enqueueing every DaemonSet for which
// NodeShouldRunDaemonPod reports that a daemon pod should run on that node.
//
// obj must be a *v1.Node. If the DaemonSets cannot be listed, the error is
// logged and nothing is enqueued.
func (dsc *DaemonSetsController) addNode(obj interface{}) {
	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("couldn't list daemon sets while handling an added node: %v", err)
		return
	}

	node := obj.(*v1.Node)
	for _, ds := range dsList {
		// Only DaemonSets that want a pod on the new node need a sync.
		if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); shouldRun {
			dsc.enqueueDaemonSet(ds)
		}
	}
}
updateNode
// updateNode handles an update to a Node. It enqueues every DaemonSet whose
// NodeShouldRunDaemonPod result differs between the old and the current node.
//
// Both arguments must be *v1.Node. If the DaemonSets cannot be listed, the
// error is logged and nothing is enqueued.
func (dsc *DaemonSetsController) updateNode(old, cur interface{}) {
	oldNode := old.(*v1.Node)
	curNode := cur.(*v1.Node)

	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("couldn't list daemon sets while handling an updated node: %v", err)
		return
	}

	for _, ds := range dsList {
		// A sync is needed only when the node change flips either decision.
		oldShouldRun, oldShouldContinueRunning := NodeShouldRunDaemonPod(oldNode, ds)
		currentShouldRun, currentShouldContinueRunning := NodeShouldRunDaemonPod(curNode, ds)
		if oldShouldRun != currentShouldRun || oldShouldContinueRunning != currentShouldContinueRunning {
			dsc.enqueueDaemonSet(ds)
		}
	}
}
NodeShouldRunDaemonPod
// NodeShouldRunDaemonPod reports, for the given node and DaemonSet, two
// decisions: whether a daemon pod should run on the node, and whether a daemon
// pod already there should continue running.
//
// Both results are false when the DaemonSet template names a different node,
// or when the node-name or node-affinity predicate fails. When only the taint
// predicate fails, the first result is false and the second is true unless the
// pod has an untolerated NoExecute taint.
func NodeShouldRunDaemonPod(node *v1.Node, ds *apps.DaemonSet) (bool, bool) {
	candidate := NewPod(ds, node.Name)

	// A template pinned to some other node never targets this one.
	if pinned := ds.Spec.Template.Spec.NodeName; pinned != "" && pinned != node.Name {
		return false, false
	}

	nodeTaints := node.Spec.Taints
	nameOK, affinityOK, taintsOK := predicates(candidate, node, nodeTaints)
	switch {
	case !(nameOK && affinityOK):
		// Node name or required affinity does not match.
		return false, false
	case taintsOK:
		return true, true
	}

	// Taints forbid a new pod here; an existing pod may stay only while it
	// tolerates every NoExecute taint.
	isNoExecute := func(t *v1.Taint) bool {
		return t.Effect == v1.TaintEffectNoExecute
	}
	_, evicted := v1helper.FindMatchingUntoleratedTaint(nodeTaints, candidate.Spec.Tolerations, isNoExecute)
	return false, !evicted
}
predicates
// predicates evaluates three independent checks of pod against node:
//
//   - fitsNodeName: the pod names no node, or names this node.
//   - fitsNodeAffinity: the pod's required node affinity matches the node.
//   - fitsTaints: none of the given NoExecute or NoSchedule taints is
//     untolerated by the pod.
func predicates(pod *v1.Pod, node *v1.Node, taints []v1.Taint) (fitsNodeName, fitsNodeAffinity, fitsTaints bool) {
	nameMatches := pod.Spec.NodeName == "" || pod.Spec.NodeName == node.Name

	// The error returned by Match is discarded; only the boolean is used.
	affinityMatches, _ := nodeaffinity.GetRequiredNodeAffinity(pod).Match(node)

	// Only taints with these two effects are considered.
	relevantEffect := func(t *v1.Taint) bool {
		switch t.Effect {
		case v1.TaintEffectNoExecute, v1.TaintEffectNoSchedule:
			return true
		default:
			return false
		}
	}
	_, untolerated := v1helper.FindMatchingUntoleratedTaint(taints, pod.Spec.Tolerations, relevantEffect)

	return nameMatches, affinityMatches, !untolerated
}
syncDaemonSet
func (dsc *DaemonSetsController) syncDaemonSet(ctx context.Context, key string) error {
namespace, name, err := cache.SplitMetaNamespaceKey(key)
// 获取目标ds
ds, err := dsc.dsLister.DaemonSets(namespace).Get(name)
if apierrors.IsNotFound(err) {
// 找不到的就该被删除
dsc.expectations.DeleteExpectations(key)
return nil
}
// 所有节点
nodeList, err := dsc.nodeLister.List(labels.Everything())
if err != nil {
return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
}
...
// 如果存在正在被删除的DaemonSet,那么暂时结束此次操作
if ds.DeletionTimestamp != nil {
return nil
}
// Construct histories of the DaemonSet, and get the hash of current history
// 构建新ds hash
cur, old, err := dsc.constructHistory(ctx, ds)
monSetUniqueLabelKey]
if !dsc.expectations.SatisfiedExpectations(dsKey) {
// ds不满足预期运行则只更新status
return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, false)
}
// 处理ds
err = dsc.manage(ctx, ds, nodeList, hash)
if dsc.expectations.SatisfiedExpectations(dsKey) {
switch ds.Spec.UpdateStrategy.Type {
case apps.RollingUpdateDaemonSetStrategyType:
// 升级ds
err = dsc.rollingUpdate(ctx, ds, nodeList, hash)
}
// 清理ds历史
err = dsc.cleanupHistory(ctx, ds, old)
// 更新ds status
return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, true)
}
manage
// manage decides, for every node in nodeList, which nodes need a daemon pod
// created and which existing pods must be deleted, then applies both lists
// through syncNodes.
//
// It returns an error when the node-to-pods mapping cannot be obtained or when
// syncNodes fails.
func (dsc *DaemonSetsController) manage(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash string) error {
	// Map each node name to the daemon pods of ds found on it.
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %w", ds.Name, err)
	}

	// Evaluate every node: collect nodes that lack a pod they should have and
	// pods that should no longer exist.
	var nodesNeedingDaemonPods, podsToDelete []string
	for _, node := range nodeList {
		nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode := dsc.podsShouldBeOnNode(
			node, nodeToDaemonPods, ds, hash)

		nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
		podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
	}

	// Also delete the pods reported by getUnscheduledPodsWithoutNode.
	podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)

	// Apply the computed creations and deletions.
	return dsc.syncNodes(ctx, ds, podsToDelete, nodesNeedingDaemonPods, hash)
}
podsShouldBeOnNode
// podsShouldBeOnNode compares what should run on node with the daemon pods
// recorded for it in nodeToDaemonPods. It returns the node names that need a
// daemon pod created and the names of pods that should be deleted.
//
// NOTE(review): this calls the method dsc.nodeShouldRunDaemonPod, which is not
// shown in this excerpt; presumably it corresponds to the package-level
// NodeShouldRunDaemonPod — confirm.
func (dsc *DaemonSetsController) podsShouldBeOnNode(
node *v1.Node,
nodeToDaemonPods map[string][]*v1.Pod,
ds *apps.DaemonSet,
hash string,
) (nodesNeedingDaemonPods, podsToDelete []string) {
shouldRun, shouldContinueRunning := dsc.nodeShouldRunDaemonPod(node, ds)
daemonPods, exists := nodeToDaemonPods[node.Name]
switch {
case shouldRun && !exists:
// A pod should run here and none is recorded: request a creation.
nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, node.Name)
case shouldContinueRunning:
// Original note: delete the pod if its creation failed.
// NOTE(review): the body of this case is elided in this excerpt.
...
case !shouldContinueRunning && exists:
// Pods exist on a node where they should not keep running: delete them.
for _, pod := range daemonPods {
if pod.DeletionTimestamp != nil {
// Already being deleted; do not request it again.
continue
}
podsToDelete = append(podsToDelete, pod.Name)
}
}
return nodesNeedingDaemonPods, podsToDelete
}
syncNodes
// syncNodes creates daemon pods for the nodes in nodesNeedingDaemonPods and
// deletes the pods named in podsToDelete, running the calls concurrently and
// waiting for each group to finish.
//
// NOTE(review): large parts of this function are elided ("..."). createWait,
// batchSize, pos, template, deleteDiff and dsKey are defined in the omitted
// lines, and no return statement is visible — confirm against the full source.
func (dsc *DaemonSetsController) syncNodes(ctx context.Context, ds *apps.DaemonSet, podsToDelete, nodesNeedingDaemonPods []string, hash string) error {
...
// Create pods in batches: one goroutine per node in the current batch.
createWait.Add(batchSize)
for i := pos; i < pos+batchSize; i++ {
go func(ix int) {
defer createWait.Done()
// Work on a copy of the template so each pod gets its own affinity.
podTemplate := template.DeepCopy()
// Replace the affinity with one built for the target node's name.
podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
// NOTE(review): err is not used in the visible lines; its handling appears to
// have been omitted from this excerpt — confirm.
err := dsc.podControl.CreatePods(ctx, ds.Namespace, podTemplate,
ds, metav1.NewControllerRef(ds, controllerKind))
}(i)
}
createWait.Wait()
...
// Delete pods: one goroutine per pod, then wait for all of them.
deleteWait := sync.WaitGroup{}
deleteWait.Add(deleteDiff)
for i := 0; i < deleteDiff; i++ {
go func(ix int) {
defer deleteWait.Done()
if err := dsc.podControl.DeletePod(ctx, ds.Namespace, podsToDelete[ix], ds); err != nil {
// The delete call failed: mark a deletion as observed on the expectations for
// dsKey (presumably because no delete event will follow — confirm).
dsc.expectations.DeletionObserved(dsKey)
}
}(i)
}
deleteWait.Wait()
}
cleanupHistory
// cleanupHistory deletes surplus ControllerRevisions from old so that at most
// *ds.Spec.RevisionHistoryLimit of them remain.
//
// Revisions whose hash label is still carried by a daemon pod of ds are never
// deleted. It returns an error when the node-to-pods mapping cannot be
// obtained or when a delete call fails; revisions deleted before the failure
// stay deleted.
//
// NOTE(review): ds.Spec.RevisionHistoryLimit is dereferenced without a nil
// check; presumably it is always defaulted before reaching here — confirm.
func (dsc *DaemonSetsController) cleanupHistory(ctx context.Context, ds *apps.DaemonSet, old []*apps.ControllerRevision) error {
	nodesToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %w", ds.Name, err)
	}

	// Nothing to do while the history is within the configured limit.
	toKeep := int(*ds.Spec.RevisionHistoryLimit)
	toKill := len(old) - toKeep
	if toKill <= 0 {
		return nil
	}

	// Revisions still in use by a pod must be kept.
	liveHashes := make(map[string]bool)
	for _, pods := range nodesToDaemonPods {
		for _, pod := range pods {
			if hash := pod.Labels[apps.DefaultDaemonSetUniqueLabelKey]; len(hash) > 0 {
				liveHashes[hash] = true
			}
		}
	}

	// Walk the history in historiesByRevision order (presumably oldest
	// revision first — confirm) and delete until the surplus is gone.
	sort.Sort(historiesByRevision(old))
	for _, history := range old {
		if toKill <= 0 {
			break
		}
		if hash := history.Labels[apps.DefaultDaemonSetUniqueLabelKey]; liveHashes[hash] {
			continue
		}
		if err := dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Delete(ctx, history.Name, metav1.DeleteOptions{}); err != nil {
			return err
		}
		toKill--
	}
	return nil
}