DaemonSetController

image

cmd\kube-controller-manager\app\apps.go
startDaemonSetController
// startDaemonSetController builds the DaemonSet controller from the shared
// informers and client held by controllerContext and starts it in its own
// goroutine.
//
// On success it returns a nil controller.Interface, true, and a nil error. If
// the controller cannot be constructed it returns nil, true, and an error that
// wraps the underlying cause.
func startDaemonSetController(ctx context.Context, controllerContext ControllerContext) (controller.Interface, bool, error) {
	dsc, err := daemon.NewDaemonSetsController(
		controllerContext.InformerFactory.Apps().V1().DaemonSets(),
		controllerContext.InformerFactory.Apps().V1().ControllerRevisions(),
		controllerContext.InformerFactory.Core().V1().Pods(),
		controllerContext.InformerFactory.Core().V1().Nodes(), // node informer: the controller follows node information too
		controllerContext.ClientBuilder.ClientOrDie("daemon-set-controller"),
		flowcontrol.NewBackOff(1*time.Second, 15*time.Minute),
	)
	if err != nil {
		// %w keeps the cause reachable through errors.Is / errors.As.
		return nil, true, fmt.Errorf("error creating DaemonSets controller: %w", err)
	}
	// The number of concurrent syncs comes from the component configuration.
	go dsc.Run(ctx, int(controllerContext.ComponentConfig.DaemonSetController.ConcurrentDaemonSetSyncs))
	return nil, true, nil
}
pkg\controller\daemon\daemon_controller.go
enqueue
// enqueue derives the work-queue key for ds with controller.KeyFunc and adds
// that key to dsc.queue.
func (dsc *DaemonSetsController) enqueue(ds *apps.DaemonSet) {
	key, err := controller.KeyFunc(ds)
	// NOTE(review): the lines elided below presumably handle err before the key
	// is used — confirm against the full source.
	...
	dsc.queue.Add(key)
}
addDaemonset
// addDaemonset asserts obj to *apps.DaemonSet and hands it straight to
// enqueueDaemonSet. Any other dynamic type makes the assertion panic.
func (dsc *DaemonSetsController) addDaemonset(obj interface{}) {
	dsc.enqueueDaemonSet(obj.(*apps.DaemonSet))
}
updateDaemonset
// updateDaemonset queues the current DaemonSet for sync. When cur and old carry
// different UIDs they are distinct objects, so the old one is first passed to
// deleteDaemonset wrapped in a cache.DeletedFinalStateUnknown tombstone.
//
// NOTE(review): the parameters are declared as (cur, old), whereas the other
// update handlers in this file take (old, cur) — confirm that the registration
// site passes the arguments in this order.
func (dsc *DaemonSetsController) updateDaemonset(cur, old interface{}) {
	oldDS := old.(*apps.DaemonSet)
	curDS := cur.(*apps.DaemonSet)
	if curDS.UID != oldDS.UID {
		key, err := controller.KeyFunc(oldDS)
		if err != nil {
			// No usable key for the old object: a tombstone built from it
			// would be invalid, so give up on this event.
			return
		}
		// The UID changed, so the old DaemonSet is gone; process its deletion.
		dsc.deleteDaemonset(cache.DeletedFinalStateUnknown{
			Key: key,
			Obj: oldDS,
		})
	}
	dsc.enqueueDaemonSet(curDS)
}
deleteDaemonset
// deleteDaemonset resolves obj to a DaemonSet — either the object itself or one
// wrapped in a cache.DeletedFinalStateUnknown tombstone — and adds a key to the
// work queue.
func (dsc *DaemonSetsController) deleteDaemonset(obj interface{}) {
	ds, ok := obj.(*apps.DaemonSet)
	if !ok {
		// Not a DaemonSet directly: try to unwrap a tombstone.
		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
		ds, ok = tombstone.Obj.(*apps.DaemonSet)
	}
	// NOTE(review): the elided lines presumably validate ok and compute key
	// from ds — confirm against the full source.
	...
	dsc.queue.Add(key)	// enqueue the key
}
addHistory
// addHistory handles a ControllerRevision passed in as obj.
//
// A revision that already carries a deletion timestamp is forwarded to
// deleteHistory. A revision with a controller owner has that owner resolved,
// but nothing is queued for it. Any other revision is treated as an orphan:
// every DaemonSet returned by getDaemonSetsForHistory is queued for sync.
func (dsc *DaemonSetsController) addHistory(obj interface{}) {
	revision := obj.(*apps.ControllerRevision)

	// Already marked for deletion: handle the event as a delete.
	if revision.DeletionTimestamp != nil {
		dsc.deleteHistory(revision)
		return
	}

	ownerRef := metav1.GetControllerOf(revision)
	if ownerRef != nil {
		// Owned revision: the owner lookup is performed, but whether or not it
		// finds a DaemonSet there is nothing to enqueue.
		dsc.resolveControllerRef(revision.Namespace, ownerRef)
		return
	}

	// Orphan revision: queue every candidate DaemonSet so each can be synced
	// against it.
	for _, candidate := range dsc.getDaemonSetsForHistory(revision) {
		dsc.enqueueDaemonSet(candidate)
	}
}
updateHistory
// updateHistory handles a change to a ControllerRevision.
//
// If the controller reference changed and the old revision had an owner, that
// previous owner is queued for sync. If the labels or the controller reference
// changed, every DaemonSet returned by getDaemonSetsForHistory for the current
// revision is queued as well.
func (dsc *DaemonSetsController) updateHistory(old, cur interface{}) {
	curHistory := cur.(*apps.ControllerRevision)
	oldHistory := old.(*apps.ControllerRevision)

	curControllerRef := metav1.GetControllerOf(curHistory)
	oldControllerRef := metav1.GetControllerOf(oldHistory)
	controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
	if controllerRefChanged && oldControllerRef != nil {
		// The owner changed: sync the previous owner, if it can still be resolved.
		if ds := dsc.resolveControllerRef(oldHistory.Namespace, oldControllerRef); ds != nil {
			dsc.enqueueDaemonSet(ds)
		}
	}

	labelChanged := !reflect.DeepEqual(curHistory.Labels, oldHistory.Labels)
	if !labelChanged && !controllerRefChanged {
		return
	}
	// Labels or ownership changed: sync every DaemonSet matched to the current
	// revision. The lookup mirrors the one addHistory performs.
	for _, ds := range dsc.getDaemonSetsForHistory(curHistory) {
		dsc.enqueueDaemonSet(ds)
	}
}
deleteHistory
// deleteHistory handles the removal of a ControllerRevision. obj is either the
// revision itself or a cache.DeletedFinalStateUnknown tombstone wrapping it.
// The DaemonSet that owns the revision, if any, is queued for sync.
//
// The handler returns without queuing anything when obj is neither of the
// accepted types, when the revision has no controller owner, or when the owner
// cannot be resolved.
func (dsc *DaemonSetsController) deleteHistory(obj interface{}) {
	history, ok := obj.(*apps.ControllerRevision)
	if !ok {
		// Not a revision directly: it must be a tombstone that wraps one.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			return
		}
		history, ok = tombstone.Obj.(*apps.ControllerRevision)
		if !ok {
			return
		}
	}

	controllerRef := metav1.GetControllerOf(history)
	if controllerRef == nil {
		// An orphan being deleted has no owner to notify.
		return
	}
	ds := dsc.resolveControllerRef(history.Namespace, controllerRef)
	if ds == nil {
		return
	}
	// Queue the owning DaemonSet so it processes the deletion.
	dsc.enqueueDaemonSet(ds)
}
addPod
unc (dsc *DaemonSetsController) addPod(obj interface{}) {
	pod := obj.(*v1.Pod)
	if pod.DeletionTimestamp != nil {
		dsc.deletePod(pod)	// 已经被删
		return
	}
	// pod可以找到当前版本自己的ds
	if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
		ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
		dsKey, err := controller.KeyFunc(ds)
		dsc.expectations.CreationObserved(dsKey)
		dsc.enqueueDaemonSet(ds)
		return
	}

	// 孤儿pod查找自己的控制器并处理
	dss := dsc.getDaemonSetsForPod(pod)
	if len(dss) == 0 {
		return
	}
	for _, ds := range dss {
		dsc.enqueueDaemonSet(ds)
	}
}
updatePod
// updatePod handles a change from the pod in old to the pod in cur.
//
// A current pod with a deletion timestamp is forwarded to deletePod. If the
// controller reference changed, the previous owner (when resolvable) is
// queued. A pod that has a controller owner queues that owner, and queues it a
// second time after MinReadySeconds plus one second when the pod has just
// become ready and MinReadySeconds is positive. A pod without a controller
// owner queues every DaemonSet from getDaemonSetsForPod, but only if its
// labels or controller reference changed.
func (dsc *DaemonSetsController) updatePod(old, cur interface{}) {
	newPod, prevPod := cur.(*v1.Pod), old.(*v1.Pod)

	// Being deleted: handle the event as a delete.
	if newPod.DeletionTimestamp != nil {
		dsc.deletePod(newPod)
		return
	}

	newRef := metav1.GetControllerOf(newPod)
	prevRef := metav1.GetControllerOf(prevPod)
	refChanged := !reflect.DeepEqual(newRef, prevRef)

	// Ownership moved away from a previous owner: sync that owner.
	if refChanged && prevRef != nil {
		prevOwner := dsc.resolveControllerRef(prevPod.Namespace, prevRef)
		if prevOwner != nil {
			dsc.enqueueDaemonSet(prevOwner)
		}
	}

	// An owned pod only concerns its owner.
	if newRef != nil {
		owner := dsc.resolveControllerRef(newPod.Namespace, newRef)
		if owner == nil {
			return
		}
		klog.V(4).Infof("Pod %s updated.", newPod.Name)
		dsc.enqueueDaemonSet(owner)
		// The pod just became ready: queue a delayed sync one second after
		// MinReadySeconds has elapsed.
		if !podutil.IsPodReady(prevPod) && podutil.IsPodReady(newPod) && owner.Spec.MinReadySeconds > 0 {
			delay := time.Duration(owner.Spec.MinReadySeconds)*time.Second + time.Second
			dsc.enqueueDaemonSetAfter(owner, delay)
		}
		return
	}

	// Orphan pod.
	candidates := dsc.getDaemonSetsForPod(newPod)
	if len(candidates) == 0 {
		return
	}
	klog.V(4).Infof("Orphan Pod %s updated.", newPod.Name)
	labelsChanged := !reflect.DeepEqual(newPod.Labels, prevPod.Labels)
	if !labelsChanged && !refChanged {
		return
	}
	for _, candidate := range candidates {
		dsc.enqueueDaemonSet(candidate)
	}
}
deletePod
// deletePod handles the removal of a Pod. obj is either the pod itself or a
// cache.DeletedFinalStateUnknown tombstone wrapping it. The deletion is
// recorded against the owning DaemonSet's expectations and that DaemonSet is
// queued for sync.
//
// The handler returns without doing anything when obj is neither of the
// accepted types, when the pod has no controller owner, when the owner cannot
// be resolved, or when no key can be computed for the owner.
func (dsc *DaemonSetsController) deletePod(obj interface{}) {
	pod, ok := obj.(*v1.Pod)
	if !ok {
		// Not a pod directly: it must be a tombstone that wraps one.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			return
		}
		pod, ok = tombstone.Obj.(*v1.Pod)
		if !ok {
			return
		}
	}

	controllerRef := metav1.GetControllerOf(pod)
	if controllerRef == nil {
		// An orphan being deleted has no owner to notify.
		return
	}
	ds := dsc.resolveControllerRef(pod.Namespace, controllerRef)
	if ds == nil {
		return
	}
	dsKey, err := controller.KeyFunc(ds)
	if err != nil {
		return
	}
	dsc.expectations.DeletionObserved(dsKey)
	// Queue the owning DaemonSet so it processes the deletion.
	dsc.enqueueDaemonSet(ds)
}
addNode
// addNode handles a Node passed in as obj: every DaemonSet for which
// NodeShouldRunDaemonPod reports that a pod should run on the node is queued
// for sync. If the DaemonSets cannot be listed, nothing is queued.
func (dsc *DaemonSetsController) addNode(obj interface{}) {
	// Fetch all DaemonSets.
	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		// Without the list there is nothing to evaluate for this event.
		return
	}
	node := obj.(*v1.Node)
	for _, ds := range dsList {
		// Queue only the DaemonSets that should run a pod on this node.
		if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); shouldRun {
			dsc.enqueueDaemonSet(ds)
		}
	}
}
updateNode
// updateNode handles a change from the node in old to the node in cur. A
// DaemonSet is queued for sync when either result of NodeShouldRunDaemonPod
// differs between the old and the current node. If the DaemonSets cannot be
// listed, nothing is queued.
func (dsc *DaemonSetsController) updateNode(old, cur interface{}) {
	oldNode := old.(*v1.Node)
	curNode := cur.(*v1.Node)

	// Fetch all DaemonSets.
	dsList, err := dsc.dsLister.List(labels.Everything())
	if err != nil {
		// Without the list there is nothing to evaluate for this event.
		return
	}
	// Compare the verdicts for the node before and after the update; only a
	// difference requires a sync.
	for _, ds := range dsList {
		oldShouldRun, oldShouldContinueRunning := NodeShouldRunDaemonPod(oldNode, ds)
		currentShouldRun, currentShouldContinueRunning := NodeShouldRunDaemonPod(curNode, ds)
		if (oldShouldRun != currentShouldRun) || (oldShouldContinueRunning != currentShouldContinueRunning) {
			dsc.enqueueDaemonSet(ds)
		}
	}
}
NodeShouldRunDaemonPod
// NodeShouldRunDaemonPod evaluates a pod built from ds for node and returns two
// verdicts. Callers in this file name them shouldRun and shouldContinueRunning.
//
// Both are false when the DaemonSet template names a different node, or when
// the node name or node affinity predicate fails. Both are true when every
// predicate passes. When only the taint predicate fails, the first is false
// and the second is true unless the node has a NoExecute taint that the pod
// does not tolerate.
func NodeShouldRunDaemonPod(node *v1.Node, ds *apps.DaemonSet) (bool, bool) {
	candidate := NewPod(ds, node.Name)

	// A template pinned to some other node rules this node out entirely.
	if pinned := ds.Spec.Template.Spec.NodeName; pinned != "" && pinned != node.Name {
		return false, false
	}

	nodeTaints := node.Spec.Taints
	nameOK, affinityOK, taintsOK := predicates(candidate, node, nodeTaints)
	switch {
	case !(nameOK && affinityOK):
		// Node name or affinity mismatch: not a target node.
		return false, false
	case taintsOK:
		return true, true
	}

	// The taints are not tolerated, so no new pod should run here. A pod that
	// is already present may stay unless an untolerated NoExecute taint exists.
	isNoExecute := func(t *v1.Taint) bool {
		return t.Effect == v1.TaintEffectNoExecute
	}
	_, mustEvict := v1helper.FindMatchingUntoleratedTaint(nodeTaints, candidate.Spec.Tolerations, isNoExecute)
	return false, !mustEvict
}
predicates
// predicates checks pod against node and reports three independent results:
//
//   - fitsNodeName: the pod names no node, or names this node.
//   - fitsNodeAffinity: the pod's required node affinity matches the node
//     (the second result of Match is discarded).
//   - fitsTaints: none of taints with effect NoExecute or NoSchedule is left
//     untolerated by the pod's tolerations.
func predicates(pod *v1.Pod, node *v1.Node, taints []v1.Taint) (fitsNodeName, fitsNodeAffinity, fitsTaints bool) {
	// Node name check.
	target := pod.Spec.NodeName
	fitsNodeName = target == "" || target == node.Name

	// Required node affinity check.
	fitsNodeAffinity, _ = nodeaffinity.GetRequiredNodeAffinity(pod).Match(node)

	// Taint check: only NoExecute and NoSchedule taints are considered.
	consideredEffect := func(t *v1.Taint) bool {
		switch t.Effect {
		case v1.TaintEffectNoExecute, v1.TaintEffectNoSchedule:
			return true
		}
		return false
	}
	_, untolerated := v1helper.FindMatchingUntoleratedTaint(taints, pod.Spec.Tolerations, consideredEffect)
	fitsTaints = !untolerated

	return fitsNodeName, fitsNodeAffinity, fitsTaints
}
syncDaemonSet
func (dsc *DaemonSetsController) syncDaemonSet(ctx context.Context, key string) error {
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	// 获取目标ds
	ds, err := dsc.dsLister.DaemonSets(namespace).Get(name)
	if apierrors.IsNotFound(err) {
		// 找不到的就该被删除
		dsc.expectations.DeleteExpectations(key)
		return nil
	}
	// 所有节点
	nodeList, err := dsc.nodeLister.List(labels.Everything())
	if err != nil {
		return fmt.Errorf("couldn't get list of nodes when syncing daemon set %#v: %v", ds, err)
	}
	...
	// 如果存在正在被删除的DaemonSet,那么暂时结束此次操作
	if ds.DeletionTimestamp != nil {
		return nil
	}
	// Construct histories of the DaemonSet, and get the hash of current history
	// 构建新ds hash
	cur, old, err := dsc.constructHistory(ctx, ds)
	monSetUniqueLabelKey]
	if !dsc.expectations.SatisfiedExpectations(dsKey) {
		// ds不满足预期运行则只更新status
		return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, false)
	}
	// 处理ds
	err = dsc.manage(ctx, ds, nodeList, hash)
	if dsc.expectations.SatisfiedExpectations(dsKey) {
		switch ds.Spec.UpdateStrategy.Type {
		case apps.RollingUpdateDaemonSetStrategyType:
			// 升级ds
			err = dsc.rollingUpdate(ctx, ds, nodeList, hash)
	}
	// 清理ds历史
	err = dsc.cleanupHistory(ctx, ds, old)
	// 更新ds status
	return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, true)
}
manage
// manage works out, for every node in nodeList, which nodes need a daemon pod
// of ds and which existing pods must be deleted, then applies the result with
// syncNodes.
//
// It returns the error from the node-to-pods lookup or from syncNodes, and nil
// otherwise.
func (dsc *DaemonSetsController) manage(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash string) error {
	// Map each node name to the daemon pods of ds found on it.
	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		// Deciding creations and deletions from a failed lookup would be unsafe.
		return err
	}

	// Walk all nodes and collect the nodes that need a pod and the pods that
	// should be deleted.
	var nodesNeedingDaemonPods, podsToDelete []string
	for _, node := range nodeList {
		nodesNeedingDaemonPodsOnNode, podsToDeleteOnNode := dsc.podsShouldBeOnNode(
			node, nodeToDaemonPods, ds, hash)
		nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, nodesNeedingDaemonPodsOnNode...)
		podsToDelete = append(podsToDelete, podsToDeleteOnNode...)
	}
	// Also delete the pods reported by getUnscheduledPodsWithoutNode.
	podsToDelete = append(podsToDelete, getUnscheduledPodsWithoutNode(nodeList, nodeToDaemonPods)...)

	// Apply the creations and deletions.
	return dsc.syncNodes(ctx, ds, podsToDelete, nodesNeedingDaemonPods, hash)
}
podsShouldBeOnNode
// podsShouldBeOnNode decides what has to happen on a single node for ds.
//
// nodeToDaemonPods maps a node name to the daemon pods currently on that node.
// The function returns the node names that need a daemon pod created
// (nodesNeedingDaemonPods) and the names of pods that must be deleted
// (podsToDelete).
func (dsc *DaemonSetsController) podsShouldBeOnNode(
	node *v1.Node,
	nodeToDaemonPods map[string][]*v1.Pod,
	ds *apps.DaemonSet,
	hash string,
) (nodesNeedingDaemonPods, podsToDelete []string) {
	// Use the package-level NodeShouldRunDaemonPod, as addNode and updateNode do.
	shouldRun, shouldContinueRunning := NodeShouldRunDaemonPod(node, ds)
	daemonPods, exists := nodeToDaemonPods[node.Name]

	switch {
	case shouldRun && !exists:
		// A pod should run here but none exists: request a creation.
		nodesNeedingDaemonPods = append(nodesNeedingDaemonPods, node.Name)
	case shouldContinueRunning:
		// NOTE(review): body elided in this excerpt; the original note says
		// pods whose creation failed are deleted here — confirm against the
		// full source.
		...
	case !shouldContinueRunning && exists:
		// Pods exist on a node that should not keep them: delete every pod
		// that is not already being deleted.
		for _, pod := range daemonPods {
			if pod.DeletionTimestamp != nil {
				continue
			}
			podsToDelete = append(podsToDelete, pod.Name)
		}
	}
	return nodesNeedingDaemonPods, podsToDelete
}
syncNodes
// syncNodes creates a daemon pod of ds for each entry of
// nodesNeedingDaemonPods and deletes the pods named in podsToDelete. Both
// phases run their requests in goroutines and wait for them to finish.
//
// NOTE(review): this excerpt is abbreviated; the setup of createWait,
// batchSize, pos, template, deleteDiff and dsKey, as well as the error
// collection and the final return, are not shown — confirm against the full
// source.
func (dsc *DaemonSetsController) syncNodes(ctx context.Context, ds *apps.DaemonSet, podsToDelete, nodesNeedingDaemonPods []string, hash string) error {
	...
	// Create pods in batches: one goroutine per node in the current batch.
	createWait.Add(batchSize)
		for i := pos; i < pos+batchSize; i++ {
			go func(ix int) {
				defer createWait.Done()

				// Work on a private copy of the template and rewrite its
				// affinity for the target node (presumably pinning the pod to
				// that node — verify against the helper).
				podTemplate := template.DeepCopy()
				podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
					podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
				// Create the pod with ds as its controller owner.
				err := dsc.podControl.CreatePods(ctx, ds.Namespace, podTemplate,
					ds, metav1.NewControllerRef(ds, controllerKind))
			}(i)
		}
		createWait.Wait()
	...
	// Delete pods: one goroutine per pod, then wait for all of them.
	deleteWait := sync.WaitGroup{}
	deleteWait.Add(deleteDiff)
	for i := 0; i < deleteDiff; i++ {
		go func(ix int) {
			defer deleteWait.Done()
			if err := dsc.podControl.DeletePod(ctx, ds.Namespace, podsToDelete[ix], ds); err != nil {
				// The delete failed, so record the deletion as observed here
				// (presumably because no delete event will arrive to do so).
				dsc.expectations.DeletionObserved(dsKey)
			}
		}(i)
	}
	deleteWait.Wait()
}
cleanupHistory
// cleanupHistory deletes ControllerRevisions from old until no more than
// ds.Spec.RevisionHistoryLimit of them remain. A revision whose hash label is
// still carried by one of the DaemonSet's pods is never deleted.
//
// It returns the error from the node-to-pods lookup or from the first failed
// deletion, and nil otherwise.
func (dsc *DaemonSetsController) cleanupHistory(ctx context.Context, ds *apps.DaemonSet, old []*apps.ControllerRevision) error {
	nodesToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds)
	if err != nil {
		// Without the pod list the live hashes are unknown, so deleting any
		// revision would be unsafe.
		return err
	}

	// Nothing to do while the number of old revisions is within the limit.
	// NOTE(review): assumes ds.Spec.RevisionHistoryLimit is non-nil — confirm.
	toKeep := int(*ds.Spec.RevisionHistoryLimit)
	toKill := len(old) - toKeep
	if toKill <= 0 {
		return nil
	}

	// Revisions still referenced by an existing pod must be kept.
	liveHashes := make(map[string]bool)
	for _, pods := range nodesToDaemonPods {
		for _, pod := range pods {
			if hash := pod.Labels[apps.DefaultDaemonSetUniqueLabelKey]; len(hash) > 0 {
				liveHashes[hash] = true
			}
		}
	}

	// Sort the histories by revision and delete from the front until enough
	// have been removed (presumably oldest first — verify historiesByRevision).
	sort.Sort(historiesByRevision(old))
	for _, history := range old {
		if toKill <= 0 {
			break
		}
		if hash := history.Labels[apps.DefaultDaemonSetUniqueLabelKey]; liveHashes[hash] {
			continue
		}
		// Delete the revision.
		if err := dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Delete(ctx, history.Name, metav1.DeleteOptions{}); err != nil {
			return err
		}
		toKill--
	}
	return nil
}
posted @ 2022-02-10 14:51  北方姆Q  阅读(94)  评论(0编辑  收藏  举报