kube-controller之ttl

在k8s中TTLController负责根据集群大小在Node上设置ttl annotations。TTL annotations告诉kubelet在重新请求APIServer之前
可以缓存资源对象(比如: secrets,configmap)多长时间。
// pkg/controller/ttl/ttl_controller.go
// Controller sets ttl annotations on nodes, based on cluster size.
type Controller struct {
	kubeClient clientset.Interface

	// Lister backed by the node informer's indexer; used to look nodes up by key.
	nodeStore listers.NodeLister

	// Nodes that need to be synced.
	queue workqueue.RateLimitingInterface

	// Returns true if all underlying informers are synced.
	hasSynced func() bool

	// lock is held by addNode/deleteNode while they update nodeCount,
	// boundaryStep and desiredTTLSeconds, and by getDesiredTTLSeconds
	// while it reads desiredTTLSeconds.
	lock sync.RWMutex

	// Number of nodes currently counted: incremented by addNode,
	// decremented by deleteNode.
	nodeCount int

	// TTL (in seconds) that should currently be set on every node.
	desiredTTLSeconds int


	// Index into ttlBoundaries describing the cluster-size class the
	// controller is currently in. Possible values: 0, 1, 2, 3, 4.
	//
	// boundaryStep --> node-count range --> desiredTTLSeconds
	// 0 --> [0, 100]      0
	// 1 --> [90, 500]     15
	// 2 --> [450, 1000]   30
	// 3 --> [900, 2000]   60
	// 4 --> [1800, ~]     300
	boundaryStep int
}

// NewTTLController creates a new TTLController.
//
// It builds the controller with its rate-limited work queue, registers
// add/update/delete handlers on the given node informer, and wires the
// informer's indexer and HasSynced function into the controller. The logger
// used by the add/update handlers is taken from ctx.
func NewTTLController(ctx context.Context, nodeInformer informers.NodeInformer, kubeClient clientset.Interface) *Controller {
	c := &Controller{
		kubeClient: kubeClient,
		queue:      workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ttlcontroller"),
	}
	logger := klog.FromContext(ctx)

	handlers := cache.ResourceEventHandlerFuncs{
		AddFunc:    func(added interface{}) { c.addNode(logger, added) },
		UpdateFunc: func(oldObj, newObj interface{}) { c.updateNode(logger, oldObj, newObj) },
		DeleteFunc: c.deleteNode,
	}
	nodeInformer.Informer().AddEventHandler(handlers)

	c.nodeStore = listers.NewNodeLister(nodeInformer.Informer().GetIndexer())
	c.hasSynced = nodeInformer.Informer().HasSynced

	return c
}

// ttlBoundary associates a range of cluster sizes with the TTL to use
// while the cluster is in that range.
type ttlBoundary struct {
	// sizeMin is the lower bound of the range; deleteNode steps down to the
	// previous boundary when the node count drops below it.
	sizeMin    int
	// sizeMax is the upper bound of the range; addNode steps up to the next
	// boundary when the node count rises above it.
	sizeMax    int
	// ttlSeconds is the TTL, in seconds, desired for this range.
	ttlSeconds int
}

// ttlBoundaries defines the TTL to use for each range of cluster sizes
// (number of nodes). Controller.boundaryStep is an index into this slice.
//
// Adjacent ranges overlap. Because addNode only steps up once the count
// exceeds sizeMax and deleteNode only steps down once it falls below sizeMin,
// the node count has to cross the whole overlap before the step flips back.
var (
	ttlBoundaries = []ttlBoundary{
		{sizeMin: 0, sizeMax: 100, ttlSeconds: 0},
		{sizeMin: 90, sizeMax: 500, ttlSeconds: 15},
		{sizeMin: 450, sizeMax: 1000, ttlSeconds: 30},
		{sizeMin: 900, sizeMax: 2000, ttlSeconds: 60},
		{sizeMin: 1800, sizeMax: math.MaxInt32, ttlSeconds: 300},
	}
)

// Run begins watching and syncing.
//
// It waits for the informer cache to sync, starts the requested number of
// worker goroutines, and then blocks until ctx is cancelled. The work queue
// is shut down when Run returns.
func (ttlc *Controller) Run(ctx context.Context, workers int) {
	defer utilruntime.HandleCrash()
	defer ttlc.queue.ShutDown()

	logger := klog.FromContext(ctx)
	logger.Info("Starting TTL controller")
	defer logger.Info("Shutting down TTL controller")

	// Nothing to do if the cache never syncs (e.g. ctx was cancelled first).
	if synced := cache.WaitForNamedCacheSync("TTL", ctx.Done(), ttlc.hasSynced); !synced {
		return
	}

	for remaining := workers; remaining > 0; remaining-- {
		go wait.UntilWithContext(ctx, ttlc.worker, time.Second)
	}

	<-ctx.Done()
}

// addNode handles a node-added event: it bumps the node count, moves to the
// next size boundary (and its TTL) if the count now exceeds the current
// boundary's sizeMax, and then enqueues the node for syncing.
func (ttlc *Controller) addNode(logger klog.Logger, obj interface{}) {
	node, isNode := obj.(*v1.Node)
	if !isNode {
		utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", obj))
		return
	}

	// The bookkeeping runs in its own function so that the lock is released
	// before the node is enqueued.
	recordAddition := func() {
		ttlc.lock.Lock()
		defer ttlc.lock.Unlock()

		// One more node in the cluster.
		ttlc.nodeCount++
		if ttlc.nodeCount <= ttlBoundaries[ttlc.boundaryStep].sizeMax {
			return
		}
		// The cluster outgrew the current size class: step up to the next
		// one and adopt its TTL.
		ttlc.boundaryStep++
		ttlc.desiredTTLSeconds = ttlBoundaries[ttlc.boundaryStep].ttlSeconds
	}
	recordAddition()

	ttlc.enqueueNode(logger, node)
}

// updateNode handles a node-updated event by enqueueing the new version of
// the node. The old object is ignored.
func (ttlc *Controller) updateNode(logger klog.Logger, _, newObj interface{}) {
	if node, isNode := newObj.(*v1.Node); isNode {
		// Every node update is processed so that the ttl annotation gets
		// refreshed after the cluster size changes. This relies on the
		// Kubelet updating node status periodically (every 10s, or generally
		// every X seconds), so a stale annotation should be corrected within
		// that period.
		ttlc.enqueueNode(logger, node)
		return
	}
	utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", newObj))
}

// deleteNode handles a node-deleted event: it decrements the node count and
// moves back to the previous size boundary (and its TTL) if the count is now
// below the current boundary's sizeMin. The object may be either a *v1.Node
// or a cache.DeletedFinalStateUnknown tombstone wrapping one; anything else
// is reported as an error and ignored.
func (ttlc *Controller) deleteNode(obj interface{}) {
	switch deleted := obj.(type) {
	case *v1.Node:
		// Plain node object: nothing to unwrap.
	case cache.DeletedFinalStateUnknown:
		if _, isNode := deleted.Obj.(*v1.Node); !isNode {
			utilruntime.HandleError(fmt.Errorf("unexpected object types: %v", obj))
			return
		}
	default:
		utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", obj))
		return
	}

	recordRemoval := func() {
		ttlc.lock.Lock()
		defer ttlc.lock.Unlock()

		// One node fewer in the cluster.
		ttlc.nodeCount--
		if ttlc.nodeCount >= ttlBoundaries[ttlc.boundaryStep].sizeMin {
			return
		}
		// The cluster shrank below the current size class: step down to the
		// previous one and adopt its TTL.
		ttlc.boundaryStep--
		ttlc.desiredTTLSeconds = ttlBoundaries[ttlc.boundaryStep].ttlSeconds
	}
	recordRemoval()
	// The node itself is not enqueued: it no longer exists.
}

// enqueueNode computes the queue key for the given node and adds it to the
// work queue. If the key cannot be computed, the failure is logged together
// with the underlying error and the node is not enqueued.
func (ttlc *Controller) enqueueNode(logger klog.Logger, node *v1.Node) {
	key, err := controller.KeyFunc(node)
	if err != nil {
		// Pass err through so the log entry carries the actual cause
		// instead of discarding it.
		logger.Error(err, "Couldn't get key for object", "object", klog.KObj(node))
		return
	}
	ttlc.queue.Add(key)
}

// worker processes queue items one after another until processItem reports
// that processing should stop.
func (ttlc *Controller) worker(ctx context.Context) {
	for {
		if more := ttlc.processItem(ctx); !more {
			return
		}
	}
}

// processItem takes one key off the work queue and reconciles the
// corresponding node. It returns false only when the queue reports that it
// is shutting down; otherwise it returns true, whether or not the
// reconciliation succeeded.
func (ttlc *Controller) processItem(ctx context.Context) bool {
	item, shuttingDown := ttlc.queue.Get()
	if shuttingDown {
		return false
	}
	defer ttlc.queue.Done(item)

	// This is the reconcile step for a single node.
	if err := ttlc.updateNodeIfNeeded(ctx, item.(string)); err != nil {
		// Retry later with rate limiting and surface the error.
		ttlc.queue.AddRateLimited(item)
		utilruntime.HandleError(err)
		return true
	}

	// Success: clear any rate-limiting history for this key.
	ttlc.queue.Forget(item)
	return true
}

// getDesiredTTLSeconds returns the currently desired TTL in seconds, read
// under the controller's read lock.
func (ttlc *Controller) getDesiredTTLSeconds() int {
	ttlc.lock.RLock()
	seconds := ttlc.desiredTTLSeconds
	ttlc.lock.RUnlock()
	return seconds
}

// getIntFromAnnotation reads the annotation with the given key from the node
// and parses it as an int. The boolean result is false when the annotation is
// absent or its value is not a valid integer; in the latter case the problem
// is logged using the logger from ctx.
func getIntFromAnnotation(ctx context.Context, node *v1.Node, annotationKey string) (int, bool) {
	// Indexing a nil map reports the key as absent, so this also covers a
	// node without any annotations.
	raw, present := node.Annotations[annotationKey]
	if !present {
		return 0, false
	}

	parsed, err := strconv.Atoi(raw)
	if err == nil {
		return parsed, true
	}

	klog.FromContext(ctx).Info("Could not convert the value with annotation key for the node", "annotationValue",
		raw, "annotationKey", annotationKey, "node", klog.KObj(node))
	return 0, false
}

// setIntAnnotation stores value, formatted as a decimal string, under
// annotationKey on the node, creating the annotations map if the node has
// none yet.
func setIntAnnotation(node *v1.Node, annotationKey string, value int) {
	encoded := strconv.Itoa(value)
	if node.Annotations == nil {
		node.Annotations = map[string]string{annotationKey: encoded}
		return
	}
	node.Annotations[annotationKey] = encoded
}

// patchNodeWithAnnotation sets the given integer annotation on node and sends
// the resulting difference to the API server as a strategic merge patch.
// The passed node is modified in place. Any marshalling, patch-creation or
// API error is returned to the caller.
func (ttlc *Controller) patchNodeWithAnnotation(ctx context.Context, node *v1.Node, annotationKey string, value int) error {
	// Snapshot the node before and after the change to derive the patch.
	before, err := json.Marshal(node)
	if err != nil {
		return err
	}

	setIntAnnotation(node, annotationKey, value)

	after, err := json.Marshal(node)
	if err != nil {
		return err
	}

	patch, err := strategicpatch.CreateTwoWayMergePatch(before, after, &v1.Node{})
	if err != nil {
		return err
	}

	logger := klog.FromContext(ctx)
	if _, err := ttlc.kubeClient.CoreV1().Nodes().Patch(ctx, node.Name, types.StrategicMergePatchType, patch, metav1.PatchOptions{}); err != nil {
		logger.V(2).Info("Failed to change ttl annotation for node", "node", klog.KObj(node), "err", err)
		return err
	}

	logger.V(2).Info("Changed ttl annotation", "node", klog.KObj(node), "TTL", time.Duration(value)*time.Second)
	return nil
}

// updateNodeIfNeeded reconciles the ttl annotation of the node identified by
// key. A node that is no longer in the store is treated as success. If the
// node already carries the desired TTL nothing is sent; otherwise a copy of
// the node is patched with the desired value.
func (ttlc *Controller) updateNodeIfNeeded(ctx context.Context, key string) error {
	node, err := ttlc.nodeStore.Get(key)
	if apierrors.IsNotFound(err) {
		// The node is gone; there is nothing left to annotate.
		return nil
	}
	if err != nil {
		return err
	}

	// Compare the TTL we want with the one currently on the node.
	wantTTL := ttlc.getDesiredTTLSeconds()
	if haveTTL, found := getIntFromAnnotation(ctx, node, v1.ObjectTTLAnnotationKey); found && haveTTL == wantTTL {
		return nil
	}

	// Patch a deep copy so the object held by the lister is not mutated.
	return ttlc.patchNodeWithAnnotation(ctx, node.DeepCopy(), v1.ObjectTTLAnnotationKey, wantTTL)
}
REF:
1.pkg/controller/ttl/ttl_controller.go