kubelet之pod_manager

Manager定义了访问Pod的一系列方法,维护了static podmirror pod之间的关系。
kubelet从三个来源获取pod变化:file,http,apiserverpod中除了apiserver这一来源
pod都叫static pod(apiserver并不知道这些pod的存在)。为了让apiserver感知到这些pod的存在,
kubelet为每个static pod都创建了mirror pod,这些mirror pod只能查看不能修改。
mirror pod作为static pod的副本(尽管有不同的元数据如UID…)拥有一样的全名称(相同的名称和命名空间)。
kubelet通过pod全名apiserver上报pod状态。当static pod被删除,对应的mirror pod也会被删除。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
// pkg/kubelet/pod/pod_manager.go
type Manager interface {
// GetPods returns the regular pods bound to the kubelet and their spec.
GetPods() []*v1.Pod
// GetPodByFullName returns the (non-mirror) pod that matches full name, as well as
// whether the pod was found.
GetPodByFullName(podFullName string) (*v1.Pod, bool)
// GetPodByName provides the (non-mirror) pod that matches namespace and
// name, as well as whether the pod was found.
GetPodByName(namespace, name string) (*v1.Pod, bool)
// GetPodByUID provides the (non-mirror) pod that matches pod UID, as well as
// whether the pod is found.
GetPodByUID(types.UID) (*v1.Pod, bool)
// GetPodByMirrorPod returns the static pod for the given mirror pod and
// whether it was known to the pod manager.
GetPodByMirrorPod(*v1.Pod) (*v1.Pod, bool)
// GetMirrorPodByPod returns the mirror pod for the given static pod and
// whether it was known to the pod manager.
GetMirrorPodByPod(*v1.Pod) (*v1.Pod, bool)
// GetPodsAndMirrorPods returns the both regular and mirror pods.
GetPodsAndMirrorPods() ([]*v1.Pod, []*v1.Pod)
// SetPods replaces the internal pods with the new pods.
// It is currently only used for testing.
SetPods(pods []*v1.Pod)
// AddPod adds the given pod to the manager.
AddPod(pod *v1.Pod)
// UpdatePod updates the given pod in the manager.
UpdatePod(pod *v1.Pod)
// DeletePod deletes the given pod from the manager. For mirror pods,
// this means deleting the mappings related to mirror pods. For non-
// mirror pods, this means deleting from indexes for all non-mirror pods.
DeletePod(pod *v1.Pod)
// GetOrphanedMirrorPodNames returns names of orphaned mirror pods
GetOrphanedMirrorPodNames() []string
// TranslatePodUID returns the actual UID of a pod. If the UID belongs to
// a mirror pod, returns the UID of its static pod. Otherwise, returns the
// original UID.
//
// All public-facing functions should perform this translation for UIDs
// because user may provide a mirror pod UID, which is not recognized by
// internal Kubelet functions.
TranslatePodUID(uid types.UID) kubetypes.ResolvedPodUID
// GetUIDTranslations returns the mappings of static pod UIDs to mirror pod
// UIDs and mirror pod UIDs to static pod UIDs.
GetUIDTranslations() (podToMirror map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID, mirrorToPod map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID)
// IsMirrorPodOf returns true if mirrorPod is a correct representation of
// pod; false otherwise.
IsMirrorPodOf(mirrorPod, pod *v1.Pod) bool

MirrorClient
}

// basicManager实现Manager接口方法
type basicManager struct {
// Protects all internal maps.
lock sync.RWMutex

// Regular pods indexed by UID.
podByUID map[kubetypes.ResolvedPodUID]*v1.Pod
// Mirror pods indexed by UID.
mirrorPodByUID map[kubetypes.MirrorPodUID]*v1.Pod

// Pods indexed by full name for easy access.
podByFullName map[string]*v1.Pod
mirrorPodByFullName map[string]*v1.Pod

// Mirror pod UID to pod UID map.
translationByUID map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID

// A mirror pod client to create/delete mirror pods.
// MirrorClient实现了CreateMirrorPod和DeleteMirrorPod方法
MirrorClient
}

//
func NewBasicPodManager(client MirrorClient) Manager {
pm := &basicManager{}
pm.MirrorClient = client
pm.SetPods(nil)
return pm
}

// Set the internal pods based on the new pods.
func (pm *basicManager) SetPods(newPods []*v1.Pod) {
pm.lock.Lock()
defer pm.lock.Unlock()

// 初始化对象
pm.podByUID = make(map[kubetypes.ResolvedPodUID]*v1.Pod)
pm.podByFullName = make(map[string]*v1.Pod)
pm.mirrorPodByUID = make(map[kubetypes.MirrorPodUID]*v1.Pod)
pm.mirrorPodByFullName = make(map[string]*v1.Pod)
pm.translationByUID = make(map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID)

pm.updatePodsInternal(newPods...)
}

//添加一个pod
func (pm *basicManager) AddPod(pod *v1.Pod) {
pm.UpdatePod(pod)
}

// 更新pod
func (pm *basicManager) UpdatePod(pod *v1.Pod) {
pm.lock.Lock()
defer pm.lock.Unlock()
pm.updatePodsInternal(pod)
}

// updateMetrics updates the metrics surfaced by the pod manager.
// oldPod or newPod may be nil to signify creation or deletion.
func updateMetrics(oldPod, newPod *v1.Pod) {
var numEC int
if oldPod != nil {
numEC -= len(oldPod.Spec.EphemeralContainers)
}
if newPod != nil {
numEC += len(newPod.Spec.EphemeralContainers)
}
if numEC != 0 {
metrics.ManagedEphemeralContainers.Add(float64(numEC))
}
}

// updatePodsInternal replaces the given pods in the current state of the
// manager, updating the various indices. The caller is assumed to hold the
// lock.
// 更新缓存中的pod,调用此方法需要加锁
func (pm *basicManager) updatePodsInternal(pods ...*v1.Pod) {
for _, pod := range pods {
podFullName := kubecontainer.GetPodFullName(pod)
// This logic relies on a static pod and its mirror to have the same name.
// It is safe to type convert here due to the IsMirrorPod guard.
if kubetypes.IsMirrorPod(pod) {
mirrorPodUID := kubetypes.MirrorPodUID(pod.UID)
pm.mirrorPodByUID[mirrorPodUID] = pod
pm.mirrorPodByFullName[podFullName] = pod
if p, ok := pm.podByFullName[podFullName]; ok {
pm.translationByUID[mirrorPodUID] = kubetypes.ResolvedPodUID(p.UID)
}
} else {
resolvedPodUID := kubetypes.ResolvedPodUID(pod.UID)
updateMetrics(pm.podByUID[resolvedPodUID], pod)
pm.podByUID[resolvedPodUID] = pod
pm.podByFullName[podFullName] = pod
if mirror, ok := pm.mirrorPodByFullName[podFullName]; ok {
pm.translationByUID[kubetypes.MirrorPodUID(mirror.UID)] = resolvedPodUID
}
}
}
}

func (pm *basicManager) DeletePod(pod *v1.Pod) {
updateMetrics(pod, nil)
pm.lock.Lock()
defer pm.lock.Unlock()
podFullName := kubecontainer.GetPodFullName(pod)
// It is safe to type convert here due to the IsMirrorPod guard.
if kubetypes.IsMirrorPod(pod) {
mirrorPodUID := kubetypes.MirrorPodUID(pod.UID)
delete(pm.mirrorPodByUID, mirrorPodUID)
delete(pm.mirrorPodByFullName, podFullName)
delete(pm.translationByUID, mirrorPodUID)
} else {
delete(pm.podByUID, kubetypes.ResolvedPodUID(pod.UID))
delete(pm.podByFullName, podFullName)
}
}

// 获取缓存中的pods
func (pm *basicManager) GetPods() []*v1.Pod {
pm.lock.RLock()
defer pm.lock.RUnlock()
return podsMapToPods(pm.podByUID)
}

func (pm *basicManager) GetPodsAndMirrorPods() ([]*v1.Pod, []*v1.Pod) {
pm.lock.RLock()
defer pm.lock.RUnlock()
pods := podsMapToPods(pm.podByUID)
mirrorPods := mirrorPodsMapToMirrorPods(pm.mirrorPodByUID)
return pods, mirrorPods
}

func (pm *basicManager) GetPodByUID(uid types.UID) (*v1.Pod, bool) {
pm.lock.RLock()
defer pm.lock.RUnlock()
pod, ok := pm.podByUID[kubetypes.ResolvedPodUID(uid)] // Safe conversion, map only holds non-mirrors.
return pod, ok
}

func (pm *basicManager) GetPodByName(namespace, name string) (*v1.Pod, bool) {
podFullName := kubecontainer.BuildPodFullName(name, namespace)
return pm.GetPodByFullName(podFullName)
}

func (pm *basicManager) GetPodByFullName(podFullName string) (*v1.Pod, bool) {
pm.lock.RLock()
defer pm.lock.RUnlock()
pod, ok := pm.podByFullName[podFullName]
return pod, ok
}

func (pm *basicManager) TranslatePodUID(uid types.UID) kubetypes.ResolvedPodUID {
// It is safe to type convert to a resolved UID because type conversion is idempotent.
if uid == "" {
return kubetypes.ResolvedPodUID(uid)
}

pm.lock.RLock()
defer pm.lock.RUnlock()
if translated, ok := pm.translationByUID[kubetypes.MirrorPodUID(uid)]; ok {
return translated
}
return kubetypes.ResolvedPodUID(uid)
}

func (pm *basicManager) GetUIDTranslations() (podToMirror map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID,
mirrorToPod map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID) {
pm.lock.RLock()
defer pm.lock.RUnlock()

podToMirror = make(map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID, len(pm.translationByUID))
mirrorToPod = make(map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID, len(pm.translationByUID))
// Insert empty translation mapping for all static pods.
for uid, pod := range pm.podByUID {
if !kubetypes.IsStaticPod(pod) {
continue
}
podToMirror[uid] = ""
}
// Fill in translations. Notice that if there is no mirror pod for a
// static pod, its uid will be translated into empty string "". This
// is WAI, from the caller side we can know that the static pod doesn't
// have a corresponding mirror pod instead of using static pod uid directly.
for k, v := range pm.translationByUID {
mirrorToPod[k] = v
podToMirror[v] = k
}
return podToMirror, mirrorToPod
}

func (pm *basicManager) GetOrphanedMirrorPodNames() []string {
pm.lock.RLock()
defer pm.lock.RUnlock()
var podFullNames []string
for podFullName := range pm.mirrorPodByFullName {
if _, ok := pm.podByFullName[podFullName]; !ok {
podFullNames = append(podFullNames, podFullName)
}
}
return podFullNames
}

func (pm *basicManager) IsMirrorPodOf(mirrorPod, pod *v1.Pod) bool {
// Check name and namespace first.
if pod.Name != mirrorPod.Name || pod.Namespace != mirrorPod.Namespace {
return false
}
hash, ok := getHashFromMirrorPod(mirrorPod)
if !ok {
return false
}
return hash == getPodHash(pod)
}

func podsMapToPods(UIDMap map[kubetypes.ResolvedPodUID]*v1.Pod) []*v1.Pod {
pods := make([]*v1.Pod, 0, len(UIDMap))
for _, pod := range UIDMap {
pods = append(pods, pod)
}
return pods
}

func mirrorPodsMapToMirrorPods(UIDMap map[kubetypes.MirrorPodUID]*v1.Pod) []*v1.Pod {
pods := make([]*v1.Pod, 0, len(UIDMap))
for _, pod := range UIDMap {
pods = append(pods, pod)
}
return pods
}

func (pm *basicManager) GetMirrorPodByPod(pod *v1.Pod) (*v1.Pod, bool) {
pm.lock.RLock()
defer pm.lock.RUnlock()
mirrorPod, ok := pm.mirrorPodByFullName[kubecontainer.GetPodFullName(pod)]
return mirrorPod, ok
}

func (pm *basicManager) GetPodByMirrorPod(mirrorPod *v1.Pod) (*v1.Pod, bool) {
pm.lock.RLock()
defer pm.lock.RUnlock()
pod, ok := pm.podByFullName[kubecontainer.GetPodFullName(mirrorPod)]
return pod, ok
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// pkg/kubelet/pod/mirror_client.go
type MirrorClient interface {
// CreateMirrorPod creates a mirror pod in the API server for the given
// pod or returns an error. The mirror pod will have the same annotations
// as the given pod as well as an extra annotation containing the hash of
// the static pod.
CreateMirrorPod(pod *v1.Pod) error
// DeleteMirrorPod deletes the mirror pod with the given full name from
// the API server or returns an error.
DeleteMirrorPod(podFullName string, uid *types.UID) (bool, error)
}

// nodeGetter is a subset of NodeLister, simplified for testing.
type nodeGetter interface {
// Get retrieves the Node for a given name.
Get(name string) (*v1.Node, error)
}

// basicMirrorClient is a functional MirrorClient. Mirror pods are stored in
// the kubelet directly because they need to be in sync with the internal
// pods.
type basicMirrorClient struct {
apiserverClient clientset.Interface
nodeGetter nodeGetter
nodeName string
}

// NewBasicMirrorClient returns a new MirrorClient.
func NewBasicMirrorClient(apiserverClient clientset.Interface, nodeName string, nodeGetter nodeGetter) MirrorClient {
return &basicMirrorClient{
apiserverClient: apiserverClient,
nodeName: nodeName,
nodeGetter: nodeGetter,
}
}

// 创建MirrorPod
func (mc *basicMirrorClient) CreateMirrorPod(pod *v1.Pod) error {
if mc.apiserverClient == nil {
return nil
}
// Make a copy of the pod.
copyPod := *pod
copyPod.Annotations = make(map[string]string)

for k, v := range pod.Annotations {
copyPod.Annotations[k] = v
}
hash := getPodHash(pod)
copyPod.Annotations[kubetypes.ConfigMirrorAnnotationKey] = hash

// With the MirrorPodNodeRestriction feature, mirror pods are required to have an owner reference
// to the owning node.
// See https://git.k8s.io/enhancements/keps/sig-auth/1314-node-restriction-pods/README.md
nodeUID, err := mc.getNodeUID()
if err != nil {
return fmt.Errorf("failed to get node UID: %v", err)
}
controller := true
copyPod.OwnerReferences = []metav1.OwnerReference{{
APIVersion: v1.SchemeGroupVersion.String(),
Kind: "Node",
Name: mc.nodeName,
UID: nodeUID,
Controller: &controller,
}}

apiPod, err := mc.apiserverClient.CoreV1().Pods(copyPod.Namespace).Create(context.TODO(), &copyPod, metav1.CreateOptions{})
if err != nil && apierrors.IsAlreadyExists(err) {
// Check if the existing pod is the same as the pod we want to create.
if h, ok := apiPod.Annotations[kubetypes.ConfigMirrorAnnotationKey]; ok && h == hash {
return nil
}
}
return err
}

// DeleteMirrorPod deletes a mirror pod.
// It takes the full name of the pod and optionally a UID. If the UID
// is non-nil, the pod is deleted only if its UID matches the supplied UID.
// It returns whether the pod was actually deleted, and any error returned
// while parsing the name of the pod.
// Non-existence of the pod or UID mismatch is not treated as an error; the
// routine simply returns false in that case.
func (mc *basicMirrorClient) DeleteMirrorPod(podFullName string, uid *types.UID) (bool, error) {
if mc.apiserverClient == nil {
return false, nil
}
name, namespace, err := kubecontainer.ParsePodFullName(podFullName)
if err != nil {
klog.ErrorS(err, "Failed to parse a pod full name", "podFullName", podFullName)
return false, err
}

var uidValue types.UID
if uid != nil {
uidValue = *uid
}
klog.V(2).InfoS("Deleting a mirror pod", "pod", klog.KRef(namespace, name), "podUID", uidValue)

var GracePeriodSeconds int64
if err := mc.apiserverClient.CoreV1().Pods(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{GracePeriodSeconds: &GracePeriodSeconds, Preconditions: &metav1.Preconditions{UID: uid}}); err != nil {
// Unfortunately, there's no generic error for failing a precondition
if !(apierrors.IsNotFound(err) || apierrors.IsConflict(err)) {
// We should return the error here, but historically this routine does
// not return an error unless it can't parse the pod name
klog.ErrorS(err, "Failed deleting a mirror pod", "pod", klog.KRef(namespace, name))
}
return false, nil
}
return true, nil
}

func (mc *basicMirrorClient) getNodeUID() (types.UID, error) {
node, err := mc.nodeGetter.Get(mc.nodeName)
if err != nil {
return "", err
}
if node.UID == "" {
return "", fmt.Errorf("UID unset for node %s", mc.nodeName)
}
return node.UID, nil
}

func getHashFromMirrorPod(pod *v1.Pod) (string, bool) {
hash, ok := pod.Annotations[kubetypes.ConfigMirrorAnnotationKey]
return hash, ok
}

func getPodHash(pod *v1.Pod) string {
// The annotation exists for all static pods.
return pod.Annotations[kubetypes.ConfigHashAnnotationKey]
}

REF:
1.pkg/kubelet/pod/mirror_client.go
2.pkg/kubelet/pod/pod_manager.go