kubelet之imagegc-manager

imagegc-manager负责管理容器镜像生命周期。当磁盘的使用率达到所设置的值时会将没有使用的镜像给删除掉。

kubelet通过如下参数来设置ImageGC策略:

  • ImageGCHighThresholdPercent:触发gc的阈值,超过该值将会执行gc,当值为100时,不启动gc
  • ImageGCLowThresholdPercent:磁盘使用率低于这个值不会进行gc;一旦触发gc,会尝试把磁盘使用率降到该值
  • ImageMinimumGCAge:最短GC年龄(即距离首次被探测到的间隔),小于该阈值时不会被gc
1
2
3
4
5
6
// pkg/kubelet/kubelet.go
// Assemble the image GC policy from the corresponding kubelet configuration fields.
imageGCPolicy := images.ImageGCPolicy{
	HighThresholdPercent: int(kubeCfg.ImageGCHighThresholdPercent),
	LowThresholdPercent:  int(kubeCfg.ImageGCLowThresholdPercent),
	MinAge:               kubeCfg.ImageMinimumGCAge.Duration,
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

// pkg/kubelet/images/types.go
// Sentinel errors reported while making a container image available.
var (
	// ErrImagePullBackOff is returned when a previous pull of the container
	// image failed and the kubelet is backing off before pulling again.
	ErrImagePullBackOff = errors.New("ImagePullBackOff")

	// ErrImageInspect is returned when the image cannot be inspected.
	ErrImageInspect = errors.New("ImageInspectError")

	// ErrImagePull is the general image pull error.
	ErrImagePull = errors.New("ErrImagePull")

	// ErrImageNeverPull is returned when the required image is absent on the
	// host and the pull policy is Never, so no pull may be attempted.
	ErrImageNeverPull = errors.New("ErrImageNeverPull")

	// ErrInvalidImageName is returned when the image name cannot be parsed.
	ErrInvalidImageName = errors.New("InvalidImageName")
)

// ImageManager provides an interface to manage the lifecycle of images.
// Implementations of this interface are expected to deal with pulling (downloading),
// managing, and deleting container images.
// Implementations are expected to abstract the underlying runtimes.
// Implementations are expected to be thread safe.
type ImageManager interface {
// EnsureImageExists makes sure the image needed by the given container of
// the given pod is available, using pullSecrets and podSandboxConfig for the pull.
// NOTE(review): the three results appear to be (image reference, error
// message, error) — confirm against the implementation.
EnsureImageExists(ctx context.Context, pod *v1.Pod, container *v1.Container, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig) (string, string, error)

}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

// pkg/kubelet/images/puller.go
// pullResult carries the outcome of a single image pull back to the requester.
type pullResult struct {
// imageRef is the reference returned by the image service's PullImage call.
imageRef string
// err is the error returned by PullImage, if any.
err error
// pullDuration is the time spent inside the PullImage call.
pullDuration time.Duration
}

// imagePuller abstracts how image pulls are scheduled (in parallel or serially).
type imagePuller interface {
// pullImage requests a pull of the given image spec; the outcome is
// delivered on the supplied pullResult channel.
pullImage(context.Context, kubecontainer.ImageSpec, []v1.Secret, chan<- pullResult, *runtimeapi.PodSandboxConfig)
}

// Compile-time checks that both parallelImagePuller and serialImagePuller
// implement the imagePuller interface.
var (
	_ imagePuller = &parallelImagePuller{}
	_ imagePuller = &serialImagePuller{}
)

// parallelImagePuller pulls images concurrently, using a channel to cap the
// number of pulls that may be in flight at once.
type parallelImagePuller struct {
// imageService performs the actual pull.
imageService kubecontainer.ImageService
// tokens limits concurrent pulls to its capacity; nil means no limit.
tokens chan struct{}
}

// newParallelImagePuller builds a parallel puller. When maxParallelImagePulls
// is nil or less than 1 the puller gets no token channel, i.e. pulls are not
// limited; otherwise the token channel's capacity equals the given maximum.
func newParallelImagePuller(imageService kubecontainer.ImageService, maxParallelImagePulls *int32) imagePuller {
	var limiter chan struct{}
	if maxParallelImagePulls != nil && *maxParallelImagePulls >= 1 {
		limiter = make(chan struct{}, *maxParallelImagePulls)
	}
	return &parallelImagePuller{
		imageService: imageService,
		tokens:       limiter,
	}
}

// pullImage starts the pull in a new goroutine and returns immediately; the
// outcome is sent on pullChan once PullImage returns.
func (pip *parallelImagePuller) pullImage(ctx context.Context, spec kubecontainer.ImageSpec, pullSecrets []v1.Secret, pullChan chan<- pullResult, podSandboxConfig *runtimeapi.PodSandboxConfig) {
	go func() {
		if limiter := pip.tokens; limiter != nil {
			// Acquire a slot; this blocks while the channel is full, i.e.
			// while the maximum number of pulls is already running.
			limiter <- struct{}{}
			// Give the slot back when this goroutine finishes.
			defer func() { <-limiter }()
		}

		begin := time.Now()
		ref, pullErr := pip.imageService.PullImage(ctx, spec, pullSecrets, podSandboxConfig)
		outcome := pullResult{
			imageRef:     ref,
			err:          pullErr,
			pullDuration: time.Since(begin),
		}
		pullChan <- outcome
	}()
}

// Maximum number of image pull requests that can be queued.
// It is used as the capacity of the serial puller's request channel, so a
// sender blocks once this many requests are already waiting.
const maxImagePullRequests = 10

// serialImagePuller pulls images one at a time, in the order the requests
// were queued on pullRequests.
type serialImagePuller struct {
// imageService performs the actual pull.
imageService kubecontainer.ImageService
// pullRequests is the queue of pending pull requests.
pullRequests chan *imagePullRequest
}

// newSerialImagePuller builds a serial puller with a request queue of
// maxImagePullRequests entries and starts the background worker that drains it.
func newSerialImagePuller(imageService kubecontainer.ImageService) imagePuller {
	sip := &serialImagePuller{
		imageService: imageService,
		pullRequests: make(chan *imagePullRequest, maxImagePullRequests),
	}
	// The worker keeps reading from the request channel and pulls the images
	// one after another; wait.Until restarts it should it ever return.
	go wait.Until(sip.processImagePullRequests, time.Second, wait.NeverStop)
	return sip
}

// imagePullRequest bundles the arguments of one pullImage call so that it can
// be queued for the serial puller's worker.
type imagePullRequest struct {
ctx context.Context
spec kubecontainer.ImageSpec
pullSecrets []v1.Secret
// pullChan receives the outcome of the pull.
pullChan chan<- pullResult
podSandboxConfig *runtimeapi.PodSandboxConfig
}

// pullImage queues a pull request for the background worker. It is invoked
// from EnsureImageExists and blocks while the request queue is full.
func (sip *serialImagePuller) pullImage(ctx context.Context, spec kubecontainer.ImageSpec, pullSecrets []v1.Secret, pullChan chan<- pullResult, podSandboxConfig *runtimeapi.PodSandboxConfig) {
	request := &imagePullRequest{
		ctx:              ctx,
		spec:             spec,
		pullSecrets:      pullSecrets,
		pullChan:         pullChan,
		podSandboxConfig: podSandboxConfig,
	}
	sip.pullRequests <- request
}

// processImagePullRequests drains the request queue, pulling one image at a
// time and reporting each outcome on the request's own result channel. It
// returns only when the request channel is closed.
func (sip *serialImagePuller) processImagePullRequests() {
	for {
		req, open := <-sip.pullRequests
		if !open {
			return
		}

		begin := time.Now()
		ref, pullErr := sip.imageService.PullImage(req.ctx, req.spec, req.pullSecrets, req.podSandboxConfig)
		outcome := pullResult{
			imageRef:     ref,
			err:          pullErr,
			pullDuration: time.Since(begin),
		}
		req.pullChan <- outcome
	}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// pkg/kubelet/images/image_manager.go
// ImagePodPullingTimeRecorder is notified when an image pull for a pod starts
// and when it finishes, identified by the pod's UID.
type ImagePodPullingTimeRecorder interface {
// RecordImageStartedPulling marks the start of an image pull for the pod.
RecordImageStartedPulling(podUID types.UID)
// RecordImageFinishedPulling marks the end of an image pull for the pod.
RecordImageFinishedPulling(podUID types.UID)
}

// imageManager provides the functionalities for image pulling.
type imageManager struct {
// recorder emits events about image pulls.
recorder record.EventRecorder
// imageService is the image service used to look up and pull images.
imageService kubecontainer.ImageService
// backOff tracks per-image pull back-off state after failed pulls.
backOff *flowcontrol.Backoff
// It will check the presence of the image, and report the 'image pulling', image pulled' events correspondingly.
// puller is the serial or parallel strategy used to run the pulls.
puller imagePuller

// podPullingTimeRecorder is told when a pod's image pull starts and finishes.
podPullingTimeRecorder ImagePodPullingTimeRecorder
}

// Compile-time check that *imageManager implements the ImageManager interface.
var _ ImageManager = (*imageManager)(nil)

// NewImageManager instantiates a new ImageManager object.
// The image service is first wrapped by throttleImagePulling using qps and
// burst; serialized then selects between the serial puller and the parallel
// puller (the latter optionally capped by maxParallelImagePulls).
func NewImageManager(recorder record.EventRecorder, imageService kubecontainer.ImageService, imageBackOff *flowcontrol.Backoff, serialized bool, maxParallelImagePulls *int32, qps float32, burst int, podPullingTimeRecorder ImagePodPullingTimeRecorder) ImageManager {
	throttled := throttleImagePulling(imageService, qps, burst)

	manager := &imageManager{
		recorder:               recorder,
		imageService:           throttled,
		backOff:                imageBackOff,
		podPullingTimeRecorder: podPullingTimeRecorder,
	}

	// Choose the pull strategy: serial or parallel.
	if serialized {
		manager.puller = newSerialImagePuller(throttled)
	} else {
		manager.puller = newParallelImagePuller(throttled, maxParallelImagePulls)
	}
	return manager
}

// shouldPullImage reports whether the image must be pulled, given the
// container's pull policy and whether the image is already present:
// Always pulls unconditionally, IfNotPresent pulls only when the image is
// missing, and Never (or any other policy value) does not pull.
func shouldPullImage(container *v1.Container, imagePresent bool) bool {
	switch container.ImagePullPolicy {
	case v1.PullAlways:
		return true
	case v1.PullIfNotPresent:
		return !imagePresent
	default:
		return false
	}
}

// logIt reports msg either as an event or as a log line: when ref is non-nil
// an event of the given type and reason is recorded against it; otherwise the
// message, preceded by prefix, is passed to logFn.
func (m *imageManager) logIt(ref *v1.ObjectReference, eventtype, event, prefix, msg string, logFn func(args ...interface{})) {
	if ref == nil {
		logFn(fmt.Sprint(prefix, " ", msg))
		return
	}
	m.recorder.Event(ref, eventtype, event, msg)
}

// EnsureImageExists pulls the image for the specified pod and container, and returns
// (imageRef, error message, error).
// The image is pulled only when the container's pull policy requires it; an
// image that is already present and needs no pull is returned as-is.
func (m *imageManager) EnsureImageExists(ctx context.Context, pod *v1.Pod, container *v1.Container, pullSecrets []v1.Secret, podSandboxConfig *runtimeapi.PodSandboxConfig) (string, string, error) {
logPrefix := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Image)

// Build an object reference to the container for event recording. A failure
// here is only logged; logIt writes to the log instead when ref is nil.
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod), "containerName", container.Name)
}

// If the image contains neither a tag nor a digest, apply the default tag (latest).
image, err := applyDefaultImageTag(container.Image)
if err != nil {
msg := fmt.Sprintf("Failed to apply default image tag %q: %v", container.Image, err)
m.logIt(ref, v1.EventTypeWarning, events.FailedToInspectImage, logPrefix, msg, klog.Warning)
return "", msg, ErrInvalidImageName
}

// Copy the pod's annotations into the image spec.
var podAnnotations []kubecontainer.Annotation
for k, v := range pod.GetAnnotations() {
podAnnotations = append(podAnnotations, kubecontainer.Annotation{
Name: k,
Value: v,
})
}

spec := kubecontainer.ImageSpec{
Image: image,
Annotations: podAnnotations,
}
// Look up the image ID or digest; an empty ref with a nil error means the
// image is not present.
imageRef, err := m.imageService.GetImageRef(ctx, spec)
if err != nil {
msg := fmt.Sprintf("Failed to inspect image %q: %v", container.Image, err)
m.logIt(ref, v1.EventTypeWarning, events.FailedToInspectImage, logPrefix, msg, klog.Warning)
return "", msg, ErrImageInspect
}

// present is true when the image already exists.
present := imageRef != ""
if !shouldPullImage(container, present) {
if present {
msg := fmt.Sprintf("Container image %q already present on machine", container.Image)
m.logIt(ref, v1.EventTypeNormal, events.PulledImage, logPrefix, msg, klog.Info)
return imageRef, "", nil
}
// No pull is allowed and the image is missing: report ErrImageNeverPull.
msg := fmt.Sprintf("Container image %q is not present with pull policy of Never", container.Image)
m.logIt(ref, v1.EventTypeWarning, events.ErrImageNeverPullPolicy, logPrefix, msg, klog.Warning)
return "", msg, ErrImageNeverPull
}

// Back-off state is tracked per pod UID and image name; skip the pull while
// a back-off from an earlier failure is still in effect.
backOffKey := fmt.Sprintf("%s_%s", pod.UID, container.Image)
if m.backOff.IsInBackOffSinceUpdate(backOffKey, m.backOff.Clock.Now()) {
msg := fmt.Sprintf("Back-off pulling image %q", container.Image)
m.logIt(ref, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)
return "", msg, ErrImagePullBackOff
}
m.podPullingTimeRecorder.RecordImageStartedPulling(pod.UID)
m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", container.Image), klog.Info)
startTime := time.Now()
pullChan := make(chan pullResult)
// Hand the pull to the configured puller and wait for its result.
m.puller.pullImage(ctx, spec, pullSecrets, pullChan, podSandboxConfig)
imagePullResult := <-pullChan
if imagePullResult.err != nil {
m.logIt(ref, v1.EventTypeWarning, events.FailedToPullImage, logPrefix, fmt.Sprintf("Failed to pull image %q: %v", container.Image, imagePullResult.err), klog.Warning)
// Record the failure so the next attempt for this key is backed off.
m.backOff.Next(backOffKey, m.backOff.Clock.Now())

msg, err := evalCRIPullErr(container, imagePullResult.err)
return "", msg, err
}
// Note: the finished-pulling notification is sent on the success path only.
m.podPullingTimeRecorder.RecordImageFinishedPulling(pod.UID)
m.logIt(ref, v1.EventTypeNormal, events.PulledImage, logPrefix, fmt.Sprintf("Successfully pulled image %q in %v (%v including waiting)",
container.Image, imagePullResult.pullDuration.Truncate(time.Millisecond), time.Since(startTime).Truncate(time.Millisecond)), klog.Info)
m.backOff.GC()
return imagePullResult.imageRef, "", nil
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
// pkg/kubelet/images/image_gc_manager.go
// ImageGCManager declares the methods an image garbage collection manager must implement.
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect(ctx context.Context) error

// Start async garbage collection of images.
Start()

// GetImageList returns a list of images.
// NOTE(review): presumably served from the manager's image cache — the
// implementation is not shown here; confirm.
GetImageList() ([]container.Image, error)

// Delete all unused images.
DeleteUnusedImages(ctx context.Context) error
}

// ImageGCPolicy holds the thresholds that control image garbage collection.
type ImageGCPolicy struct {
// Any usage above this threshold will always trigger garbage collection.
// This is the highest usage we will allow.
HighThresholdPercent int

// Any usage below this threshold will never trigger garbage collection.
// This is the lowest threshold we will try to garbage collect to.
LowThresholdPercent int

// Minimum age at which an image can be garbage collected.
MinAge time.Duration
}

// realImageGCManager is the ImageGCManager implementation returned by
// NewImageGCManager.
type realImageGCManager struct {
// Container runtime
runtime container.Runtime

// Records of images and their use.
// imageRecordsLock must be held while reading or modifying imageRecords.
imageRecords map[string]*imageRecord
imageRecordsLock sync.Mutex

// The image garbage collection policy in use.
policy ImageGCPolicy

// statsProvider provides stats used during image garbage collection.
statsProvider StatsProvider

// Recorder for Kubernetes events.
recorder record.EventRecorder

// Reference to this node.
nodeRef *v1.ObjectReference

// Track initialization
// It is set once the first image detection in Start has succeeded.
initialized bool

// imageCache is the cache of latest image list.
imageCache imageCache

// sandbox image exempted from GC
sandboxImage string

// tracer for recording spans
tracer trace.Tracer
}

// imageCache is a mutex-protected holder of the latest image list, accessed
// through its set and get methods.
type imageCache struct {
// sync.Mutex is the mutex protects the image cache.
sync.Mutex
// images is the image cache.
images []container.Image
}

// set replaces the cached image list with images. The given slice is sorted
// in place by image size and then stored without copying.
func (i *imageCache) set(images []container.Image) {
	ordered := sliceutils.ByImageSize(images)

	i.Lock()
	defer i.Unlock()

	// setNodeStatusImages needs the list sorted when it reads it. Reads are
	// more frequent than writes, so the sorting cost is paid here on write.
	sort.Sort(ordered)
	i.images = images
}


// get returns the currently cached image list. The slice is handed out as
// stored, without copying.
func (i *imageCache) get() []container.Image {
	i.Lock()
	cached := i.images
	i.Unlock()
	return cached
}

// imageRecord holds the bookkeeping kept for each tracked image.
type imageRecord struct {
// Time when this image was first detected.
firstDetected time.Time

// Time when we last saw this image being used.
lastUsed time.Time

// Size of the image in bytes.
size int64

// Pinned status of the image
pinned bool
}

// NewImageGCManager creates an ImageGCManager for the given runtime and policy.
// It returns an error when either threshold is outside [0, 100] or when the
// low threshold is higher than the high threshold.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string, tracerProvider trace.TracerProvider) (ImageGCManager, error) {
	// Validate the policy before building anything.
	high, low := policy.HighThresholdPercent, policy.LowThresholdPercent
	switch {
	case high < 0 || high > 100:
		return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", high)
	case low < 0 || low > 100:
		return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", low)
	case low > high:
		return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", low, high)
	}

	return &realImageGCManager{
		runtime:       runtime,
		policy:        policy,
		imageRecords:  make(map[string]*imageRecord),
		statsProvider: statsProvider,
		recorder:      recorder,
		nodeRef:       nodeRef,
		initialized:   false,
		sandboxImage:  sandboxImage,
		tracer:        tracerProvider.Tracer(instrumentationScope),
	}, nil
}


// Start launches two background loops that never stop: one runs detectImages
// every 5 minutes, the other refreshes the image cache from the runtime every
// 30 seconds.
func (im *realImageGCManager) Start() {
	ctx := context.Background()

	detect := func() {
		// Until the first successful detection the zero time is used, so the
		// detected time of those images is "unknown" in the past.
		detectTime := time.Time{}
		if im.initialized {
			detectTime = time.Now()
		}
		if _, err := im.detectImages(ctx, detectTime); err != nil {
			klog.InfoS("Failed to monitor images", "err", err)
			return
		}
		im.initialized = true
	}

	refreshCache := func() {
		// Fetch the current image list from the runtime.
		images, err := im.runtime.ListImages(ctx)
		if err != nil {
			klog.InfoS("Failed to update image list", "err", err)
			return
		}
		im.imageCache.set(images)
	}

	go wait.Until(detect, 5*time.Minute, wait.NeverStop)
	go wait.Until(refreshCache, 30*time.Second, wait.NeverStop)
}

// detectImages refreshes imageRecords from the runtime's current image list
// and returns the set of image IDs that are in use. Images seen for the first
// time get detectTime as their firstDetected time; records of images that no
// longer exist are dropped.
func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.Time) (sets.String, error) {
// Set of images currently in use.
imagesInUse := sets.NewString()

// Always consider the container runtime pod sandbox image in use
imageRef, err := im.runtime.GetImageRef(ctx, container.ImageSpec{Image: im.sandboxImage})
if err == nil && imageRef != "" {
imagesInUse.Insert(imageRef)
}
// List all images currently reported by the runtime.
images, err := im.runtime.ListImages(ctx)
if err != nil {
return imagesInUse, err
}
// List the pods known to the runtime.
pods, err := im.runtime.GetPods(ctx, true)
if err != nil {
return imagesInUse, err
}

// Make a set of images in use by containers.
for _, pod := range pods {
for _, container := range pod.Containers {
klog.V(5).InfoS("Container uses image", "pod", klog.KRef(pod.Namespace, pod.Name), "containerName", container.Name, "containerImage", container.Image, "imageID", container.ImageID)
imagesInUse.Insert(container.ImageID)
}
}

// Add new images and record those being used.
now := time.Now()
currentImages := sets.NewString()
// imageRecords is modified from here on, so hold the lock until return.
im.imageRecordsLock.Lock()
defer im.imageRecordsLock.Unlock()
// Walk every image currently reported by the runtime.
for _, image := range images {
klog.V(5).InfoS("Adding image ID to currentImages", "imageID", image.ID)
currentImages.Insert(image.ID)

// New image, set it as detected now.
if _, ok := im.imageRecords[image.ID]; !ok {
// The image has no record yet.
klog.V(5).InfoS("Image ID is new", "imageID", image.ID)
im.imageRecords[image.ID] = &imageRecord{
firstDetected: detectTime,
}
}

// Set last used time to now if the image is being used.
if isImageUsed(image.ID, imagesInUse) {
klog.V(5).InfoS("Setting Image ID lastUsed", "imageID", image.ID, "lastUsed", now)
im.imageRecords[image.ID].lastUsed = now
}

klog.V(5).InfoS("Image ID has size", "imageID", image.ID, "size", image.Size)
im.imageRecords[image.ID].size = image.Size

klog.V(5).InfoS("Image ID is pinned", "imageID", image.ID, "pinned", image.Pinned)
im.imageRecords[image.ID].pinned = image.Pinned
}

// Remove old images from our records.
for image := range im.imageRecords {
if !currentImages.Has(image) {
klog.V(5).InfoS("Image ID is no longer present; removing from imageRecords", "imageID", image)
// The image is no longer in the runtime's image list: drop its record.
delete(im.imageRecords, image)
}
}

return imagesInUse, nil
}

// GarbageCollect applies the image GC policy once: when disk usage on the
// image filesystem is at or above the high threshold it tries to free enough
// bytes to bring usage down to the low threshold.
// It is invoked periodically from Kubelet.StartGarbageCollection (every
// ImageGCPeriod; the original author's note says every 5 minutes).
// It returns an error when stats cannot be read, when the reported capacity
// is 0, when image deletion fails, or when less than the required amount
// could be freed.
func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
defer otelSpan.End()
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats(ctx)
if err != nil {
return err
}

// Missing values stay 0; a zero capacity is rejected below.
var capacity, available int64
if fsStats.CapacityBytes != nil {
capacity = int64(*fsStats.CapacityBytes)
}
if fsStats.AvailableBytes != nil {
available = int64(*fsStats.AvailableBytes)
}

// Clamp inconsistent stats so the usage computation cannot go negative.
if available > capacity {
klog.InfoS("Availability is larger than capacity", "available", available, "capacity", capacity)
available = capacity
}

// Check valid capacity.
if capacity == 0 {
err := goerrors.New("invalid capacity 0 on image filesystem")
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
return err
}

// If over the max threshold, free enough to place us at the lower threshold.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
// Bytes that must be available at the low threshold, minus what is
// available now.
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
klog.InfoS("Disk usage on image filesystem is over the high threshold, trying to free bytes down to the low threshold", "usage", usagePercent, "highThreshold", im.policy.HighThresholdPercent, "amountToFree", amountToFree, "lowThreshold", im.policy.LowThresholdPercent)
// Delete unused images.
freed, err := im.freeSpace(ctx, amountToFree, time.Now())
if err != nil {
return err
}

if freed < amountToFree {
err := fmt.Errorf("Failed to garbage collect required amount of images. Attempted to free %d bytes, but only found %d bytes eligible to free.", amountToFree, freed)
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
return err
}
}

return nil
}

// freeSpace removes unused, unpinned images in eviction order until at least
// bytesToFree bytes have been freed or no eligible image is left. It returns
// the number of bytes freed; when any removal failed, the error aggregates
// the deletion errors and the freed amount is still returned.
func (im *realImageGCManager) freeSpace(ctx context.Context, bytesToFree int64, freeTime time.Time) (int64, error) {
// Refresh the records first; detectImages takes and releases the records
// lock itself, so it must run before the lock is taken below.
imagesInUse, err := im.detectImages(ctx, freeTime)
if err != nil {
return 0, err
}

im.imageRecordsLock.Lock()
defer im.imageRecordsLock.Unlock()

// Get all images in eviction order.
images := make([]evictionInfo, 0, len(im.imageRecords))
for image, record := range im.imageRecords {
if isImageUsed(image, imagesInUse) {
klog.V(5).InfoS("Image ID is being used", "imageID", image)
continue
}
// Check if image is pinned, prevent garbage collection
if record.pinned {
klog.V(5).InfoS("Image is pinned, skipping garbage collection", "imageID", image)
continue

}
images = append(images, evictionInfo{
id: image,
imageRecord: *record,
})
}
sort.Sort(byLastUsedAndDetected(images))

// Delete unused images until we've freed up enough space.
var deletionErrors []error
spaceFreed := int64(0)
for _, image := range images {
klog.V(5).InfoS("Evaluating image ID for possible garbage collection", "imageID", image.id)
// Images that are currently in used were given a newer lastUsed.
if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
klog.V(5).InfoS("Image ID was used too recently, not eligible for garbage collection", "imageID", image.id, "lastUsed", image.lastUsed, "freeTime", freeTime)
continue
}

// Avoid garbage collect the image if the image is not old enough.
// In such a case, the image may have just been pulled down, and will be used by a container right away.
// An image whose time since first detection is below policy.MinAge is skipped.
if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
klog.V(5).InfoS("Image ID's age is less than the policy's minAge, not eligible for garbage collection", "imageID", image.id, "age", freeTime.Sub(image.firstDetected), "minAge", im.policy.MinAge)
continue
}

// Remove image. Continue despite errors.
klog.InfoS("Removing image to free bytes", "imageID", image.id, "size", image.size)
err := im.runtime.RemoveImage(ctx, container.ImageSpec{Image: image.id})
if err != nil {
deletionErrors = append(deletionErrors, err)
continue
}
// Only successfully removed images are dropped from the records and
// counted as freed space.
delete(im.imageRecords, image.id)
spaceFreed += image.size

if spaceFreed >= bytesToFree {
break
}
}

if len(deletionErrors) > 0 {
return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
}
return spaceFreed, nil
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// cmd/kubelet/app/server.go
// createAndInitKubelet is shown here in abridged form ("..." marks elided
// code); the relevant step is that it starts the kubelet's garbage collection.
func createAndInitKubelet(kubeServer *options.KubeletServer,
kubeDeps *kubelet.Dependencies,
hostname string,
hostnameOverridden bool,
nodeName types.NodeName,
nodeIPs []net.IP) (k kubelet.Bootstrap, err error) {
...
// Kick off the background garbage collection loops.
k.StartGarbageCollection()
...
}

// StartGarbageCollection is shown here in abridged form ("..." marks elided
// code). The visible part starts a loop that runs image garbage collection
// every ImageGCPeriod, unless image GC is disabled by a high threshold of 100.
func (kl *Kubelet) StartGarbageCollection() {
...
// A high threshold of 100 disables image GC entirely.
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100, Disable image GC")
return
}
go wait.Until(func() {
ctx := context.Background()
if err := kl.imageManager.GarbageCollect(ctx); err != nil {
if prevImageGCFailed {
klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
// Only create an event for repeated failures
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
} else {
klog.ErrorS(err, "Image garbage collection failed once. Stats initialization may not have completed yet")
}
prevImageGCFailed = true
} else {
// Log the success more prominently (verbosity 1 instead of 4) when
// it follows a failure, and clear the failure flag.
var vLevel klog.Level = 4
if prevImageGCFailed {
vLevel = 1
prevImageGCFailed = false
}

klog.V(vLevel).InfoS("Image garbage collection succeeded")
}
}, ImageGCPeriod, wait.NeverStop)
...
}

REF:
1.pkg/kubelet/images/types.go
2.pkg/kubelet/images/image_gc_manager.go
3.pkg/kubelet/images/image_manager.go
4.pkg/kubelet/images/puller.go