Browse code

container: add a span to CheckpointTo

Signed-off-by: Albin Kerouanton <albinker@gmail.com>

Albin Kerouanton authored on 2024/04/22 14:06:34
Showing 14 changed files
... ...
@@ -42,6 +42,9 @@ import (
42 42
 	"github.com/moby/sys/symlink"
43 43
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
44 44
 	"github.com/pkg/errors"
45
+	"go.opentelemetry.io/otel"
46
+	"go.opentelemetry.io/otel/attribute"
47
+	"go.opentelemetry.io/otel/trace"
45 48
 )
46 49
 
47 50
 const (
... ...
@@ -200,7 +203,12 @@ func (container *Container) toDisk() (*Container, error) {
200 200
 
201 201
 // CheckpointTo makes the Container's current state visible to queries, and persists state.
202 202
 // Callers must hold a Container lock.
203
-func (container *Container) CheckpointTo(store *ViewDB) error {
203
+func (container *Container) CheckpointTo(ctx context.Context, store *ViewDB) error {
204
+	ctx, span := otel.Tracer("").Start(ctx, "container.CheckpointTo", trace.WithAttributes(
205
+		attribute.String("container.ID", container.ID),
206
+		attribute.String("container.Name", container.Name)))
207
+	defer span.End()
208
+
204 209
 	deepCopy, err := container.toDisk()
205 210
 	if err != nil {
206 211
 		return err
... ...
@@ -1,6 +1,7 @@
1 1
 package container // import "github.com/docker/docker/container"
2 2
 
3 3
 import (
4
+	"context"
4 5
 	"math/rand"
5 6
 	"os"
6 7
 	"path/filepath"
... ...
@@ -46,7 +47,7 @@ func TestViewSaveDelete(t *testing.T) {
46 46
 		t.Fatal(err)
47 47
 	}
48 48
 	c := newContainer(t)
49
-	if err := c.CheckpointTo(db); err != nil {
49
+	if err := c.CheckpointTo(context.Background(), db); err != nil {
50 50
 		t.Fatal(err)
51 51
 	}
52 52
 	if err := db.Delete(c); err != nil {
... ...
@@ -61,11 +62,11 @@ func TestViewAll(t *testing.T) {
61 61
 		two   = newContainer(t)
62 62
 	)
63 63
 	one.Pid = 10
64
-	if err := one.CheckpointTo(db); err != nil {
64
+	if err := one.CheckpointTo(context.Background(), db); err != nil {
65 65
 		t.Fatal(err)
66 66
 	}
67 67
 	two.Pid = 20
68
-	if err := two.CheckpointTo(db); err != nil {
68
+	if err := two.CheckpointTo(context.Background(), db); err != nil {
69 69
 		t.Fatal(err)
70 70
 	}
71 71
 
... ...
@@ -94,7 +95,7 @@ func TestViewGet(t *testing.T) {
94 94
 		one   = newContainer(t)
95 95
 	)
96 96
 	one.ImageID = "some-image-123"
97
-	if err := one.CheckpointTo(db); err != nil {
97
+	if err := one.CheckpointTo(context.Background(), db); err != nil {
98 98
 		t.Fatal(err)
99 99
 	}
100 100
 	s, err := db.Snapshot().Get(one.ID)
... ...
@@ -174,7 +175,7 @@ func TestViewWithHealthCheck(t *testing.T) {
174 174
 			Status: "starting",
175 175
 		},
176 176
 	}
177
-	if err := one.CheckpointTo(db); err != nil {
177
+	if err := one.CheckpointTo(context.Background(), db); err != nil {
178 178
 		t.Fatal(err)
179 179
 	}
180 180
 	s, err := db.Snapshot().Get(one.ID)
... ...
@@ -117,7 +117,7 @@ func (daemon *Daemon) Register(c *container.Container) error {
117 117
 	defer c.Unlock()
118 118
 
119 119
 	daemon.containers.Add(c.ID, c)
120
-	return c.CheckpointTo(daemon.containersReplica)
120
+	return c.CheckpointTo(context.TODO(), daemon.containersReplica)
121 121
 }
122 122
 
123 123
 func (daemon *Daemon) newContainer(name string, operatingSystem string, config *containertypes.Config, hostConfig *containertypes.HostConfig, imgID image.ID, managed bool) (*container.Container, error) {
... ...
@@ -1085,7 +1085,7 @@ func (daemon *Daemon) ConnectToNetwork(ctx context.Context, container *container
1085 1085
 		}
1086 1086
 	}
1087 1087
 
1088
-	return container.CheckpointTo(daemon.containersReplica)
1088
+	return container.CheckpointTo(ctx, daemon.containersReplica)
1089 1089
 }
1090 1090
 
1091 1091
 // DisconnectFromNetwork disconnects container from network n.
... ...
@@ -1119,7 +1119,7 @@ func (daemon *Daemon) DisconnectFromNetwork(ctx context.Context, container *cont
1119 1119
 		return err
1120 1120
 	}
1121 1121
 
1122
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
1122
+	if err := container.CheckpointTo(ctx, daemon.containersReplica); err != nil {
1123 1123
 		return err
1124 1124
 	}
1125 1125
 
... ...
@@ -473,7 +473,7 @@ func (daemon *Daemon) restore(cfg *configStore) error {
473 473
 						c.Paused = false
474 474
 						daemon.setStateCounter(c)
475 475
 						daemon.initHealthMonitor(c)
476
-						if err := c.CheckpointTo(daemon.containersReplica); err != nil {
476
+						if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
477 477
 							baseLogger.WithError(err).Error("failed to update paused container state")
478 478
 						}
479 479
 						c.Unlock()
... ...
@@ -497,7 +497,7 @@ func (daemon *Daemon) restore(cfg *configStore) error {
497 497
 					}
498 498
 					c.SetStopped(&ces)
499 499
 					daemon.Cleanup(context.TODO(), c)
500
-					if err := c.CheckpointTo(daemon.containersReplica); err != nil {
500
+					if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
501 501
 						baseLogger.WithError(err).Error("failed to update stopped container state")
502 502
 					}
503 503
 					c.Unlock()
... ...
@@ -564,7 +564,7 @@ func (daemon *Daemon) restore(cfg *configStore) error {
564 564
 				// state and leave further processing up to them.
565 565
 				c.RemovalInProgress = false
566 566
 				c.Dead = true
567
-				if err := c.CheckpointTo(daemon.containersReplica); err != nil {
567
+				if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
568 568
 					baseLogger.WithError(err).Error("failed to update RemovalInProgress container state")
569 569
 				} else {
570 570
 					baseLogger.Debugf("reset RemovalInProgress state for container")
... ...
@@ -1615,7 +1615,7 @@ func RemapContainerdNamespaces(config *config.Config) (ns string, pluginNs strin
1615 1615
 func (daemon *Daemon) checkpointAndSave(container *container.Container) error {
1616 1616
 	container.Lock()
1617 1617
 	defer container.Unlock()
1618
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
1618
+	if err := container.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
1619 1619
 		return fmt.Errorf("Error saving container state: %v", err)
1620 1620
 	}
1621 1621
 	return nil
... ...
@@ -128,7 +128,7 @@ func (daemon *Daemon) cleanupContainer(container *container.Container, config ba
128 128
 	// Save container state to disk, so that if an error happens before
129 129
 	// the container meta file is removed from disk, a restart of
130 130
 	// docker does not make a dead container alive.
131
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil && !os.IsNotExist(err) {
131
+	if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil && !os.IsNotExist(err) {
132 132
 		log.G(context.TODO()).Errorf("Error saving dying container to disk: %v", err)
133 133
 	}
134 134
 	container.Unlock()
... ...
@@ -88,7 +88,7 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, stopSign
88 88
 
89 89
 	if !daemon.IsShuttingDown() {
90 90
 		container.HasBeenManuallyStopped = true
91
-		if err := container.CheckpointTo(daemon.containersReplica); err != nil {
91
+		if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil {
92 92
 			log.G(context.TODO()).WithFields(log.Fields{
93 93
 				"error":     err,
94 94
 				"container": container.ID,
... ...
@@ -109,7 +109,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
109 109
 	defer c.Unlock() // needs to be called before autoRemove
110 110
 
111 111
 	daemon.setStateCounter(c)
112
-	checkpointErr := c.CheckpointTo(daemon.containersReplica)
112
+	checkpointErr := c.CheckpointTo(context.TODO(), daemon.containersReplica)
113 113
 
114 114
 	daemon.LogContainerEventWithAttributes(c, events.ActionDie, attributes)
115 115
 
... ...
@@ -134,7 +134,7 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
134 134
 				c.Lock()
135 135
 				c.SetStopped(&exitStatus)
136 136
 				daemon.setStateCounter(c)
137
-				c.CheckpointTo(daemon.containersReplica)
137
+				c.CheckpointTo(context.TODO(), daemon.containersReplica)
138 138
 				c.Unlock()
139 139
 				defer daemon.autoRemove(&cfg.Config, c)
140 140
 				if err != restartmanager.ErrRestartCanceled {
... ...
@@ -165,7 +165,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
165 165
 		defer c.Unlock()
166 166
 		c.OOMKilled = true
167 167
 		daemon.updateHealthMonitor(c)
168
-		if err := c.CheckpointTo(daemon.containersReplica); err != nil {
168
+		if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
169 169
 			return err
170 170
 		}
171 171
 
... ...
@@ -261,7 +261,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
261 261
 
262 262
 			daemon.initHealthMonitor(c)
263 263
 
264
-			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
264
+			if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
265 265
 				return err
266 266
 			}
267 267
 			daemon.LogContainerEvent(c, events.ActionStart)
... ...
@@ -275,7 +275,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
275 275
 			c.Paused = true
276 276
 			daemon.setStateCounter(c)
277 277
 			daemon.updateHealthMonitor(c)
278
-			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
278
+			if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
279 279
 				return err
280 280
 			}
281 281
 			daemon.LogContainerEvent(c, events.ActionPause)
... ...
@@ -289,7 +289,7 @@ func (daemon *Daemon) ProcessEvent(id string, e libcontainerdtypes.EventType, ei
289 289
 			daemon.setStateCounter(c)
290 290
 			daemon.updateHealthMonitor(c)
291 291
 
292
-			if err := c.CheckpointTo(daemon.containersReplica); err != nil {
292
+			if err := c.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
293 293
 				return err
294 294
 			}
295 295
 			daemon.LogContainerEvent(c, events.ActionUnPause)
... ...
@@ -49,7 +49,7 @@ func (daemon *Daemon) containerPause(container *container.Container) error {
49 49
 	daemon.updateHealthMonitor(container)
50 50
 	daemon.LogContainerEvent(container, events.ActionPause)
51 51
 
52
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
52
+	if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil {
53 53
 		log.G(context.TODO()).WithError(err).Warn("could not save container to disk")
54 54
 	}
55 55
 
... ...
@@ -78,7 +78,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) {
78 78
 		daemon.linkIndex.unlink(oldName+k, v, container)
79 79
 		daemon.containersReplica.ReleaseName(oldName + k)
80 80
 	}
81
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
81
+	if err := container.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
82 82
 		return err
83 83
 	}
84 84
 
... ...
@@ -92,7 +92,7 @@ func (daemon *Daemon) ContainerRename(oldName, newName string) (retErr error) {
92 92
 	defer func() {
93 93
 		if retErr != nil {
94 94
 			container.Name = oldName
95
-			if err := container.CheckpointTo(daemon.containersReplica); err != nil {
95
+			if err := container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil {
96 96
 				log.G(context.TODO()).WithFields(log.Fields{
97 97
 					"containerID": container.ID,
98 98
 					"error":       err,
... ...
@@ -93,7 +93,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
93 93
 			if container.ExitCode() == 0 {
94 94
 				container.SetExitCode(exitUnknown)
95 95
 			}
96
-			if err := container.CheckpointTo(daemon.containersReplica); err != nil {
96
+			if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
97 97
 				log.G(ctx).Errorf("%s: failed saving state on start failure: %v", container.ID, err)
98 98
 			}
99 99
 			container.Reset(false)
... ...
@@ -211,7 +211,7 @@ func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore
211 211
 
212 212
 	daemon.initHealthMonitor(container)
213 213
 
214
-	if err := container.CheckpointTo(daemon.containersReplica); err != nil {
214
+	if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
215 215
 		log.G(ctx).WithError(err).WithField("container", container.ID).
216 216
 			Errorf("failed to store container")
217 217
 	}
... ...
@@ -3,6 +3,8 @@
3 3
 package daemon // import "github.com/docker/docker/daemon"
4 4
 
5 5
 import (
6
+	"context"
7
+
6 8
 	"github.com/docker/docker/container"
7 9
 )
8 10
 
... ...
@@ -11,7 +13,7 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(daemonCfg *configStore, cont
11 11
 	// Ensure a runtime has been assigned to this container
12 12
 	if container.HostConfig.Runtime == "" {
13 13
 		container.HostConfig.Runtime = daemonCfg.Runtimes.Default
14
-		container.CheckpointTo(daemon.containersReplica)
14
+		container.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica)
15 15
 	}
16 16
 
17 17
 	shim, opts, err := daemonCfg.Runtimes.Get(container.HostConfig.Runtime)
... ...
@@ -41,7 +41,7 @@ func (daemon *Daemon) containerUnpause(ctr *container.Container) error {
41 41
 	daemon.updateHealthMonitor(ctr)
42 42
 	daemon.LogContainerEvent(ctr, events.ActionUnPause)
43 43
 
44
-	if err := ctr.CheckpointTo(daemon.containersReplica); err != nil {
44
+	if err := ctr.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica); err != nil {
45 45
 		log.G(context.TODO()).WithError(err).Warn("could not save container to disk")
46 46
 	}
47 47
 
... ...
@@ -45,7 +45,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
45 45
 			ctr.Lock()
46 46
 			if !ctr.RemovalInProgress && !ctr.Dead {
47 47
 				ctr.HostConfig = &backupHostConfig
48
-				ctr.CheckpointTo(daemon.containersReplica)
48
+				ctr.CheckpointTo(context.WithoutCancel(context.TODO()), daemon.containersReplica)
49 49
 			}
50 50
 			ctr.Unlock()
51 51
 		}
... ...
@@ -63,7 +63,7 @@ func (daemon *Daemon) update(name string, hostConfig *container.HostConfig) erro
63 63
 		ctr.Unlock()
64 64
 		return errCannotUpdate(ctr.ID, err)
65 65
 	}
66
-	if err := ctr.CheckpointTo(daemon.containersReplica); err != nil {
66
+	if err := ctr.CheckpointTo(context.TODO(), daemon.containersReplica); err != nil {
67 67
 		restoreConfig = true
68 68
 		ctr.Unlock()
69 69
 		return errCannotUpdate(ctr.ID, err)