Browse code

daemon: handleContainerExit: ignore networking errors

Prior to commit fe856b9, containers' network sandbox and interfaces were
created before the containerd task. Now, it's created after.

If this step fails, the containerd task is forcefully deleted, and an
event is sent to the c8d event monitor, which triggers `handleContainerExit`.
Then this method tries to restart the faulty container.

This leads to containers with a published port already in use to be
stuck in a tight restart loop (if they're started with
`--restart=always`) until the port is available. This is needlessly
spamming the daemon logs.

Prior to that commit, a published port already in use wouldn't trigger
the restart process.

This commit adds a check to `handleContainerExit` to ignore exit events
if the latest container error is related to networking setup.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>

Albin Kerouanton authored on 2025/02/20 22:43:11
Showing 4 changed files
... ...
@@ -37,6 +37,8 @@ import (
37 37
 	"go.opentelemetry.io/otel/trace"
38 38
 )
39 39
 
40
+const errSetupNetworking = "failed to set up container networking"
41
+
40 42
 func ipAddresses(ips []net.IP) []string {
41 43
 	var addrs []string
42 44
 	for _, ip := range ips {
... ...
@@ -3,6 +3,7 @@ package daemon // import "github.com/docker/docker/daemon"
3 3
 import (
4 4
 	"context"
5 5
 	"strconv"
6
+	"strings"
6 7
 	"time"
7 8
 
8 9
 	"github.com/containerd/log"
... ...
@@ -32,6 +33,20 @@ func (daemon *Daemon) handleContainerExit(c *container.Container, e *libcontaine
32 32
 	var ctrExitStatus container.ExitStatus
33 33
 	c.Lock()
34 34
 
35
+	// If the latest container error is related to networking setup, don't try
36
+	// to restart the container, and don't change the container state to
37
+	// 'exited'. This happens when, for example, [daemon.allocateNetwork] fails
38
+	// due to published ports being already in use. In that case, we want to
39
+	// keep the container in the 'created' state.
40
+	//
41
+	// c.ErrorMsg is set by [daemon.containerStart], and doesn't preserve the
42
+	// error type (because this field is persisted on disk). So, use string
43
+	// matching instead of usual error comparison methods.
44
+	if strings.Contains(c.ErrorMsg, errSetupNetworking) {
45
+		c.Unlock()
46
+		return nil
47
+	}
48
+
35 49
 	cfg := daemon.config()
36 50
 
37 51
 	// Health checks will be automatically restarted if/when the
... ...
@@ -34,5 +34,8 @@ func (daemon *Daemon) initializeCreatedTask(
34 34
 			return errdefs.System(err)
35 35
 		}
36 36
 	}
37
-	return daemon.allocateNetwork(ctx, cfg, ctr)
37
+	if err := daemon.allocateNetwork(ctx, cfg, ctr); err != nil {
38
+		return fmt.Errorf("%s: %w", errSetupNetworking, err)
39
+	}
40
+	return nil
38 41
 }
... ...
@@ -19,6 +19,7 @@ import (
19 19
 	"github.com/docker/docker/libnetwork/netlabel"
20 20
 	"github.com/docker/docker/testutil"
21 21
 	"github.com/docker/docker/testutil/daemon"
22
+	"github.com/docker/go-connections/nat"
22 23
 	"github.com/vishvananda/netlink"
23 24
 	"gotest.tools/v3/assert"
24 25
 	is "gotest.tools/v3/assert/cmp"
... ...
@@ -423,3 +424,33 @@ func TestEndpointWithCustomIfname(t *testing.T) {
423 423
 	assert.NilError(t, err)
424 424
 	assert.Assert(t, strings.Contains(out.Stdout, ": foobar@if"), "expected ': foobar@if' in 'ip link show':\n%s", out.Stdout)
425 425
 }
426
+
427
+// TestPublishedPortAlreadyInUse checks that a container that can't start
428
+// because of one its published port being already in use doesn't end up
429
+// triggering the restart loop.
430
+//
431
+// Regression test for: https://github.com/moby/moby/issues/49501
432
+func TestPublishedPortAlreadyInUse(t *testing.T) {
433
+	ctx := setupTest(t)
434
+	apiClient := testEnv.APIClient()
435
+
436
+	ctr1 := ctr.Run(ctx, t, apiClient,
437
+		ctr.WithCmd("top"),
438
+		ctr.WithExposedPorts("80/tcp"),
439
+		ctr.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8000"}}}))
440
+	defer ctr.Remove(ctx, t, apiClient, ctr1, containertypes.RemoveOptions{Force: true})
441
+
442
+	ctr2 := ctr.Create(ctx, t, apiClient,
443
+		ctr.WithCmd("top"),
444
+		ctr.WithRestartPolicy(containertypes.RestartPolicyAlways),
445
+		ctr.WithExposedPorts("80/tcp"),
446
+		ctr.WithPortMap(nat.PortMap{"80/tcp": {{HostPort: "8000"}}}))
447
+	defer ctr.Remove(ctx, t, apiClient, ctr2, containertypes.RemoveOptions{Force: true})
448
+
449
+	err := apiClient.ContainerStart(ctx, ctr2, containertypes.StartOptions{})
450
+	assert.Assert(t, is.ErrorContains(err, "failed to set up container networking"))
451
+
452
+	inspect, err := apiClient.ContainerInspect(ctx, ctr2)
453
+	assert.NilError(t, err)
454
+	assert.Check(t, is.Equal(inspect.State.Status, "created"))
455
+}