Browse code

Restore the SetKey prestart hook.

Partially reverts 0046b16 "daemon: set libnetwork sandbox key w/o OCI hook"

Running SetKey to store the OCI Sandbox key after task creation, rather
than from the OCI prestart hook, meant it happened after sysctl settings
were applied by the runtime - which was the intention, we wanted to
complete Sandbox configuration after IPv6 had been disabled by a sysctl
if that was going to happen.

But, it meant '--sysctl' options for a specfic network interface caused
container task creation to fail, because the interface is only moved into
the network namespace during SetKey.

This change restores the SetKey prestart hook, and regenerates config
files that depend on the container's support for IPv6 after the task has
been created. It also adds a regression test that makes sure it's possible
to set an interface-specfic sysctl.

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/03/24 01:49:53
Showing 5 changed files
... ...
@@ -24,6 +24,7 @@ import (
24 24
 	"github.com/docker/docker/oci/caps"
25 25
 	"github.com/docker/docker/pkg/idtools"
26 26
 	"github.com/docker/docker/pkg/rootless/specconv"
27
+	"github.com/docker/docker/pkg/stringid"
27 28
 	volumemounts "github.com/docker/docker/volume/mounts"
28 29
 	"github.com/moby/sys/mount"
29 30
 	"github.com/moby/sys/mountinfo"
... ...
@@ -60,6 +61,28 @@ func withRlimits(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Contain
60 60
 	}
61 61
 }
62 62
 
63
+// withLibnetwork sets the libnetwork hook
64
+func withLibnetwork(daemon *Daemon, daemonCfg *dconfig.Config, c *container.Container) coci.SpecOpts {
65
+	return func(ctx context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
66
+		if c.Config.NetworkDisabled {
67
+			return nil
68
+		}
69
+		for _, ns := range s.Linux.Namespaces {
70
+			if ns.Type == specs.NetworkNamespace && ns.Path == "" {
71
+				if s.Hooks == nil {
72
+					s.Hooks = &specs.Hooks{}
73
+				}
74
+				shortNetCtlrID := stringid.TruncateID(daemon.netController.ID())
75
+				s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
76
+					Path: filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"),
77
+					Args: []string{"libnetwork-setkey", "-exec-root=" + daemonCfg.GetExecRoot(), c.ID, shortNetCtlrID},
78
+				})
79
+			}
80
+		}
81
+		return nil
82
+	}
83
+}
84
+
63 85
 // withRootless sets the spec to the rootless configuration
64 86
 func withRootless(daemon *Daemon, daemonCfg *dconfig.Config) coci.SpecOpts {
65 87
 	return func(_ context.Context, _ coci.Client, _ *containers.Container, s *coci.Spec) error {
... ...
@@ -1015,6 +1038,7 @@ func (daemon *Daemon) createSpec(ctx context.Context, daemonCfg *configStore, c
1015 1015
 		WithCapabilities(c),
1016 1016
 		WithSeccomp(daemon, c),
1017 1017
 		withMounts(daemon, daemonCfg, c, mounts),
1018
+		withLibnetwork(daemon, &daemonCfg.Config, c),
1018 1019
 		WithApparmor(c),
1019 1020
 		WithSelinux(c),
1020 1021
 		WithOOMScore(&c.HostConfig.OomScoreAdj),
... ...
@@ -2,14 +2,12 @@ package daemon // import "github.com/docker/docker/daemon"
2 2
 
3 3
 import (
4 4
 	"context"
5
-	"fmt"
6
-
7
-	specs "github.com/opencontainers/runtime-spec/specs-go"
8 5
 
9 6
 	"github.com/docker/docker/container"
10 7
 	"github.com/docker/docker/errdefs"
11 8
 	"github.com/docker/docker/libcontainerd/types"
12 9
 	"github.com/docker/docker/oci"
10
+	specs "github.com/opencontainers/runtime-spec/specs-go"
13 11
 )
14 12
 
15 13
 // initializeCreatedTask performs any initialization that needs to be done to
... ...
@@ -22,9 +20,7 @@ func (daemon *Daemon) initializeCreatedTask(ctx context.Context, tsk types.Task,
22 22
 			if err != nil {
23 23
 				return errdefs.System(err)
24 24
 			}
25
-			if err := sb.SetKey(fmt.Sprintf("/proc/%d/ns/net", tsk.Pid())); err != nil {
26
-				return errdefs.System(err)
27
-			}
25
+			return sb.FinishConfig()
28 26
 		}
29 27
 	}
30 28
 	return nil
... ...
@@ -675,3 +675,31 @@ func TestDisableIPv6Addrs(t *testing.T) {
675 675
 		})
676 676
 	}
677 677
 }
678
+
679
+// Test that it's possible to set a sysctl on an interface in the container.
680
+// Regression test for https://github.com/moby/moby/issues/47619
681
+func TestSetInterfaceSysctl(t *testing.T) {
682
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows", "no sysctl on Windows")
683
+
684
+	ctx := setupTest(t)
685
+	d := daemon.New(t)
686
+	d.StartWithBusybox(ctx, t)
687
+	defer d.Stop(t)
688
+
689
+	c := d.NewClientT(t)
690
+	defer c.Close()
691
+
692
+	const scName = "net.ipv4.conf.eth0.forwarding"
693
+	opts := []func(config *container.TestContainerConfig){
694
+		container.WithCmd("sysctl", scName),
695
+		container.WithSysctls(map[string]string{scName: "1"}),
696
+	}
697
+
698
+	runRes := container.RunAttach(ctx, t, c, opts...)
699
+	defer c.ContainerRemove(ctx, runRes.ContainerID,
700
+		containertypes.RemoveOptions{Force: true},
701
+	)
702
+
703
+	stdout := runRes.Stdout.String()
704
+	assert.Check(t, is.Contains(stdout, scName))
705
+}
... ...
@@ -545,26 +545,38 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types.
545 545
 	return nil
546 546
 }
547 547
 
548
-// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1'
549
-// is assigned by the kernel if IPv6 is enabled).
548
+// IPv6LoEnabled returns true if the loopback interface had an IPv6 address when
549
+// last checked. It's always checked on the first call, and by RefreshIPv6LoEnabled.
550
+// ('::1' is assigned by the kernel if IPv6 is enabled.)
550 551
 func (n *Namespace) IPv6LoEnabled() bool {
551 552
 	n.ipv6LoEnabledOnce.Do(func() {
552
-		// If anything goes wrong, assume no-IPv6.
553
-		iface, err := n.nlHandle.LinkByName("lo")
554
-		if err != nil {
555
-			log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
556
-			return
557
-		}
558
-		addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
559
-		if err != nil {
560
-			log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
561
-			return
562
-		}
563
-		n.ipv6LoEnabledCached = len(addrs) > 0
553
+		n.RefreshIPv6LoEnabled()
564 554
 	})
555
+	n.mu.Lock()
556
+	defer n.mu.Unlock()
565 557
 	return n.ipv6LoEnabledCached
566 558
 }
567 559
 
560
+// RefreshIPv6LoEnabled refreshes the cached result returned by IPv6LoEnabled.
561
+func (n *Namespace) RefreshIPv6LoEnabled() {
562
+	n.mu.Lock()
563
+	defer n.mu.Unlock()
564
+
565
+	// If anything goes wrong, assume no-IPv6.
566
+	n.ipv6LoEnabledCached = false
567
+	iface, err := n.nlHandle.LinkByName("lo")
568
+	if err != nil {
569
+		log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
570
+		return
571
+	}
572
+	addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
573
+	if err != nil {
574
+		log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
575
+		return
576
+	}
577
+	n.ipv6LoEnabledCached = len(addrs) > 0
578
+}
579
+
568 580
 // ApplyOSTweaks applies operating system specific knobs on the sandbox.
569 581
 func (n *Namespace) ApplyOSTweaks(types []SandboxType) {
570 582
 	for _, t := range types {
... ...
@@ -90,12 +90,8 @@ func (sb *Sandbox) updateGateway(ep *Endpoint) error {
90 90
 		return fmt.Errorf("failed to set gateway while updating gateway: %v", err)
91 91
 	}
92 92
 
93
-	// If IPv6 has been disabled in the sandbox a gateway may still have been
94
-	// configured, don't attempt to apply it.
95
-	if ipv6, ok := sb.ipv6Enabled(); !ok || ipv6 {
96
-		if err := osSbox.SetGatewayIPv6(joinInfo.gw6); err != nil {
97
-			return fmt.Errorf("failed to set IPv6 gateway while updating gateway: %v", err)
98
-		}
93
+	if err := osSbox.SetGatewayIPv6(joinInfo.gw6); err != nil {
94
+		return fmt.Errorf("failed to set IPv6 gateway while updating gateway: %v", err)
99 95
 	}
100 96
 
101 97
 	return nil
... ...
@@ -162,6 +158,10 @@ func (sb *Sandbox) SetKey(basePath string) error {
162 162
 		}
163 163
 	}
164 164
 
165
+	// Set up hosts and resolv.conf files. IPv6 support in the container can't be
166
+	// determined yet, as sysctls haven't been applied by the runtime. Calling
167
+	// FinishInit after the container task has been created, when sysctls have been
168
+	// applied will regenerate these files.
165 169
 	if err := sb.finishInitDNS(); err != nil {
166 170
 		return err
167 171
 	}
... ...
@@ -175,6 +175,27 @@ func (sb *Sandbox) SetKey(basePath string) error {
175 175
 	return nil
176 176
 }
177 177
 
178
+// FinishConfig completes Sandbox configuration. If called after the container task has been
179
+// created, and sysctl settings applied, the configuration will be based on the container's
180
+// IPv6 support.
181
+func (sb *Sandbox) FinishConfig() error {
182
+	if sb.config.useDefaultSandBox {
183
+		return nil
184
+	}
185
+
186
+	sb.mu.Lock()
187
+	osSbox := sb.osSbox
188
+	sb.mu.Unlock()
189
+	if osSbox == nil {
190
+		return nil
191
+	}
192
+
193
+	// If sysctl changes have been made, IPv6 may have been enabled/disabled since last checked.
194
+	osSbox.RefreshIPv6LoEnabled()
195
+
196
+	return sb.finishInitDNS()
197
+}
198
+
178 199
 // IPv6 support can always be determined for host networking. For other network
179 200
 // types it can only be determined once there's a container namespace to probe,
180 201
 // return ok=false in that case.
... ...
@@ -283,12 +304,7 @@ func (sb *Sandbox) populateNetworkResources(ep *Endpoint) error {
283 283
 
284 284
 		ifaceOptions = append(ifaceOptions, osl.WithIPv4Address(i.addr), osl.WithRoutes(i.routes))
285 285
 		if i.addrv6 != nil && i.addrv6.IP.To16() != nil {
286
-			// If IPv6 has been disabled in the Sandbox, an IPv6 address will still have
287
-			// been allocated. Don't apply it, because doing so would enable IPv6 on the
288
-			// interface.
289
-			if ipv6, ok := sb.ipv6Enabled(); !ok || ipv6 {
290
-				ifaceOptions = append(ifaceOptions, osl.WithIPv6Address(i.addrv6))
291
-			}
286
+			ifaceOptions = append(ifaceOptions, osl.WithIPv6Address(i.addrv6))
292 287
 		}
293 288
 		if len(i.llAddrs) != 0 {
294 289
 			ifaceOptions = append(ifaceOptions, osl.WithLinkLocalAddresses(i.llAddrs))