Browse code

Detect IPv6 support in containers.

Some configuration in a container depends on whether it has support for
IPv6 (including default entries for '::1' etc in '/etc/hosts').

Before this change, the container's support for IPv6 was determined by
whether it was connected to any IPv6-enabled networks. But that can
change over time; it isn't a property of the container itself.

So, instead, detect IPv6 support by looking for '::1' on the container's
loopback interface. It will not be present if the kernel does not have
IPv6 support, or the user has disabled it in new namespaces by other
means.

Once IPv6 support has been determined for the container, its '/etc/hosts'
is re-generated accordingly.

The daemon no longer disables IPv6 on all interfaces during initialisation.
It now disables IPv6 only for interfaces that have not been assigned an
IPv6 address. (But, even if IPv6 is disabled for the container using the
sysctl 'net.ipv6.conf.all.disable_ipv6=1', interfaces connected to IPv6
networks still get IPv6 addresses that appear in the internal DNS. There's
more to do!)

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/01/12 01:44:58
Showing 11 changed files
... ...
@@ -1,6 +1,7 @@
1 1
 package container
2 2
 
3 3
 import (
4
+	"maps"
4 5
 	"strings"
5 6
 
6 7
 	"github.com/docker/docker/api/types/container"
... ...
@@ -46,6 +47,13 @@ func WithNetworkMode(mode string) func(*TestContainerConfig) {
46 46
 	}
47 47
 }
48 48
 
49
+// WithSysctls sets sysctl options for the container
50
+func WithSysctls(sysctls map[string]string) func(*TestContainerConfig) {
51
+	return func(c *TestContainerConfig) {
52
+		c.HostConfig.Sysctls = maps.Clone(sysctls)
53
+	}
54
+}
55
+
49 56
 // WithExposedPorts sets the exposed ports of the container
50 57
 func WithExposedPorts(ports ...string) func(*TestContainerConfig) {
51 58
 	return func(c *TestContainerConfig) {
52 59
new file mode 100644
... ...
@@ -0,0 +1,107 @@
0
+package networking
1
+
2
+import (
3
+	"context"
4
+	"testing"
5
+	"time"
6
+
7
+	containertypes "github.com/docker/docker/api/types/container"
8
+	"github.com/docker/docker/integration/internal/container"
9
+	"github.com/docker/docker/testutil"
10
+	"github.com/docker/docker/testutil/daemon"
11
+	"gotest.tools/v3/assert"
12
+	is "gotest.tools/v3/assert/cmp"
13
+	"gotest.tools/v3/skip"
14
+)
15
+
16
+// Check that the '/etc/hosts' file in a container is created according to
17
+// whether the container supports IPv6.
18
+// Regression test for https://github.com/moby/moby/issues/35954
19
+func TestEtcHostsIpv6(t *testing.T) {
20
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
21
+
22
+	ctx := setupTest(t)
23
+	d := daemon.New(t)
24
+	d.StartWithBusybox(ctx, t,
25
+		"--ipv6",
26
+		"--ip6tables",
27
+		"--experimental",
28
+		"--fixed-cidr-v6=fdc8:ffe2:d8d7:1234::/64")
29
+	defer d.Stop(t)
30
+
31
+	c := d.NewClientT(t)
32
+	defer c.Close()
33
+
34
+	testcases := []struct {
35
+		name           string
36
+		sysctls        map[string]string
37
+		expIPv6Enabled bool
38
+		expEtcHosts    string
39
+	}{
40
+		{
41
+			// Create a container with no overrides, on the IPv6-enabled default bridge.
42
+			// Expect the container to have a working '::1' address, on the assumption
43
+			// the test host's kernel supports IPv6 - and for its '/etc/hosts' file to
44
+			// include IPv6 addresses.
45
+			name:           "IPv6 enabled",
46
+			expIPv6Enabled: true,
47
+			expEtcHosts: `127.0.0.1	localhost
48
+::1	localhost ip6-localhost ip6-loopback
49
+fe00::0	ip6-localnet
50
+ff00::0	ip6-mcastprefix
51
+ff02::1	ip6-allnodes
52
+ff02::2	ip6-allrouters
53
+`,
54
+		},
55
+		{
56
+			// Create a container in the same network, with IPv6 disabled. Expect '::1'
57
+			// not to be pingable, and no IPv6 addresses in its '/etc/hosts'.
58
+			name:           "IPv6 disabled",
59
+			sysctls:        map[string]string{"net.ipv6.conf.all.disable_ipv6": "1"},
60
+			expIPv6Enabled: false,
61
+			expEtcHosts:    "127.0.0.1\tlocalhost\n",
62
+		},
63
+	}
64
+
65
+	for _, tc := range testcases {
66
+		t.Run(tc.name, func(t *testing.T) {
67
+			ctx := testutil.StartSpan(ctx, t)
68
+			ctrId := container.Run(ctx, t, c,
69
+				container.WithName("etchosts_"+sanitizeCtrName(t.Name())),
70
+				container.WithImage("busybox:latest"),
71
+				container.WithCmd("top"),
72
+				container.WithSysctls(tc.sysctls),
73
+			)
74
+			defer func() {
75
+				c.ContainerRemove(ctx, ctrId, containertypes.RemoveOptions{Force: true})
76
+			}()
77
+
78
+			runCmd := func(ctrId string, cmd []string, expExitCode int) string {
79
+				t.Helper()
80
+				execCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
81
+				defer cancel()
82
+				res, err := container.Exec(execCtx, c, ctrId, cmd)
83
+				assert.Check(t, is.Nil(err))
84
+				assert.Check(t, is.Equal(res.ExitCode, expExitCode))
85
+				return res.Stdout()
86
+			}
87
+
88
+			// Check that IPv6 is/isn't enabled, as expected.
89
+			var expPingExitStatus int
90
+			if !tc.expIPv6Enabled {
91
+				expPingExitStatus = 1
92
+			}
93
+			runCmd(ctrId, []string{"ping", "-6", "-c1", "-W3", "::1"}, expPingExitStatus)
94
+
95
+			// Check the contents of /etc/hosts.
96
+			stdout := runCmd(ctrId, []string{"cat", "/etc/hosts"}, 0)
97
+			// Append the container's own addresses/name to the expected hosts file content.
98
+			inspect := container.Inspect(ctx, t, c, ctrId)
99
+			exp := tc.expEtcHosts + inspect.NetworkSettings.IPAddress + "\t" + inspect.Config.Hostname + "\n"
100
+			if tc.expIPv6Enabled {
101
+				exp += inspect.NetworkSettings.GlobalIPv6Address + "\t" + inspect.Config.Hostname + "\n"
102
+			}
103
+			assert.Check(t, is.Equal(stdout, exp))
104
+		})
105
+	}
106
+}
... ...
@@ -6,9 +6,9 @@ import (
6 6
 	"errors"
7 7
 	"fmt"
8 8
 	"net"
9
-	"sync"
10 9
 
11 10
 	"github.com/containerd/log"
11
+	"github.com/docker/docker/libnetwork/netutils"
12 12
 	"github.com/docker/docker/libnetwork/types"
13 13
 	"github.com/ishidawataru/sctp"
14 14
 )
... ...
@@ -55,7 +55,7 @@ func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, cont
55 55
 		// skip adding implicit v6 addr, when the kernel was booted with `ipv6.disable=1`
56 56
 		// https://github.com/moby/moby/issues/42288
57 57
 		isV6Binding := c.HostIP != nil && c.HostIP.To4() == nil
58
-		if !isV6Binding && !IsV6Listenable() {
58
+		if !isV6Binding && !netutils.IsV6Listenable() {
59 59
 			continue
60 60
 		}
61 61
 
... ...
@@ -219,26 +219,3 @@ func (n *bridgeNetwork) releasePort(bnd types.PortBinding) error {
219 219
 
220 220
 	return portmapper.Unmap(host)
221 221
 }
222
-
223
-var (
224
-	v6ListenableCached bool
225
-	v6ListenableOnce   sync.Once
226
-)
227
-
228
-// IsV6Listenable returns true when `[::1]:0` is listenable.
229
-// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
230
-func IsV6Listenable() bool {
231
-	v6ListenableOnce.Do(func() {
232
-		ln, err := net.Listen("tcp6", "[::1]:0")
233
-		if err != nil {
234
-			// When the kernel was booted with `ipv6.disable=1`,
235
-			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
236
-			// https://github.com/moby/moby/issues/42288
237
-			log.G(context.TODO()).Debugf("port_mapping: v6Listenable=false (%v)", err)
238
-		} else {
239
-			v6ListenableCached = true
240
-			ln.Close()
241
-		}
242
-	})
243
-	return v6ListenableCached
244
-}
... ...
@@ -478,18 +478,8 @@ func (ep *Endpoint) sbJoin(sb *Sandbox, options ...EndpointOption) (err error) {
478 478
 		}
479 479
 	}
480 480
 
481
-	// Do not update hosts file with internal networks endpoint IP
482
-	if !n.ingress && n.Name() != libnGWNetwork {
483
-		var addresses []string
484
-		if ip := ep.getFirstInterfaceIPv4Address(); ip != nil {
485
-			addresses = append(addresses, ip.String())
486
-		}
487
-		if ip := ep.getFirstInterfaceIPv6Address(); ip != nil {
488
-			addresses = append(addresses, ip.String())
489
-		}
490
-		if err = sb.updateHostsFile(addresses); err != nil {
491
-			return err
492
-		}
481
+	if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
482
+		return err
493 483
 	}
494 484
 	if err = sb.updateDNS(n.enableIPv6); err != nil {
495 485
 		return err
... ...
@@ -860,26 +850,24 @@ func (ep *Endpoint) getSandbox() (*Sandbox, bool) {
860 860
 	return ps, ok
861 861
 }
862 862
 
863
-func (ep *Endpoint) getFirstInterfaceIPv4Address() net.IP {
863
+// Return a list of this endpoint's addresses to add to '/etc/hosts'.
864
+func (ep *Endpoint) getEtcHostsAddrs() []string {
864 865
 	ep.mu.Lock()
865 866
 	defer ep.mu.Unlock()
866 867
 
867
-	if ep.iface.addr != nil {
868
-		return ep.iface.addr.IP
868
+	// Do not update hosts file with internal network's endpoint IP
869
+	if n := ep.network; n == nil || n.ingress || n.Name() == libnGWNetwork {
870
+		return nil
869 871
 	}
870 872
 
871
-	return nil
872
-}
873
-
874
-func (ep *Endpoint) getFirstInterfaceIPv6Address() net.IP {
875
-	ep.mu.Lock()
876
-	defer ep.mu.Unlock()
877
-
873
+	var addresses []string
874
+	if ep.iface.addr != nil {
875
+		addresses = append(addresses, ep.iface.addr.IP.String())
876
+	}
878 877
 	if ep.iface.addrv6 != nil {
879
-		return ep.iface.addrv6.IP
878
+		addresses = append(addresses, ep.iface.addrv6.IP.String())
880 879
 	}
881
-
882
-	return nil
880
+	return addresses
883 881
 }
884 882
 
885 883
 // EndpointOptionGeneric function returns an option setter for a Generic option defined
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"bytes"
6 6
 	"fmt"
7 7
 	"io"
8
+	"net/netip"
8 9
 	"os"
9 10
 	"regexp"
10 11
 	"strings"
... ...
@@ -25,8 +26,10 @@ func (r Record) WriteTo(w io.Writer) (int64, error) {
25 25
 
26 26
 var (
27 27
 	// Default hosts config records slice
28
-	defaultContent = []Record{
28
+	defaultContentIPv4 = []Record{
29 29
 		{Hosts: "localhost", IP: "127.0.0.1"},
30
+	}
31
+	defaultContentIPv6 = []Record{
30 32
 		{Hosts: "localhost ip6-localhost ip6-loopback", IP: "::1"},
31 33
 		{Hosts: "ip6-localnet", IP: "fe00::0"},
32 34
 		{Hosts: "ip6-mcastprefix", IP: "ff00::0"},
... ...
@@ -71,9 +74,34 @@ func Drop(path string) {
71 71
 // IP, hostname, and domainname set main record leave empty for no master record
72 72
 // extraContent is an array of extra host records.
73 73
 func Build(path, IP, hostname, domainname string, extraContent []Record) error {
74
+	return build(path, IP, hostname, domainname, defaultContentIPv4, defaultContentIPv6, extraContent)
75
+}
76
+
77
+// BuildNoIPv6 is the same as Build, but will not include IPv6 entries.
78
+func BuildNoIPv6(path, IP, hostname, domainname string, extraContent []Record) error {
79
+	if isIPv6(IP) {
80
+		IP = ""
81
+	}
82
+
83
+	var ipv4ExtraContent []Record
84
+	for _, rec := range extraContent {
85
+		if !isIPv6(rec.IP) {
86
+			ipv4ExtraContent = append(ipv4ExtraContent, rec)
87
+		}
88
+	}
89
+
90
+	return build(path, IP, hostname, domainname, defaultContentIPv4, ipv4ExtraContent)
91
+}
92
+
93
+func isIPv6(s string) bool {
94
+	addr, err := netip.ParseAddr(s)
95
+	return err == nil && addr.Is6()
96
+}
97
+
98
+func build(path, IP, hostname, domainname string, contents ...[]Record) error {
74 99
 	defer pathLock(path)()
75 100
 
76
-	content := bytes.NewBuffer(nil)
101
+	buf := bytes.NewBuffer(nil)
77 102
 	if IP != "" {
78 103
 		// set main record
79 104
 		var mainRec Record
... ...
@@ -89,24 +117,21 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
89 89
 		if hostName, _, ok := strings.Cut(fqdn, "."); ok {
90 90
 			mainRec.Hosts += " " + hostName
91 91
 		}
92
-		if _, err := mainRec.WriteTo(content); err != nil {
93
-			return err
94
-		}
95
-	}
96
-	// Write defaultContent slice to buffer
97
-	for _, r := range defaultContent {
98
-		if _, err := r.WriteTo(content); err != nil {
92
+		if _, err := mainRec.WriteTo(buf); err != nil {
99 93
 			return err
100 94
 		}
101 95
 	}
102
-	// Write extra content from function arguments
103
-	for _, r := range extraContent {
104
-		if _, err := r.WriteTo(content); err != nil {
105
-			return err
96
+
97
+	// Write content from function arguments
98
+	for _, content := range contents {
99
+		for _, c := range content {
100
+			if _, err := c.WriteTo(buf); err != nil {
101
+				return err
102
+			}
106 103
 		}
107 104
 	}
108 105
 
109
-	return os.WriteFile(path, content.Bytes(), 0o644)
106
+	return os.WriteFile(path, buf.Bytes(), 0o644)
110 107
 }
111 108
 
112 109
 // Add adds an arbitrary number of Records to an already existing /etc/hosts file
... ...
@@ -4,9 +4,12 @@ import (
4 4
 	"bytes"
5 5
 	"fmt"
6 6
 	"os"
7
+	"path/filepath"
7 8
 	"testing"
8 9
 
9 10
 	"golang.org/x/sync/errgroup"
11
+	"gotest.tools/v3/assert"
12
+	is "gotest.tools/v3/assert/cmp"
10 13
 )
11 14
 
12 15
 func TestBuildDefault(t *testing.T) {
... ...
@@ -35,6 +38,26 @@ func TestBuildDefault(t *testing.T) {
35 35
 	}
36 36
 }
37 37
 
38
+func TestBuildNoIPv6(t *testing.T) {
39
+	d := t.TempDir()
40
+	filename := filepath.Join(d, "hosts")
41
+
42
+	err := BuildNoIPv6(filename, "fdbb:c59c:d015::2", "an.example", "", []Record{
43
+		{
44
+			Hosts: "another.example",
45
+			IP:    "fdbb:c59c:d015::3",
46
+		},
47
+		{
48
+			Hosts: "another.example",
49
+			IP:    "10.11.12.13",
50
+		},
51
+	})
52
+	assert.NilError(t, err)
53
+	content, err := os.ReadFile(filename)
54
+	assert.NilError(t, err)
55
+	assert.Check(t, is.DeepEqual(string(content), "127.0.0.1\tlocalhost\n10.11.12.13\tanother.example\n"))
56
+}
57
+
38 58
 func TestBuildHostnameDomainname(t *testing.T) {
39 59
 	file, err := os.CreateTemp("", "")
40 60
 	if err != nil {
... ...
@@ -3,6 +3,7 @@
3 3
 package netutils
4 4
 
5 5
 import (
6
+	"context"
6 7
 	"crypto/rand"
7 8
 	"encoding/hex"
8 9
 	"errors"
... ...
@@ -10,7 +11,9 @@ import (
10 10
 	"io"
11 11
 	"net"
12 12
 	"strings"
13
+	"sync"
13 14
 
15
+	"github.com/containerd/log"
14 16
 	"github.com/docker/docker/libnetwork/types"
15 17
 )
16 18
 
... ...
@@ -144,3 +147,26 @@ func ReverseIP(IP string) string {
144 144
 
145 145
 	return strings.Join(reverseIP, ".")
146 146
 }
147
+
148
+var (
149
+	v6ListenableCached bool
150
+	v6ListenableOnce   sync.Once
151
+)
152
+
153
+// IsV6Listenable returns true when `[::1]:0` is listenable.
154
+// IsV6Listenable returns false mostly when the kernel was booted with `ipv6.disable=1` option.
155
+func IsV6Listenable() bool {
156
+	v6ListenableOnce.Do(func() {
157
+		ln, err := net.Listen("tcp6", "[::1]:0")
158
+		if err != nil {
159
+			// When the kernel was booted with `ipv6.disable=1`,
160
+			// we get err "listen tcp6 [::1]:0: socket: address family not supported by protocol"
161
+			// https://github.com/moby/moby/issues/42288
162
+			log.G(context.TODO()).Debugf("v6Listenable=false (%v)", err)
163
+		} else {
164
+			v6ListenableCached = true
165
+			ln.Close()
166
+		}
167
+	})
168
+	return v6ListenableCached
169
+}
... ...
@@ -257,8 +257,6 @@ func (n *Namespace) AddInterface(srcName, dstPrefix string, options ...IfaceOpti
257 257
 	n.iFaces = append(n.iFaces, i)
258 258
 	n.mu.Unlock()
259 259
 
260
-	n.checkLoV6()
261
-
262 260
 	return nil
263 261
 }
264 262
 
... ...
@@ -311,8 +309,6 @@ func (n *Namespace) RemoveInterface(i *Interface) error {
311 311
 	}
312 312
 	n.mu.Unlock()
313 313
 
314
-	// TODO(aker): This function will disable IPv6 on lo interface if the removed interface was the last one offering IPv6 connectivity. That's a weird behavior, and shouldn't be hiding this deep down in this function.
315
-	n.checkLoV6()
316 314
 	return nil
317 315
 }
318 316
 
... ...
@@ -20,6 +20,7 @@ import (
20 20
 	"github.com/docker/docker/libnetwork/osl/kernel"
21 21
 	"github.com/docker/docker/libnetwork/types"
22 22
 	"github.com/vishvananda/netlink"
23
+	"github.com/vishvananda/netlink/nl"
23 24
 	"github.com/vishvananda/netns"
24 25
 	"golang.org/x/sys/unix"
25 26
 )
... ...
@@ -206,16 +207,6 @@ func NewSandbox(key string, osCreate, isRestore bool) (*Namespace, error) {
206 206
 	if err != nil {
207 207
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
208 208
 	}
209
-	// In live-restore mode, IPV6 entries are getting cleaned up due to below code
210
-	// We should retain IPV6 configurations in live-restore mode when Docker Daemon
211
-	// comes back. It should work as it is on other cases
212
-	// As starting point, disable IPv6 on all interfaces
213
-	if !isRestore && !n.isDefault {
214
-		err = setIPv6(n.path, "all", false)
215
-		if err != nil {
216
-			log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
217
-		}
218
-	}
219 209
 
220 210
 	if err = n.loopbackUp(); err != nil {
221 211
 		n.nlHandle.Close()
... ...
@@ -260,12 +251,6 @@ func GetSandboxForExternalKey(basePath string, key string) (*Namespace, error) {
260 260
 		log.G(context.TODO()).Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
261 261
 	}
262 262
 
263
-	// As starting point, disable IPv6 on all interfaces
264
-	err = setIPv6(n.path, "all", false)
265
-	if err != nil {
266
-		log.G(context.TODO()).Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
267
-	}
268
-
269 263
 	if err = n.loopbackUp(); err != nil {
270 264
 		n.nlHandle.Close()
271 265
 		return nil, err
... ...
@@ -325,17 +310,18 @@ func createNamespaceFile(path string) error {
325 325
 // or sets the gateway etc. It holds a list of Interfaces, routes etc., and more
326 326
 // can be added dynamically.
327 327
 type Namespace struct {
328
-	path         string
329
-	iFaces       []*Interface
330
-	gw           net.IP
331
-	gwv6         net.IP
332
-	staticRoutes []*types.StaticRoute
333
-	neighbors    []*neigh
334
-	nextIfIndex  map[string]int
335
-	isDefault    bool
336
-	nlHandle     *netlink.Handle
337
-	loV6Enabled  bool
338
-	mu           sync.Mutex
328
+	path                string
329
+	iFaces              []*Interface
330
+	gw                  net.IP
331
+	gwv6                net.IP
332
+	staticRoutes        []*types.StaticRoute
333
+	neighbors           []*neigh
334
+	nextIfIndex         map[string]int
335
+	isDefault           bool
336
+	ipv6LoEnabledOnce   sync.Once
337
+	ipv6LoEnabledCached bool
338
+	nlHandle            *netlink.Handle
339
+	mu                  sync.Mutex
339 340
 }
340 341
 
341 342
 // Interfaces returns the collection of Interface previously added with the AddInterface
... ...
@@ -559,32 +545,24 @@ func (n *Namespace) Restore(interfaces map[Iface][]IfaceOption, routes []*types.
559 559
 	return nil
560 560
 }
561 561
 
562
-// Checks whether IPv6 needs to be enabled/disabled on the loopback interface
563
-func (n *Namespace) checkLoV6() {
564
-	var (
565
-		enable = false
566
-		action = "disable"
567
-	)
568
-
569
-	n.mu.Lock()
570
-	for _, iface := range n.iFaces {
571
-		if iface.AddressIPv6() != nil {
572
-			enable = true
573
-			action = "enable"
574
-			break
562
+// IPv6LoEnabled checks whether the loopback interface has an IPv6 address ('::1'
563
+// is assigned by the kernel if IPv6 is enabled).
564
+func (n *Namespace) IPv6LoEnabled() bool {
565
+	n.ipv6LoEnabledOnce.Do(func() {
566
+		// If anything goes wrong, assume no-IPv6.
567
+		iface, err := n.nlHandle.LinkByName("lo")
568
+		if err != nil {
569
+			log.G(context.TODO()).WithError(err).Warn("Unable to find 'lo' to determine IPv6 support")
570
+			return
575 571
 		}
576
-	}
577
-	n.mu.Unlock()
578
-
579
-	if n.loV6Enabled == enable {
580
-		return
581
-	}
582
-
583
-	if err := setIPv6(n.path, "lo", enable); err != nil {
584
-		log.G(context.TODO()).Warnf("Failed to %s IPv6 on loopback interface on network namespace %q: %v", action, n.path, err)
585
-	}
586
-
587
-	n.loV6Enabled = enable
572
+		addrs, err := n.nlHandle.AddrList(iface, nl.FAMILY_V6)
573
+		if err != nil {
574
+			log.G(context.TODO()).WithError(err).Warn("Unable to get 'lo' addresses to determine IPv6 support")
575
+			return
576
+		}
577
+		n.ipv6LoEnabledCached = len(addrs) > 0
578
+	})
579
+	return n.ipv6LoEnabledCached
588 580
 }
589 581
 
590 582
 // ApplyOSTweaks applies operating system specific knobs on the sandbox.
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"context"
8 8
 	"fmt"
9 9
 	"net"
10
+	"net/netip"
10 11
 	"os"
11 12
 	"path"
12 13
 	"path/filepath"
... ...
@@ -14,6 +15,7 @@ import (
14 14
 	"strings"
15 15
 
16 16
 	"github.com/containerd/log"
17
+	"github.com/docker/docker/errdefs"
17 18
 	"github.com/docker/docker/libnetwork/etchosts"
18 19
 	"github.com/docker/docker/libnetwork/resolvconf"
19 20
 	"github.com/docker/docker/libnetwork/types"
... ...
@@ -27,6 +29,21 @@ const (
27 27
 	resolverIPSandbox = "127.0.0.11"
28 28
 )
29 29
 
30
+// finishInitDNS is to be called after the container namespace has been created,
31
+// before the user process is started. The container's support for IPv6 can be
32
+// determined at this point.
33
+func (sb *Sandbox) finishInitDNS() error {
34
+	if err := sb.buildHostsFile(); err != nil {
35
+		return errdefs.System(err)
36
+	}
37
+	for _, ep := range sb.Endpoints() {
38
+		if err := sb.updateHostsFile(ep.getEtcHostsAddrs()); err != nil {
39
+			return errdefs.System(err)
40
+		}
41
+	}
42
+	return nil
43
+}
44
+
30 45
 func (sb *Sandbox) startResolver(restore bool) {
31 46
 	sb.resolverOnce.Do(func() {
32 47
 		var err error
... ...
@@ -65,11 +82,17 @@ func (sb *Sandbox) startResolver(restore bool) {
65 65
 }
66 66
 
67 67
 func (sb *Sandbox) setupResolutionFiles() error {
68
-	if err := sb.buildHostsFile(); err != nil {
68
+	// Create a hosts file that can be mounted during container setup. For most
69
+	// networking modes (not host networking) it will be re-created before the
70
+	// container start, once its support for IPv6 is known.
71
+	if sb.config.hostsPath == "" {
72
+		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
73
+	}
74
+	dir, _ := filepath.Split(sb.config.hostsPath)
75
+	if err := createBasePath(dir); err != nil {
69 76
 		return err
70 77
 	}
71
-
72
-	if err := sb.updateParentHosts(); err != nil {
78
+	if err := sb.buildHostsFile(); err != nil {
73 79
 		return err
74 80
 	}
75 81
 
... ...
@@ -77,15 +100,6 @@ func (sb *Sandbox) setupResolutionFiles() error {
77 77
 }
78 78
 
79 79
 func (sb *Sandbox) buildHostsFile() error {
80
-	if sb.config.hostsPath == "" {
81
-		sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
82
-	}
83
-
84
-	dir, _ := filepath.Split(sb.config.hostsPath)
85
-	if err := createBasePath(dir); err != nil {
86
-		return err
87
-	}
88
-
89 80
 	// This is for the host mode networking
90 81
 	if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
91 82
 		// We are working under the assumption that the origin file option had been properly expressed by the upper layer
... ...
@@ -101,7 +115,16 @@ func (sb *Sandbox) buildHostsFile() error {
101 101
 		extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
102 102
 	}
103 103
 
104
-	return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent)
104
+	// Assume IPv6 support, unless it's definitely disabled.
105
+	buildf := etchosts.Build
106
+	if en, ok := sb.ipv6Enabled(); ok && !en {
107
+		buildf = etchosts.BuildNoIPv6
108
+	}
109
+	if err := buildf(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent); err != nil {
110
+		return err
111
+	}
112
+
113
+	return sb.updateParentHosts()
105 114
 }
106 115
 
107 116
 func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
... ...
@@ -135,6 +158,16 @@ func (sb *Sandbox) updateHostsFile(ifaceIPs []string) error {
135 135
 }
136 136
 
137 137
 func (sb *Sandbox) addHostsEntries(recs []etchosts.Record) {
138
+	// Assume IPv6 support, unless it's definitely disabled.
139
+	if en, ok := sb.ipv6Enabled(); ok && !en {
140
+		var filtered []etchosts.Record
141
+		for _, rec := range recs {
142
+			if addr, err := netip.ParseAddr(rec.IP); err == nil && !addr.Is6() {
143
+				filtered = append(filtered, rec)
144
+			}
145
+		}
146
+		recs = filtered
147
+	}
138 148
 	if err := etchosts.Add(sb.config.hostsPath, recs); err != nil {
139 149
 		log.G(context.TODO()).Warnf("Failed adding service host entries to the running container: %v", err)
140 150
 	}
... ...
@@ -157,6 +190,16 @@ func (sb *Sandbox) updateParentHosts() error {
157 157
 		if pSb == nil {
158 158
 			continue
159 159
 		}
160
+		// TODO(robmry) - filter out IPv6 addresses here if !sb.ipv6Enabled() but...
161
+		// - this is part of the implementation of '--link', which will be removed along
162
+		//   with the rest of legacy networking.
163
+		// - IPv6 addresses shouldn't be allocated if IPv6 is not available in a container,
164
+		//   and that change will come along later.
165
+		// - I think this may be dead code, it's not possible to start a parent container with
166
+		//   '--link child' unless the child has already started ("Error response from daemon:
167
+		//   Cannot link to a non running container"). So, when the child starts and this method
168
+		//   is called with updates for parents, the parents aren't running and GetSandbox()
169
+		//   returns nil.)
160 170
 		if err := etchosts.Update(pSb.config.hostsPath, update.ip, update.name); err != nil {
161 171
 			return err
162 172
 		}
... ...
@@ -7,6 +7,7 @@ import (
7 7
 	"time"
8 8
 
9 9
 	"github.com/containerd/log"
10
+	"github.com/docker/docker/libnetwork/netutils"
10 11
 	"github.com/docker/docker/libnetwork/osl"
11 12
 	"github.com/docker/docker/libnetwork/types"
12 13
 )
... ...
@@ -157,14 +158,39 @@ func (sb *Sandbox) SetKey(basePath string) error {
157 157
 		}
158 158
 	}
159 159
 
160
+	if err := sb.finishInitDNS(); err != nil {
161
+		return err
162
+	}
163
+
160 164
 	for _, ep := range sb.Endpoints() {
161 165
 		if err = sb.populateNetworkResources(ep); err != nil {
162 166
 			return err
163 167
 		}
164 168
 	}
169
+
165 170
 	return nil
166 171
 }
167 172
 
173
+// IPv6 support can always be determined for host networking. For other network
174
+// types it can only be determined once there's a container namespace to probe,
175
+// return ok=false in that case.
176
+func (sb *Sandbox) ipv6Enabled() (enabled, ok bool) {
177
+	// For host networking, IPv6 support depends on the host.
178
+	if sb.config.useDefaultSandBox {
179
+		return netutils.IsV6Listenable(), true
180
+	}
181
+
182
+	// For other network types, look at whether the container's loopback interface has an IPv6 address.
183
+	sb.mu.Lock()
184
+	osSbox := sb.osSbox
185
+	sb.mu.Unlock()
186
+
187
+	if osSbox == nil {
188
+		return false, false
189
+	}
190
+	return osSbox.IPv6LoEnabled(), true
191
+}
192
+
168 193
 func (sb *Sandbox) releaseOSSbox() error {
169 194
 	sb.mu.Lock()
170 195
 	osSbox := sb.osSbox