Browse code

Release IPv6 address if unused due to sysctl setting

When running:
docker network create --ipv6 b46
docker run --rm -ti \
--network name=b46,driver-opt=com.docker.network.endpoint.sysctls=net.ipv6.conf.IFNAME.disable_ipv6=1 \
busybox

IPv6 is enabled in the container and the network, so an IPv6 address
will be allocated for the endpoint.

But, when the sysctl is applied, the IPv6 address will be removed
from the interface. So, no unsolicited neighbour advertisement
should be (or can be) sent, and the endpoint should not be treated
as dual-stack when selecting a gateway endpoint. If it is selected
as the gateway endpoint, setting up an IPv6 route via the network
will fail.

So, if the IPv6 address disappears after sysctls have been applied,
release the address and remove it from the endpoint's config.

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/11/28 03:45:58
Showing 12 changed files
... ...
@@ -739,11 +739,22 @@ func (daemon *Daemon) connectToNetwork(ctx context.Context, cfg *config.Config,
739 739
 			delete(ctr.NetworkSettings.Networks, nwName)
740 740
 		}
741 741
 	}()
742
-	if err := daemon.updateEndpointNetworkSettings(cfg, ctr, n, ep); err != nil {
743
-		return err
744
-	}
745 742
 
746 743
 	if nwName == network.DefaultNetwork {
744
+		// Legacy links must be prepared before the Endpoint.Join, because the network
745
+		// driver needs info about them - and, the daemon's network settings need to be
746
+		// filled-in for daemon.addLegacyLinks(). So, set up both here.
747
+		//
748
+		// However, this means if the Endpoint.Join drops the endpoint's IPv6 address
749
+		// (because there's a sysctl setting or some equivalent disabling IPv6 on the
750
+		// interface), host entries set up by addLegacyLinks() for IPv6 addresses of the
751
+		// linked container will be left behind in the container's /etc/hosts file. It
752
+		// won't be able to use those addresses, because it won't have IPv6 on that
753
+		// interface. So, even if the address is recycled by another container on the
754
+		// network, the old hosts entry can't access the wrong container.
755
+		if err := daemon.updateEndpointNetworkSettings(cfg, ctr, n, ep); err != nil {
756
+			return err
757
+		}
747 758
 		if err := daemon.addLegacyLinks(ctx, cfg, ctr, endpointConfig, sb); err != nil {
748 759
 			return err
749 760
 		}
... ...
@@ -754,10 +765,17 @@ func (daemon *Daemon) connectToNetwork(ctx context.Context, cfg *config.Config,
754 754
 		return err
755 755
 	}
756 756
 
757
+	// Connect the container to the network. Note that this will release the IPv6
758
+	// address assigned to the Endpoint, if IPv6 is disabled on the interface
759
+	// (probably by an endpoint specific sysctl setting).
757 760
 	if err := ep.Join(ctx, sb, joinOptions...); err != nil {
758 761
 		return err
759 762
 	}
760 763
 
764
+	if err := daemon.updateEndpointNetworkSettings(cfg, ctr, n, ep); err != nil {
765
+		return err
766
+	}
767
+
761 768
 	if !ctr.Managed {
762 769
 		// add container name/alias to DNS
763 770
 		if err := daemon.ActivateContainerServiceBinding(ctr.Name); err != nil {
... ...
@@ -107,6 +107,15 @@ type ExtConner interface {
107 107
 	ProgramExternalConnectivity(ctx context.Context, nid, eid string, gw4Id, gw6Id string) error
108 108
 }
109 109
 
110
+// IPv6Releaser is an optional interface for a network driver.
111
+type IPv6Releaser interface {
112
+	// ReleaseIPv6 tells the driver that an endpoint has no IPv6 address, even
113
+	// if the options passed to Driver.CreateEndpoint specified an address. This
114
+	// happens when, for example, sysctls applied after configuring the interface
115
+	// disable IPv6.
116
+	ReleaseIPv6(ctx context.Context, nid, eid string) error
117
+}
118
+
110 119
 // GwAllocChecker is an optional interface for a network driver.
111 120
 type GwAllocChecker interface {
112 121
 	// GetSkipGwAlloc returns true if the opts describe a network
... ...
@@ -127,7 +127,6 @@ type containerConfiguration struct {
127 127
 type connectivityConfiguration struct {
128 128
 	PortBindings []portmapperapi.PortBindingReq
129 129
 	ExposedPorts []types.TransportPort
130
-	NoProxy6To4  bool
131 130
 }
132 131
 
133 132
 type bridgeEndpoint struct {
... ...
@@ -167,6 +166,9 @@ type driver struct {
167 167
 	mu sync.Mutex
168 168
 }
169 169
 
170
+// Assert that the driver is a driverapi.IPv6Releaser.
171
+var _ driverapi.IPv6Releaser = (*driver)(nil)
172
+
170 173
 type gwMode string
171 174
 
172 175
 const (
... ...
@@ -1437,6 +1439,29 @@ func (d *driver) Join(ctx context.Context, nid, eid string, sboxKey string, jinf
1437 1437
 	return nil
1438 1438
 }
1439 1439
 
1440
+func (d *driver) ReleaseIPv6(ctx context.Context, nid, eid string) error {
1441
+	network, err := d.getNetwork(nid)
1442
+	if err != nil {
1443
+		return err
1444
+	}
1445
+
1446
+	endpoint, err := network.getEndpoint(eid)
1447
+	if err != nil {
1448
+		return err
1449
+	}
1450
+
1451
+	if endpoint == nil {
1452
+		return endpointNotFoundError(eid)
1453
+	}
1454
+
1455
+	_, netip6 := endpoint.netipAddrs()
1456
+	if err := network.firewallerNetwork.DelEndpoint(ctx, netip.Addr{}, netip6); err != nil {
1457
+		return fmt.Errorf("removing firewall rules while releasing IPv6 address: %v", err)
1458
+	}
1459
+	endpoint.addrv6 = nil
1460
+	return nil
1461
+}
1462
+
1440 1463
 // Leave method is invoked when a Sandbox detaches from an endpoint.
1441 1464
 func (d *driver) Leave(nid, eid string) error {
1442 1465
 	network, err := d.getNetwork(nid)
... ...
@@ -531,6 +531,8 @@ func (ep *Endpoint) sbJoin(ctx context.Context, sb *Sandbox, options ...Endpoint
531 531
 		return fmt.Errorf("failed to get driver during join: %v", err)
532 532
 	}
533 533
 
534
+	// Tell the driver about the new endpoint. The driver populates ep.joinInfo using
535
+	// the Endpoint's JoinInfo interface.
534 536
 	if err := d.Join(ctx, nid, epid, sb.Key(), ep, ep.generic, sb.Labels()); err != nil {
535 537
 		return err
536 538
 	}
... ...
@@ -938,7 +940,7 @@ func (ep *Endpoint) Delete(ctx context.Context, force bool) error {
938 938
 		return err
939 939
 	}
940 940
 
941
-	ep.releaseAddress()
941
+	ep.releaseIPAddresses()
942 942
 
943 943
 	return nil
944 944
 }
... ...
@@ -1228,7 +1230,7 @@ func (ep *Endpoint) assignAddressVersion(ipVer int, ipam ipamapi.Ipam) error {
1228 1228
 	return fmt.Errorf("no available IPv%d addresses on this network's address pools: %s (%s)", ipVer, n.Name(), n.ID())
1229 1229
 }
1230 1230
 
1231
-func (ep *Endpoint) releaseAddress() {
1231
+func (ep *Endpoint) releaseIPAddresses() {
1232 1232
 	n := ep.getNetwork()
1233 1233
 	if n.hasSpecialDriver() {
1234 1234
 		return
... ...
@@ -1255,6 +1257,53 @@ func (ep *Endpoint) releaseAddress() {
1255 1255
 	}
1256 1256
 }
1257 1257
 
1258
+func (ep *Endpoint) releaseIPv6Address(ctx context.Context) error {
1259
+	n := ep.network
1260
+	ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
1261
+		"net": n.Name(),
1262
+		"ep":  ep.name,
1263
+		"ip":  ep.iface.addrv6,
1264
+	}))
1265
+
1266
+	if ep.iface.addrv6 == nil || n.hasSpecialDriver() {
1267
+		return nil
1268
+	}
1269
+
1270
+	log.G(ctx).Debug("Releasing IPv6 address for endpoint")
1271
+
1272
+	ipam, _, err := n.getController().getIPAMDriver(n.ipamType)
1273
+	if err != nil {
1274
+		log.G(ctx).WithError(err).Warn("Failed to retrieve ipam driver to release IPv6 address")
1275
+		return err
1276
+	}
1277
+
1278
+	if err := ipam.ReleaseAddress(ep.iface.v6PoolID, ep.iface.addrv6.IP); err != nil {
1279
+		log.G(ctx).WithError(err).Warn("Failed to release IPv6 address")
1280
+		return err
1281
+	}
1282
+
1283
+	ep.iface.addrv6 = nil
1284
+	if ep.joinInfo != nil {
1285
+		ep.joinInfo.gw6 = nil
1286
+	}
1287
+
1288
+	d, err := n.driver(true)
1289
+	if err != nil {
1290
+		return fmt.Errorf("fetching driver to release IPv6 address: %v", err)
1291
+	}
1292
+	if dr, ok := d.(driverapi.IPv6Releaser); ok {
1293
+		if err := dr.ReleaseIPv6(ctx, n.id, ep.id); err != nil {
1294
+			return fmt.Errorf("releasing IPv6 address: %v", err)
1295
+		}
1296
+	}
1297
+
1298
+	if err := ep.network.getController().updateToStore(ctx, ep); err != nil {
1299
+		return err
1300
+	}
1301
+
1302
+	return nil
1303
+}
1304
+
1258 1305
 func (c *Controller) cleanupLocalEndpoints() error {
1259 1306
 	// Get used endpoints
1260 1307
 	eps := make(map[string]any)
... ...
@@ -1240,7 +1240,7 @@ func (n *Network) createEndpoint(ctx context.Context, name string, options ...En
1240 1240
 	}
1241 1241
 	defer func() {
1242 1242
 		if err != nil {
1243
-			ep.releaseAddress()
1243
+			ep.releaseIPAddresses()
1244 1244
 		}
1245 1245
 	}()
1246 1246
 
... ...
@@ -228,6 +228,9 @@ func moveLink(ctx context.Context, nlhHost nlwrap.Handle, iface netlink.Link, i
228 228
 // an auto-generated dest name that combines the provided dstPrefix and a
229 229
 // numeric suffix.
230 230
 //
231
+// If an IPv6 address is configured, but unused because of sysctl settings applied
232
+// after address assignment, it will be removed from the Interface.
233
+//
231 234
 // It's safe to call concurrently.
232 235
 func (n *Namespace) AddInterface(ctx context.Context, srcName, dstPrefix, dstName string, options ...IfaceOption) error {
233 236
 	ctx, span := otel.Tracer("").Start(ctx, "libnetwork.osl.AddInterface", trace.WithAttributes(
... ...
@@ -874,6 +877,23 @@ func (n *Namespace) configureInterface(ctx context.Context, nlh nlwrap.Handle, i
874 874
 		return err
875 875
 	}
876 876
 
877
+	// If an IPv6 address was configured, and now it's gone away, it's because of a sysctl
878
+	// setting. Remove the address from the Interface so that there's no attempt to send
879
+	// Neighbour Advertisements for it, and the caller knows to release the address.
880
+	if i.addressIPv6 != nil {
881
+		v6addrs, err := nlh.AddrList(iface, netlink.FAMILY_V6)
882
+		if err != nil {
883
+			return fmt.Errorf("failed to check IPv6 addresses: %v", err)
884
+		}
885
+		if len(v6addrs) == 0 {
886
+			log.G(ctx).WithFields(log.Fields{
887
+				"ip":     i.addressIPv6.String(),
888
+				"ifname": i.dstName,
889
+			}).Debug("IPv6 address not present after applying sysctls")
890
+			i.addressIPv6 = nil
891
+		}
892
+	}
893
+
877 894
 	return nil
878 895
 }
879 896
 
... ...
@@ -249,7 +249,8 @@ func (n *Namespace) Interfaces() []*Interface {
249 249
 	return ifaces
250 250
 }
251 251
 
252
-func (n *Namespace) ifaceBySrcName(srcName string) *Interface {
252
+// InterfaceBySrcName returns a pointer to the Interface with a matching srcName, else nil.
253
+func (n *Namespace) InterfaceBySrcName(srcName string) *Interface {
253 254
 	n.mu.Lock()
254 255
 	defer n.mu.Unlock()
255 256
 	for _, iface := range n.iFaces {
... ...
@@ -505,7 +506,6 @@ func (n *Namespace) IPv6LoEnabled() bool {
505 505
 func (n *Namespace) RefreshIPv6LoEnabled() {
506 506
 	n.mu.Lock()
507 507
 	defer n.mu.Unlock()
508
-
509 508
 	// If anything goes wrong, assume no-IPv6.
510 509
 	n.ipv6LoEnabledCached = false
511 510
 	iface, err := n.nlHandle.LinkByName("lo")
... ...
@@ -240,7 +240,7 @@ func (n *Namespace) SetDefaultRouteIPv6(srcName string) error {
240 240
 }
241 241
 
242 242
 func (n *Namespace) setDefaultRoute(srcName string, routeMatcher func(*net.IPNet) bool) error {
243
-	iface := n.ifaceBySrcName(srcName)
243
+	iface := n.InterfaceBySrcName(srcName)
244 244
 	if iface == nil {
245 245
 		return errors.New("no interface")
246 246
 	}
... ...
@@ -308,7 +308,7 @@ func (n *Namespace) unsetDefaultRoute(srcName string, routeMatcher func(*net.IPN
308 308
 		return nil
309 309
 	}
310 310
 
311
-	iface := n.ifaceBySrcName(srcName)
311
+	iface := n.InterfaceBySrcName(srcName)
312 312
 	if iface == nil {
313 313
 		return nil
314 314
 	}
... ...
@@ -326,6 +326,17 @@ func (sb *Sandbox) addEndpoint(ep *Endpoint) {
326 326
 	sb.endpoints = slices.Insert(sb.endpoints, i, ep)
327 327
 }
328 328
 
329
+func (sb *Sandbox) updateGwPriorityOrdering(ep *Endpoint) {
330
+	sb.mu.Lock()
331
+	defer sb.mu.Unlock()
332
+
333
+	sb.endpoints = slices.DeleteFunc(sb.endpoints, func(other *Endpoint) bool { return other.id == ep.id })
334
+	i := sort.Search(len(sb.endpoints), func(j int) bool {
335
+		return ep.Less(sb.endpoints[j])
336
+	})
337
+	sb.endpoints = slices.Insert(sb.endpoints, i, ep)
338
+}
339
+
329 340
 func (sb *Sandbox) populateNetworkResources(ctx context.Context, ep *Endpoint) (retErr error) {
330 341
 	ctx, span := otel.Tracer("").Start(ctx, "libnetwork.Sandbox.populateNetworkResources", trace.WithAttributes(
331 342
 		attribute.String("endpoint.Name", ep.Name())))
... ...
@@ -398,6 +398,21 @@ func (sb *Sandbox) populateNetworkResourcesOS(ctx context.Context, ep *Endpoint)
398 398
 			return fmt.Errorf("failed to add interface %s to sandbox: %v", i.srcName, err)
399 399
 		}
400 400
 
401
+		// If IPv6 is configured and the address isn't on the interface, it was applied successfully
402
+		// but then removed by a sysctl setting. Release the address and update the interface config.
403
+		if i.addrv6 != nil && !inDelete {
404
+			if oslIface := sb.osSbox.InterfaceBySrcName(i.srcName); oslIface != nil {
405
+				if oslIface.AddressIPv6() == nil {
406
+					if err := ep.releaseIPv6Address(ctx); err != nil {
407
+						return err
408
+					}
409
+					// The Sandbox's list of endpoints is sorted based on IPv6 connectivity, so
410
+					// make sure this one's in the right place.
411
+					sb.updateGwPriorityOrdering(ep)
412
+				}
413
+			}
414
+		}
415
+
401 416
 		if len(ep.virtualIP) > 0 && lbModeIsDSR {
402 417
 			if sb.loadBalancerNID == "" {
403 418
 				if err := sb.osSbox.DisableARPForVIP(i.srcName); err != nil {
... ...
@@ -1117,6 +1117,10 @@ func buildEndpointInfo(networkSettings *network.Settings, n *libnetwork.Network,
1117 1117
 		onesv6, _ := iface.AddressIPv6().Mask.Size()
1118 1118
 		networkSettings.Networks[nwName].GlobalIPv6Address = iface.AddressIPv6().IP.String()
1119 1119
 		networkSettings.Networks[nwName].GlobalIPv6PrefixLen = onesv6
1120
+	} else {
1121
+		// If IPv6 was disabled on the interface, and its address was removed, remove it here too.
1122
+		networkSettings.Networks[nwName].GlobalIPv6Address = ""
1123
+		networkSettings.Networks[nwName].GlobalIPv6PrefixLen = 0
1120 1124
 	}
1121 1125
 
1122 1126
 	return nil
... ...
@@ -31,6 +31,7 @@ import (
31 31
 	"github.com/moby/moby/v2/internal/testutil/daemon"
32 32
 	"gotest.tools/v3/assert"
33 33
 	is "gotest.tools/v3/assert/cmp"
34
+	"gotest.tools/v3/icmd"
34 35
 	"gotest.tools/v3/skip"
35 36
 )
36 37
 
... ...
@@ -1025,6 +1026,81 @@ func TestDisableIPv6Addrs(t *testing.T) {
1025 1025
 	}
1026 1026
 }
1027 1027
 
1028
+// TestDisableIPv6OnInterface checks that it's possible to disable IPv6 on an
1029
+// endpoint in an IPv6 network using a sysctl.
1030
+func TestDisableIPv6OnInterface(t *testing.T) {
1031
+	ctx := setupTest(t)
1032
+	d := daemon.New(t)
1033
+	d.StartWithBusybox(ctx, t, "--ipv6")
1034
+	defer d.Stop(t)
1035
+
1036
+	c := d.NewClientT(t)
1037
+	defer c.Close()
1038
+
1039
+	tests := []struct {
1040
+		name    string
1041
+		netName string
1042
+	}{
1043
+		{
1044
+			name:    "default bridge",
1045
+			netName: "bridge",
1046
+		},
1047
+		{
1048
+			name:    "user defined bridge",
1049
+			netName: "testnet",
1050
+		},
1051
+	}
1052
+
1053
+	for _, tc := range tests {
1054
+		t.Run(tc.netName, func(t *testing.T) {
1055
+			if tc.netName != "bridge" {
1056
+				network.CreateNoError(ctx, t, c, tc.netName, network.WithIPv6())
1057
+				defer network.RemoveNoError(ctx, t, c, tc.netName)
1058
+			}
1059
+
1060
+			const ctrName = "ctr"
1061
+			ctrId := container.Run(ctx, t, c,
1062
+				container.WithName(ctrName),
1063
+				container.WithNetworkMode(tc.netName),
1064
+				container.WithExposedPorts("80/tcp"),
1065
+				container.WithPortMap(containertypes.PortMap{"80/tcp": {{HostPort: "8080"}}}),
1066
+				container.WithEndpointSettings(tc.netName, &networktypes.EndpointSettings{
1067
+					DriverOpts: map[string]string{
1068
+						netlabel.EndpointSysctls: "net.ipv6.conf.IFNAME.disable_ipv6=1",
1069
+					},
1070
+				}),
1071
+			)
1072
+			defer c.ContainerRemove(ctx, ctrId, client.ContainerRemoveOptions{Force: true})
1073
+
1074
+			// The interface should not have any IPv6 addresses.
1075
+			execRes := container.ExecT(ctx, t, c, ctrId, []string{"ip", "a", "show", "eth0"})
1076
+			assert.Check(t, !strings.Contains(execRes.Stdout(), "inet6"),
1077
+				"Unexpected IPv6 address in: %s", execRes.Stdout())
1078
+
1079
+			// Inspect should not show an IPv6 container address.
1080
+			inspRes2 := container.Inspect(ctx, t, c, ctrId)
1081
+			assert.Check(t, is.Equal("", inspRes2.NetworkSettings.Networks[tc.netName].GlobalIPv6Address))
1082
+			assert.Check(t, is.Equal(0, inspRes2.NetworkSettings.Networks[tc.netName].GlobalIPv6PrefixLen))
1083
+
1084
+			// Port mappings should be IPv4-only - but can't see the proxy processes in the rootless netns.
1085
+			if !testEnv.IsRootless() {
1086
+				checkProxies(ctx, t, c, d.Pid(), []expProxyCfg{
1087
+					{"tcp", "0.0.0.0", "8080", ctrName, tc.netName, true, "80"},
1088
+					{"tcp", "::", "8080", ctrName, tc.netName, true, "80"},
1089
+				})
1090
+			}
1091
+
1092
+			// There should not be an IPv6 DNS or /etc/hosts entry.
1093
+			runRes := container.RunAttach(ctx, t, c,
1094
+				container.WithNetworkMode(tc.netName),
1095
+				container.WithCmd("ping", "-6", ctrName),
1096
+			)
1097
+			assert.Check(t, is.Equal(runRes.ExitCode, 1))
1098
+			assert.Check(t, is.Contains(runRes.Stderr.String(), "bad address"))
1099
+		})
1100
+	}
1101
+}
1102
+
1028 1103
 // Check that a container in a network with IPv4 disabled doesn't get
1029 1104
 // IPv4 addresses.
1030 1105
 func TestDisableIPv4(t *testing.T) {
... ...
@@ -1502,9 +1578,12 @@ func checkProxies(ctx context.Context, t *testing.T, c *client.Client, daemonPid
1502 1502
 	}
1503 1503
 
1504 1504
 	gotProxies := make([]string, 0, len(exp))
1505
-	res, err := exec.Command("ps", "-f", "--ppid", strconv.Itoa(daemonPid)).CombinedOutput()
1506
-	assert.NilError(t, err)
1507
-	for _, line := range strings.Split(string(res), "\n") {
1505
+	res := icmd.RunCommand("ps", "-f", "--ppid", strconv.Itoa(daemonPid))
1506
+	if res.Error != nil {
1507
+		t.Error(res)
1508
+		return
1509
+	}
1510
+	for _, line := range strings.Split(res.Stdout(), "\n") {
1508 1511
 		_, args, ok := strings.Cut(line, "docker-proxy")
1509 1512
 		if !ok {
1510 1513
 			continue
... ...
@@ -1522,7 +1601,7 @@ func checkProxies(ctx context.Context, t *testing.T, c *client.Client, daemonPid
1522 1522
 		gotProxies = append(gotProxies, makeExpStr(proto, hostIP, hostPort, ctrIP, ctrPort))
1523 1523
 	}
1524 1524
 
1525
-	assert.DeepEqual(t, gotProxies, wantProxies)
1525
+	assert.Check(t, is.DeepEqual(gotProxies, wantProxies))
1526 1526
 }
1527 1527
 
1528 1528
 // Check that a gratuitous ARP / neighbour advertisement is sent for a new