Browse code

Ignore kernel-assigned LL addrs when selecting "bip6"

Commit facb2323 aligned the way the default bridge's IPv6 subnet
and gateway addresses are selected with IPv4.

Part of that involved looking at addresses already on the bridge,
along with daemon config options. But, for IPv6, the kernel will
assign a link-local address to the bridge.

Make sure that address is ignored when selecting "bip6" when it's
not explicitly specified.

This is made slightly complicated because we allow fixed-cidr-v6
to be a link-local subnet (either the standard "fe80::/64", or
any other non-overlapping LL subnet in "fe80::/10").

Following this change, if fixed-cidr-v6 is (or is included by)
"fe80::/64", the bridge's kernel-assigned LL address may be used
as the network's gateway address - even though it may also get an
IPAM-assigned LL address.

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/12/05 03:19:41
Showing 2 changed files
... ...
@@ -12,6 +12,7 @@ import (
12 12
 	"path/filepath"
13 13
 	"runtime"
14 14
 	"runtime/debug"
15
+	"slices"
15 16
 	"strconv"
16 17
 	"strings"
17 18
 	"sync"
... ...
@@ -1190,6 +1191,15 @@ func selectBIP(
1190 1190
 	if err != nil {
1191 1191
 		return nil, nil, errors.Wrap(err, "list bridge addresses failed")
1192 1192
 	}
1193
+	// For IPv6, ignore the kernel-assigned link-local address. Remove all
1194
+	// link-local addresses, unless fixed-cidr-v6 has the standard link-local
1195
+	// prefix. (If fixed-cidr-v6 is the standard LL prefix, the kernel-assigned
1196
+	// address is likely to be used instead of an IPAM assigned address.)
1197
+	if family == netlink.FAMILY_V6 && (fCidrIP == nil || !isStandardLL(fCidrIP)) {
1198
+		bridgeNws = slices.DeleteFunc(bridgeNws, func(nlAddr netlink.Addr) bool {
1199
+			return isStandardLL(nlAddr.IP)
1200
+		})
1201
+	}
1193 1202
 	if len(bridgeNws) > 0 {
1194 1203
 		// Pick any address from the bridge as a starting point.
1195 1204
 		nw := bridgeNws[0].IPNet
... ...
@@ -1238,6 +1248,16 @@ func selectBIP(
1238 1238
 	return bIP, bIPNet, nil
1239 1239
 }
1240 1240
 
1241
+// isStandardLL returns true if ip is in fe80::/64 (the link local prefix is fe80::/10,
1242
+// but only fe80::/64 is normally used - however, it's possible to ask IPAM for a
1243
+// link-local subnet that's outside that range).
1244
+func isStandardLL(ip net.IP) bool {
1245
+	if ip == nil {
1246
+		return false
1247
+	}
1248
+	return ip.Mask(net.CIDRMask(64, 128)).Equal(net.ParseIP("fe80::"))
1249
+}
1250
+
1241 1251
 // Remove default bridge interface if present (--bridge=none use case)
1242 1252
 func removeDefaultBridgeInterface() {
1243 1253
 	if lnk, err := nlwrap.LinkByName(bridge.DefaultBridgeName); err == nil {
... ...
@@ -4,6 +4,7 @@ import (
4 4
 	"context"
5 5
 	"net"
6 6
 	"net/netip"
7
+	"slices"
7 8
 	"testing"
8 9
 
9 10
 	"github.com/docker/docker/api/types/network"
... ...
@@ -14,6 +15,7 @@ import (
14 14
 	"github.com/vishvananda/netlink"
15 15
 	"gotest.tools/v3/assert"
16 16
 	is "gotest.tools/v3/assert/cmp"
17
+	"gotest.tools/v3/poll"
17 18
 	"gotest.tools/v3/skip"
18 19
 )
19 20
 
... ...
@@ -112,6 +114,29 @@ func TestDaemonDefaultBridgeIPAM_Docker0(t *testing.T) {
112 112
 			},
113 113
 		},
114 114
 		{
115
+			name: "link-local fixed-cidr-v6",
116
+			daemonArgs: []string{
117
+				"--fixed-cidr", "192.168.176.0/24",
118
+				"--fixed-cidr-v6", "fe80::/64",
119
+			},
120
+			expIPAMConfig: []network.IPAMConfig{
121
+				{Subnet: "192.168.176.0/24", IPRange: "192.168.176.0/24"},
122
+				{Subnet: "fe80::/64", IPRange: "fe80::/64", Gateway: llGwPlaceholder},
123
+			},
124
+		},
125
+		{
126
+			name:               "nonstandard link-local fixed-cidr-v6",
127
+			initialBridgeAddrs: []string{"192.168.176.88/20", "fe80:1234::88/56"},
128
+			daemonArgs: []string{
129
+				"--fixed-cidr", "192.168.176.0/24",
130
+				"--fixed-cidr-v6", "fe80:1234::/64",
131
+			},
132
+			expIPAMConfig: []network.IPAMConfig{
133
+				{Subnet: "192.168.176.0/20", IPRange: "192.168.176.0/24", Gateway: "192.168.176.88"},
134
+				{Subnet: "fe80:1234::/56", IPRange: "fe80:1234::/64", Gateway: "fe80:1234::88"},
135
+			},
136
+		},
137
+		{
115 138
 			name:               "fixed-cidr within old bridge subnet with new bip",
116 139
 			initialBridgeAddrs: []string{"192.168.176.88/20", "fdd1:8161:2d2c::/56"},
117 140
 			daemonArgs: []string{
... ...
@@ -234,6 +259,30 @@ func TestDaemonDefaultBridgeIPAM_UserBr(t *testing.T) {
234 234
 			},
235 235
 		},
236 236
 		{
237
+			name:               "link-local fixed-cidr-v6",
238
+			initialBridgeAddrs: []string{"192.168.176.88/20"},
239
+			daemonArgs: []string{
240
+				"--fixed-cidr", "192.168.176.0/24",
241
+				"--fixed-cidr-v6", "fe80::/64",
242
+			},
243
+			expIPAMConfig: []network.IPAMConfig{
244
+				{Subnet: "192.168.176.0/20", IPRange: "192.168.176.0/24", Gateway: "192.168.176.88"},
245
+				{Subnet: "fe80::/64", IPRange: "fe80::/64", Gateway: llGwPlaceholder},
246
+			},
247
+		},
248
+		{
249
+			name:               "nonstandard link-local fixed-cidr-v6",
250
+			initialBridgeAddrs: []string{"192.168.176.88/20", "fe80:1234::88/56"},
251
+			daemonArgs: []string{
252
+				"--fixed-cidr", "192.168.176.0/24",
253
+				"--fixed-cidr-v6", "fe80:1234::/64",
254
+			},
255
+			expIPAMConfig: []network.IPAMConfig{
256
+				{Subnet: "192.168.176.0/20", IPRange: "192.168.176.0/24", Gateway: "192.168.176.88"},
257
+				{Subnet: "fe80:1234::/56", IPRange: "fe80:1234::/64", Gateway: "fe80:1234::88"},
258
+			},
259
+		},
260
+		{
237 261
 			name:               "fixed-cidr bigger than bridge subnet",
238 262
 			initialBridgeAddrs: []string{"192.168.176.88/24"},
239 263
 			daemonArgs:         []string{"--fixed-cidr", "192.168.176.0/20"},
... ...
@@ -310,6 +359,11 @@ func TestDaemonDefaultBridgeIPAM_UserBr(t *testing.T) {
310 310
 	}
311 311
 }
312 312
 
313
+// llGwPlaceholder can be used as a value for "Gateway" in expected IPAM config,
314
+// before comparison with actual results it'll be replaced by the kernel assigned
315
+// link local IPv6 address for the bridge.
316
+const llGwPlaceholder = "ll-gateway-placeholder"
317
+
313 318
 type defaultBridgeIPAMTestCase struct {
314 319
 	name               string
315 320
 	bridgeName         string
... ...
@@ -333,7 +387,7 @@ func testDefaultBridgeIPAM(ctx context.Context, t *testing.T, tc defaultBridgeIP
333 333
 		defer cleanup()
334 334
 
335 335
 		host.Do(t, func() {
336
-			createBridge(t, tc.bridgeName, tc.initialBridgeAddrs)
336
+			llAddr := createBridge(t, tc.bridgeName, tc.initialBridgeAddrs)
337 337
 
338 338
 			var dArgs []string
339 339
 			if !tc.ipv4Only {
... ...
@@ -365,7 +419,13 @@ func testDefaultBridgeIPAM(ctx context.Context, t *testing.T, tc defaultBridgeIP
365 365
 
366 366
 			insp, err := c.NetworkInspect(ctx, network.NetworkBridge, network.InspectOptions{})
367 367
 			assert.NilError(t, err)
368
-			assert.Check(t, is.DeepEqual(insp.IPAM.Config, tc.expIPAMConfig))
368
+			expIPAMConfig := slices.Clone(tc.expIPAMConfig)
369
+			for i := range expIPAMConfig {
370
+				if expIPAMConfig[i].Gateway == llGwPlaceholder {
371
+					expIPAMConfig[i].Gateway = llAddr.String()
372
+				}
373
+			}
374
+			assert.Check(t, is.DeepEqual(insp.IPAM.Config, expIPAMConfig))
369 375
 		})
370 376
 	})
371 377
 }
... ...
@@ -386,7 +446,10 @@ func newHostInL3Seg(t *testing.T, name, ip4, ip6 string) (networking.Host, func(
386 386
 	return l3.Hosts[hostname], func() { l3.Destroy(t) }
387 387
 }
388 388
 
389
-func createBridge(t *testing.T, ifName string, addrs []string) {
389
+// createBridge creates a bridge device named ifName, brings it up, waits for
390
+// the kernel to assign a link-local IPv6 address, assigns addrs, and returns
391
+// the kernel-assigned LL address.
392
+func createBridge(t *testing.T, ifName string, addrs []string) net.IP {
390 393
 	t.Helper()
391 394
 
392 395
 	// Get a netlink handle in this netns.
... ...
@@ -399,9 +462,29 @@ func createBridge(t *testing.T, ifName string, addrs []string) {
399 399
 			Name: ifName,
400 400
 		},
401 401
 	}
402
-
403 402
 	err = nlh.LinkAdd(link)
404 403
 	assert.NilError(t, err)
404
+
405
+	// Bring the interface up, and wait for the kernel to assign its link-local
406
+	// address (to cause maximum confusion - the LL address shouldn't be selected
407
+	// as "bip6").
408
+	brLink, err := nlh.LinkByName(ifName)
409
+	assert.NilError(t, err)
410
+	err = nlh.LinkSetUp(brLink)
411
+	assert.NilError(t, err)
412
+	var llAddr net.IP
413
+	poll.WaitOn(t, func(t poll.LogT) poll.Result {
414
+		addrs, err := nlh.AddrList(brLink, netlink.FAMILY_V6)
415
+		if err != nil {
416
+			return poll.Error(err)
417
+		}
418
+		if len(addrs) == 0 {
419
+			return poll.Continue("no IPv6 addresses")
420
+		}
421
+		llAddr = addrs[0].IP
422
+		return poll.Success()
423
+	})
424
+
405 425
 	for _, addr := range addrs {
406 426
 		ip, ipNet, err := net.ParseCIDR(addr)
407 427
 		assert.NilError(t, err)
... ...
@@ -409,4 +492,5 @@ func createBridge(t *testing.T, ifName string, addrs []string) {
409 409
 		err = nlh.AddrAdd(link, &netlink.Addr{IPNet: ipNet})
410 410
 		assert.NilError(t, err)
411 411
 	}
412
+	return llAddr
412 413
 }