Browse code

Do not remove kernel-ll addresses from bridges

Make the behaviour enabled by env var DOCKER_BRIDGE_PRESERVE_KERNEL_LL
the default...
- don't remove kernel assigned link-local addresses
- or any address in fe80::/64
- don't assign fe80::1 to a bridge

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/05/01 18:43:19
Showing 5 changed files
... ...
@@ -531,41 +531,35 @@ func TestDefaultBridgeAddresses(t *testing.T) {
531 531
 		},
532 532
 	}
533 533
 
534
-	for _, preserveKernelLL := range []bool{false, true} {
535
-		var dopts []daemon.Option
536
-		if preserveKernelLL {
537
-			dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
538
-		}
539
-		d := daemon.New(t, dopts...)
540
-		c := d.NewClientT(t)
541
-
542
-		for _, tc := range testcases {
543
-			for _, step := range tc.steps {
544
-				tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
545
-				t.Run(tcName, func(t *testing.T) {
546
-					ctx := testutil.StartSpan(ctx, t)
547
-					// Check that the daemon starts - regression test for:
548
-					//   https://github.com/moby/moby/issues/46829
549
-					d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
550
-
551
-					// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
552
-					cID := container.Run(ctx, t, c)
553
-					defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
554
-
555
-					d.Stop(t)
556
-
557
-					// Check that the expected addresses have been applied to the bridge. (Skip in
558
-					// rootless mode, because the bridge is in a different network namespace.)
559
-					if !testEnv.IsRootless() {
560
-						res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
561
-						assert.Equal(t, res.ExitCode, 0, step.stepName)
562
-						stdout := res.Stdout()
563
-						for _, expAddr := range step.expAddrs {
564
-							assert.Check(t, is.Contains(stdout, expAddr))
565
-						}
534
+	d := daemon.New(t)
535
+	defer d.Stop(t)
536
+	c := d.NewClientT(t)
537
+
538
+	for _, tc := range testcases {
539
+		for _, step := range tc.steps {
540
+			t.Run(tc.name+"/"+step.stepName, func(t *testing.T) {
541
+				ctx := testutil.StartSpan(ctx, t)
542
+				// Check that the daemon starts - regression test for:
543
+				//   https://github.com/moby/moby/issues/46829
544
+				d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
545
+
546
+				// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
547
+				cID := container.Run(ctx, t, c)
548
+				defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
549
+
550
+				d.Stop(t)
551
+
552
+				// Check that the expected addresses have been applied to the bridge. (Skip in
553
+				// rootless mode, because the bridge is in a different network namespace.)
554
+				if !testEnv.IsRootless() {
555
+					res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
556
+					assert.Equal(t, res.ExitCode, 0, step.stepName)
557
+					stdout := res.Stdout()
558
+					for _, expAddr := range step.expAddrs {
559
+						assert.Check(t, is.Contains(stdout, expAddr))
566 560
 					}
567
-				})
568
-			}
561
+				}
562
+			})
569 563
 		}
570 564
 	}
571 565
 }
... ...
@@ -5,7 +5,6 @@ import (
5 5
 	"fmt"
6 6
 	"net"
7 7
 	"net/netip"
8
-	"os"
9 8
 
10 9
 	"github.com/containerd/log"
11 10
 	"github.com/docker/docker/errdefs"
... ...
@@ -71,90 +70,66 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
71 71
 	return addrs, nil
72 72
 }
73 73
 
74
-func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
75
-	requiredAddrs = make(map[netip.Addr]netip.Prefix)
76
-
77
-	if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
78
-		// Always give the bridge 'fe80::1' - every interface is required to have an
79
-		// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
80
-		// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
81
-		// assigned address will not be a second address in the LL subnet.
82
-		ra, ok := netiputil.ToPrefix(bridgeIPv6)
83
-		if !ok {
84
-			err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
85
-			return nil, err
86
-		}
87
-		requiredAddrs[ra.Addr()] = ra
88
-	}
74
+func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
75
+	// Remember the configured addresses.
76
+	i.bridgeIPv6 = config.AddressIPv6
77
+	i.gatewayIPv6 = config.AddressIPv6.IP
89 78
 
90
-	ra, ok := netiputil.ToPrefix(config.AddressIPv6)
79
+	addrPrefix, ok := netiputil.ToPrefix(config.AddressIPv6)
91 80
 	if !ok {
92
-		err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
93
-		return nil, err
81
+		return errdefs.System(
82
+			fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix",
83
+				config.AddressIPv6.String()))
94 84
 	}
95
-	requiredAddrs[ra.Addr()] = ra
96 85
 
97
-	return requiredAddrs, nil
98
-}
99
-
100
-func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
101 86
 	// Get the IPv6 addresses currently assigned to the bridge, if any.
102 87
 	existingAddrs, err := i.addresses(netlink.FAMILY_V6)
103 88
 	if err != nil {
104 89
 		return errdefs.System(err)
105 90
 	}
106
-
107
-	// Get the list of required IPv6 addresses for this bridge.
108
-	var requiredAddrs map[netip.Addr]netip.Prefix
109
-	requiredAddrs, err = getRequiredIPv6Addrs(config)
110
-	if err != nil {
111
-		return errdefs.System(err)
112
-	}
113
-	i.bridgeIPv6 = config.AddressIPv6
114
-	i.gatewayIPv6 = config.AddressIPv6.IP
115
-
116 91
 	// Remove addresses that aren't required.
117 92
 	for _, existingAddr := range existingAddrs {
118 93
 		ea, ok := netip.AddrFromSlice(existingAddr.IP)
119 94
 		if !ok {
120
-			return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
95
+			return errdefs.System(
96
+				fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
121 97
 		}
122
-		// Optionally, avoid deleting the kernel-assigned link local address.
123
-		// (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the
124
-		// kernel_ll address was deleted, the bridge won't get a new kernel_ll address.)
125
-		if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
126
-			if p, _ := ea.Prefix(64); p == linkLocalPrefix {
127
-				continue
128
-			}
98
+		// Don't delete the kernel-assigned link local address (or fe80::1 - if it was
99
+		// assigned to the bridge by an older version of the daemon that deleted the
100
+		// kernel_ll address, the bridge won't get a new kernel_ll address.) But, do
101
+		// delete unexpected link-local addresses (fe80::/10) that aren't in fe80::/64,
102
+		// those have been IPAM-assigned.
103
+		if p, _ := ea.Prefix(64); p == linkLocalPrefix {
104
+			continue
129 105
 		}
130 106
 		// Ignore the prefix length when comparing addresses, it's informational
131 107
 		// (RFC-5942 section 4), and removing/re-adding an address that's still valid
132 108
 		// would disrupt traffic on live-restore.
133
-		if _, required := requiredAddrs[ea]; !required {
109
+		if ea != addrPrefix.Addr() {
134 110
 			err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns.
135 111
 			if err != nil {
136
-				log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
112
+				log.G(context.TODO()).WithFields(log.Fields{
113
+					"error":   err,
114
+					"address": existingAddr.IPNet},
115
+				).Warnf("Failed to remove residual IPv6 address from bridge")
137 116
 			}
138 117
 		}
139 118
 	}
140
-	// Add or update required addresses.
141
-	for _, addrPrefix := range requiredAddrs {
142
-		// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
143
-		// existing bridge in a way that doesn't affect the bridge's assigned address,
144
-		// the old address has not been removed at this point - because that would be
145
-		// service-affecting for a running container.
146
-		//
147
-		// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
148
-		// '2000:dbe::/80', the default bridge will still be assigned address
149
-		// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
150
-		// the user is likely to expect to see it updated from '64' to '80'.
151
-		// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
152
-		// doesn't update the prefix length. This is a cosmetic problem, the prefix
153
-		// length of an assigned address is not used to determine whether an address is
154
-		// "on-link" (RFC-5942).
155
-		if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
156
-			return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
157
-		}
119
+	// Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an
120
+	// existing bridge in a way that doesn't affect the bridge's assigned address,
121
+	// the old address has not been removed at this point - because that would be
122
+	// service-affecting for a running container.
123
+	//
124
+	// But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to
125
+	// '2000:dbe::/80', the default bridge will still be assigned address
126
+	// '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and
127
+	// the user is likely to expect to see it updated from '64' to '80'.
128
+	// Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
129
+	// doesn't update the prefix length. This is a cosmetic problem, the prefix
130
+	// length of an assigned address is not used to determine whether an address is
131
+	// "on-link" (RFC-5942).
132
+	if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
133
+		return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
158 134
 	}
159 135
 	return nil
160 136
 }
... ...
@@ -2,12 +2,9 @@ package bridge
2 2
 
3 3
 import (
4 4
 	"net"
5
-	"net/netip"
6
-	"strings"
7 5
 	"testing"
8 6
 
9 7
 	"github.com/docker/docker/internal/testutils/netnsutils"
10
-	"github.com/google/go-cmp/cmp"
11 8
 	"github.com/vishvananda/netlink"
12 9
 	"gotest.tools/v3/assert"
13 10
 	is "gotest.tools/v3/assert/cmp"
... ...
@@ -96,48 +93,6 @@ func TestAddressesNonEmptyInterface(t *testing.T) {
96 96
 	assert.Equal(t, addrs[0].IPNet.String(), expAddrV6)
97 97
 }
98 98
 
99
-func TestGetRequiredIPv6Addrs(t *testing.T) {
100
-	testcases := []struct {
101
-		name         string
102
-		addressIPv6  string
103
-		expReqdAddrs []string
104
-	}{
105
-		{
106
-			name:         "Regular address, expect default link local",
107
-			addressIPv6:  "2000:3000::1/80",
108
-			expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"},
109
-		},
110
-		{
111
-			name:         "Standard link local address only",
112
-			addressIPv6:  "fe80::1/64",
113
-			expReqdAddrs: []string{"fe80::1/64"},
114
-		},
115
-		{
116
-			name:         "Nonstandard link local address",
117
-			addressIPv6:  "fe80:abcd::1/42",
118
-			expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"},
119
-		},
120
-	}
121
-
122
-	for _, tc := range testcases {
123
-		t.Run(tc.name, func(t *testing.T) {
124
-			config := &networkConfiguration{
125
-				AddressIPv6: cidrToIPNet(t, tc.addressIPv6),
126
-			}
127
-
128
-			expResult := map[netip.Addr]netip.Prefix{}
129
-			for _, addr := range tc.expReqdAddrs {
130
-				expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr)
131
-			}
132
-
133
-			reqd, err := getRequiredIPv6Addrs(config)
134
-			assert.Check(t, is.Nil(err))
135
-			assert.Check(t, is.DeepEqual(reqd, expResult,
136
-				cmp.Comparer(func(a, b netip.Prefix) bool { return a == b })))
137
-		})
138
-	}
139
-}
140
-
141 99
 func TestProgramIPv6Addresses(t *testing.T) {
142 100
 	defer netnsutils.SetupTestOSContext(t)()
143 101
 
... ...
@@ -159,11 +114,11 @@ func TestProgramIPv6Addresses(t *testing.T) {
159 159
 	i := prepTestBridge(t, nc)
160 160
 
161 161
 	// The bridge has no addresses, ask for a regular IPv6 network and expect it to
162
-	// be added to the bridge, with the default link local address.
162
+	// be added to the bridge.
163 163
 	nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
164 164
 	err := i.programIPv6Addresses(nc)
165 165
 	assert.NilError(t, err)
166
-	checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
166
+	checkAddrs(i, nc, []string{"2000:3000::1/64"})
167 167
 
168 168
 	// Shrink the subnet of that regular address, the prefix length of the address
169 169
 	// will not be modified - but it's informational-only, the address itself has
... ...
@@ -171,7 +126,7 @@ func TestProgramIPv6Addresses(t *testing.T) {
171 171
 	nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80")
172 172
 	err = i.programIPv6Addresses(nc)
173 173
 	assert.NilError(t, err)
174
-	checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
174
+	checkAddrs(i, nc, []string{"2000:3000::1/64"})
175 175
 
176 176
 	// Ask for link-local only, by specifying an address with the Link Local prefix.
177 177
 	// The regular address should be removed.
... ...
@@ -180,9 +135,16 @@ func TestProgramIPv6Addresses(t *testing.T) {
180 180
 	assert.NilError(t, err)
181 181
 	checkAddrs(i, nc, []string{"fe80::1/64"})
182 182
 
183
-	// Swap the standard link local address for a nonstandard one.
183
+	// Swap the standard link local address for a nonstandard one. The standard LL
184
+	// address will not be removed.
184 185
 	nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55")
185 186
 	err = i.programIPv6Addresses(nc)
186 187
 	assert.NilError(t, err)
187 188
 	checkAddrs(i, nc, []string{"fe80:5555::1/55", "fe80::1/64"})
189
+
190
+	// Back to the original address, expect the nonstandard LL address to be replaced.
191
+	nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64")
192
+	err = i.programIPv6Addresses(nc)
193
+	assert.NilError(t, err)
194
+	checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
188 195
 }
... ...
@@ -90,7 +90,7 @@ func TestLinkCreate(t *testing.T) {
90 90
 
91 91
 	ip6 := te.iface.addrv6.IP
92 92
 	if !n.bridge.bridgeIPv6.Contains(ip6) {
93
-		t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), bridgeIPv6.String())
93
+		t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), n.bridge.bridgeIPv6.String())
94 94
 	}
95 95
 
96 96
 	if !te.gw.Equal(n.bridge.bridgeIPv4.IP) {
... ...
@@ -43,14 +43,14 @@ func TestSetupIPv6(t *testing.T) {
43 43
 
44 44
 	var found bool
45 45
 	for _, addr := range addrsv6 {
46
-		if bridgeIPv6.String() == addr.IPNet.String() {
46
+		if config.AddressIPv6.String() == addr.IPNet.String() {
47 47
 			found = true
48 48
 			break
49 49
 		}
50 50
 	}
51 51
 
52 52
 	if !found {
53
-		t.Fatalf("Bridge device does not have requested IPv6 address %v", bridgeIPv6)
53
+		t.Fatalf("Bridge device does not have requested IPv6 address %v", config.AddressIPv6)
54 54
 	}
55 55
 }
56 56