Make the behaviour enabled by env var DOCKER_BRIDGE_PRESERVE_KERNEL_LL
the default...
- don't remove kernel assigned link-local addresses
- or any address in fe80::/64
- don't assign fe80::1 to a bridge
Signed-off-by: Rob Murray <rob.murray@docker.com>
| ... | ... |
@@ -531,41 +531,35 @@ func TestDefaultBridgeAddresses(t *testing.T) {
|
| 531 | 531 |
}, |
| 532 | 532 |
} |
| 533 | 533 |
|
| 534 |
- for _, preserveKernelLL := range []bool{false, true} {
|
|
| 535 |
- var dopts []daemon.Option |
|
| 536 |
- if preserveKernelLL {
|
|
| 537 |
- dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
|
|
| 538 |
- } |
|
| 539 |
- d := daemon.New(t, dopts...) |
|
| 540 |
- c := d.NewClientT(t) |
|
| 541 |
- |
|
| 542 |
- for _, tc := range testcases {
|
|
| 543 |
- for _, step := range tc.steps {
|
|
| 544 |
- tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
|
|
| 545 |
- t.Run(tcName, func(t *testing.T) {
|
|
| 546 |
- ctx := testutil.StartSpan(ctx, t) |
|
| 547 |
- // Check that the daemon starts - regression test for: |
|
| 548 |
- // https://github.com/moby/moby/issues/46829 |
|
| 549 |
- d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6) |
|
| 550 |
- |
|
| 551 |
- // Start a container, so that the bridge is set "up" and gets a kernel_ll address. |
|
| 552 |
- cID := container.Run(ctx, t, c) |
|
| 553 |
- defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
|
|
| 554 |
- |
|
| 555 |
- d.Stop(t) |
|
| 556 |
- |
|
| 557 |
- // Check that the expected addresses have been applied to the bridge. (Skip in |
|
| 558 |
- // rootless mode, because the bridge is in a different network namespace.) |
|
| 559 |
- if !testEnv.IsRootless() {
|
|
| 560 |
- res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0") |
|
| 561 |
- assert.Equal(t, res.ExitCode, 0, step.stepName) |
|
| 562 |
- stdout := res.Stdout() |
|
| 563 |
- for _, expAddr := range step.expAddrs {
|
|
| 564 |
- assert.Check(t, is.Contains(stdout, expAddr)) |
|
| 565 |
- } |
|
| 534 |
+ d := daemon.New(t) |
|
| 535 |
+ defer d.Stop(t) |
|
| 536 |
+ c := d.NewClientT(t) |
|
| 537 |
+ |
|
| 538 |
+ for _, tc := range testcases {
|
|
| 539 |
+ for _, step := range tc.steps {
|
|
| 540 |
+ t.Run(tc.name+"/"+step.stepName, func(t *testing.T) {
|
|
| 541 |
+ ctx := testutil.StartSpan(ctx, t) |
|
| 542 |
+ // Check that the daemon starts - regression test for: |
|
| 543 |
+ // https://github.com/moby/moby/issues/46829 |
|
| 544 |
+ d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6) |
|
| 545 |
+ |
|
| 546 |
+ // Start a container, so that the bridge is set "up" and gets a kernel_ll address. |
|
| 547 |
+ cID := container.Run(ctx, t, c) |
|
| 548 |
+ defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})
|
|
| 549 |
+ |
|
| 550 |
+ d.Stop(t) |
|
| 551 |
+ |
|
| 552 |
+ // Check that the expected addresses have been applied to the bridge. (Skip in |
|
| 553 |
+ // rootless mode, because the bridge is in a different network namespace.) |
|
| 554 |
+ if !testEnv.IsRootless() {
|
|
| 555 |
+ res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0") |
|
| 556 |
+ assert.Equal(t, res.ExitCode, 0, step.stepName) |
|
| 557 |
+ stdout := res.Stdout() |
|
| 558 |
+ for _, expAddr := range step.expAddrs {
|
|
| 559 |
+ assert.Check(t, is.Contains(stdout, expAddr)) |
|
| 566 | 560 |
} |
| 567 |
- }) |
|
| 568 |
- } |
|
| 561 |
+ } |
|
| 562 |
+ }) |
|
| 569 | 563 |
} |
| 570 | 564 |
} |
| 571 | 565 |
} |
| ... | ... |
@@ -5,7 +5,6 @@ import ( |
| 5 | 5 |
"fmt" |
| 6 | 6 |
"net" |
| 7 | 7 |
"net/netip" |
| 8 |
- "os" |
|
| 9 | 8 |
|
| 10 | 9 |
"github.com/containerd/log" |
| 11 | 10 |
"github.com/docker/docker/errdefs" |
| ... | ... |
@@ -71,90 +70,66 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
|
| 71 | 71 |
return addrs, nil |
| 72 | 72 |
} |
| 73 | 73 |
|
| 74 |
-func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
|
|
| 75 |
- requiredAddrs = make(map[netip.Addr]netip.Prefix) |
|
| 76 |
- |
|
| 77 |
- if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
|
|
| 78 |
- // Always give the bridge 'fe80::1' - every interface is required to have an |
|
| 79 |
- // address in 'fe80::/64'. Linux may assign an address, but we'll replace it with |
|
| 80 |
- // 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool |
|
| 81 |
- // assigned address will not be a second address in the LL subnet. |
|
| 82 |
- ra, ok := netiputil.ToPrefix(bridgeIPv6) |
|
| 83 |
- if !ok {
|
|
| 84 |
- err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
|
|
| 85 |
- return nil, err |
|
| 86 |
- } |
|
| 87 |
- requiredAddrs[ra.Addr()] = ra |
|
| 88 |
- } |
|
| 74 |
+func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
|
|
| 75 |
+ // Remember the configured addresses. |
|
| 76 |
+ i.bridgeIPv6 = config.AddressIPv6 |
|
| 77 |
+ i.gatewayIPv6 = config.AddressIPv6.IP |
|
| 89 | 78 |
|
| 90 |
- ra, ok := netiputil.ToPrefix(config.AddressIPv6) |
|
| 79 |
+ addrPrefix, ok := netiputil.ToPrefix(config.AddressIPv6) |
|
| 91 | 80 |
if !ok {
|
| 92 |
- err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
|
|
| 93 |
- return nil, err |
|
| 81 |
+ return errdefs.System( |
|
| 82 |
+ fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix",
|
|
| 83 |
+ config.AddressIPv6.String())) |
|
| 94 | 84 |
} |
| 95 |
- requiredAddrs[ra.Addr()] = ra |
|
| 96 | 85 |
|
| 97 |
- return requiredAddrs, nil |
|
| 98 |
-} |
|
| 99 |
- |
|
| 100 |
-func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) error {
|
|
| 101 | 86 |
// Get the IPv6 addresses currently assigned to the bridge, if any. |
| 102 | 87 |
existingAddrs, err := i.addresses(netlink.FAMILY_V6) |
| 103 | 88 |
if err != nil {
|
| 104 | 89 |
return errdefs.System(err) |
| 105 | 90 |
} |
| 106 |
- |
|
| 107 |
- // Get the list of required IPv6 addresses for this bridge. |
|
| 108 |
- var requiredAddrs map[netip.Addr]netip.Prefix |
|
| 109 |
- requiredAddrs, err = getRequiredIPv6Addrs(config) |
|
| 110 |
- if err != nil {
|
|
| 111 |
- return errdefs.System(err) |
|
| 112 |
- } |
|
| 113 |
- i.bridgeIPv6 = config.AddressIPv6 |
|
| 114 |
- i.gatewayIPv6 = config.AddressIPv6.IP |
|
| 115 |
- |
|
| 116 | 91 |
// Remove addresses that aren't required. |
| 117 | 92 |
for _, existingAddr := range existingAddrs {
|
| 118 | 93 |
ea, ok := netip.AddrFromSlice(existingAddr.IP) |
| 119 | 94 |
if !ok {
|
| 120 |
- return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
|
|
| 95 |
+ return errdefs.System( |
|
| 96 |
+ fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
|
|
| 121 | 97 |
} |
| 122 |
- // Optionally, avoid deleting the kernel-assigned link local address. |
|
| 123 |
- // (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the |
|
| 124 |
- // kernel_ll address was deleted, the bridge won't get a new kernel_ll address.) |
|
| 125 |
- if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
|
|
| 126 |
- if p, _ := ea.Prefix(64); p == linkLocalPrefix {
|
|
| 127 |
- continue |
|
| 128 |
- } |
|
| 98 |
+ // Don't delete the kernel-assigned link local address (or fe80::1 - if it was |
|
| 99 |
+ // assigned to the bridge by an older version of the daemon that deleted the |
|
| 100 |
+ // kernel_ll address, the bridge won't get a new kernel_ll address.) But, do |
|
| 101 |
+ // delete unexpected link-local addresses (fe80::/10) that aren't in fe80::/64, |
|
| 102 |
+ // those have been IPAM-assigned. |
|
| 103 |
+ if p, _ := ea.Prefix(64); p == linkLocalPrefix {
|
|
| 104 |
+ continue |
|
| 129 | 105 |
} |
| 130 | 106 |
// Ignore the prefix length when comparing addresses, it's informational |
| 131 | 107 |
// (RFC-5942 section 4), and removing/re-adding an address that's still valid |
| 132 | 108 |
// would disrupt traffic on live-restore. |
| 133 |
- if _, required := requiredAddrs[ea]; !required {
|
|
| 109 |
+ if ea != addrPrefix.Addr() {
|
|
| 134 | 110 |
err := i.nlh.AddrDel(i.Link, &existingAddr) //#nosec G601 -- Memory aliasing is not an issue in practice as the &existingAddr pointer is not retained by the callee after the AddrDel() call returns. |
| 135 | 111 |
if err != nil {
|
| 136 |
- log.G(context.TODO()).WithFields(log.Fields{"error": err, "address": existingAddr.IPNet}).Warnf("Failed to remove residual IPv6 address from bridge")
|
|
| 112 |
+ log.G(context.TODO()).WithFields(log.Fields{
|
|
| 113 |
+ "error": err, |
|
| 114 |
+ "address": existingAddr.IPNet}, |
|
| 115 |
+ ).Warnf("Failed to remove residual IPv6 address from bridge")
|
|
| 137 | 116 |
} |
| 138 | 117 |
} |
| 139 | 118 |
} |
| 140 |
- // Add or update required addresses. |
|
| 141 |
- for _, addrPrefix := range requiredAddrs {
|
|
| 142 |
- // Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an |
|
| 143 |
- // existing bridge in a way that doesn't affect the bridge's assigned address, |
|
| 144 |
- // the old address has not been removed at this point - because that would be |
|
| 145 |
- // service-affecting for a running container. |
|
| 146 |
- // |
|
| 147 |
- // But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to |
|
| 148 |
- // '2000:dbe::/80', the default bridge will still be assigned address |
|
| 149 |
- // '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and |
|
| 150 |
- // the user is likely to expect to see it updated from '64' to '80'. |
|
| 151 |
- // Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
|
|
| 152 |
- // doesn't update the prefix length. This is a cosmetic problem, the prefix |
|
| 153 |
- // length of an assigned address is not used to determine whether an address is |
|
| 154 |
- // "on-link" (RFC-5942). |
|
| 155 |
- if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
|
|
| 156 |
- return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
|
|
| 157 |
- } |
|
| 119 |
+ // Using AddrReplace(), rather than AddrAdd(). When the subnet is changed for an |
|
| 120 |
+ // existing bridge in a way that doesn't affect the bridge's assigned address, |
|
| 121 |
+ // the old address has not been removed at this point - because that would be |
|
| 122 |
+ // service-affecting for a running container. |
|
| 123 |
+ // |
|
| 124 |
+ // But if, for example, 'fixed-cidr-v6' is changed from '2000:dbe::/64' to |
|
| 125 |
+ // '2000:dbe::/80', the default bridge will still be assigned address |
|
| 126 |
+ // '2000:dbe::1'. In the output of 'ip a', the prefix length is displayed - and |
|
| 127 |
+ // the user is likely to expect to see it updated from '64' to '80'. |
|
| 128 |
+ // Unfortunately, 'netlink.AddrReplace()' ('RTM_NEWADDR' with 'NLM_F_REPLACE')
|
|
| 129 |
+ // doesn't update the prefix length. This is a cosmetic problem, the prefix |
|
| 130 |
+ // length of an assigned address is not used to determine whether an address is |
|
| 131 |
+ // "on-link" (RFC-5942). |
|
| 132 |
+ if err := i.nlh.AddrReplace(i.Link, &netlink.Addr{IPNet: netiputil.ToIPNet(addrPrefix)}); err != nil {
|
|
| 133 |
+ return errdefs.System(fmt.Errorf("failed to add IPv6 address %s to bridge: %v", i.bridgeIPv6, err))
|
|
| 158 | 134 |
} |
| 159 | 135 |
return nil |
| 160 | 136 |
} |
| ... | ... |
@@ -2,12 +2,9 @@ package bridge |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"net" |
| 5 |
- "net/netip" |
|
| 6 |
- "strings" |
|
| 7 | 5 |
"testing" |
| 8 | 6 |
|
| 9 | 7 |
"github.com/docker/docker/internal/testutils/netnsutils" |
| 10 |
- "github.com/google/go-cmp/cmp" |
|
| 11 | 8 |
"github.com/vishvananda/netlink" |
| 12 | 9 |
"gotest.tools/v3/assert" |
| 13 | 10 |
is "gotest.tools/v3/assert/cmp" |
| ... | ... |
@@ -96,48 +93,6 @@ func TestAddressesNonEmptyInterface(t *testing.T) {
|
| 96 | 96 |
assert.Equal(t, addrs[0].IPNet.String(), expAddrV6) |
| 97 | 97 |
} |
| 98 | 98 |
|
| 99 |
-func TestGetRequiredIPv6Addrs(t *testing.T) {
|
|
| 100 |
- testcases := []struct {
|
|
| 101 |
- name string |
|
| 102 |
- addressIPv6 string |
|
| 103 |
- expReqdAddrs []string |
|
| 104 |
- }{
|
|
| 105 |
- {
|
|
| 106 |
- name: "Regular address, expect default link local", |
|
| 107 |
- addressIPv6: "2000:3000::1/80", |
|
| 108 |
- expReqdAddrs: []string{"fe80::1/64", "2000:3000::1/80"},
|
|
| 109 |
- }, |
|
| 110 |
- {
|
|
| 111 |
- name: "Standard link local address only", |
|
| 112 |
- addressIPv6: "fe80::1/64", |
|
| 113 |
- expReqdAddrs: []string{"fe80::1/64"},
|
|
| 114 |
- }, |
|
| 115 |
- {
|
|
| 116 |
- name: "Nonstandard link local address", |
|
| 117 |
- addressIPv6: "fe80:abcd::1/42", |
|
| 118 |
- expReqdAddrs: []string{"fe80:abcd::1/42", "fe80::1/64"},
|
|
| 119 |
- }, |
|
| 120 |
- } |
|
| 121 |
- |
|
| 122 |
- for _, tc := range testcases {
|
|
| 123 |
- t.Run(tc.name, func(t *testing.T) {
|
|
| 124 |
- config := &networkConfiguration{
|
|
| 125 |
- AddressIPv6: cidrToIPNet(t, tc.addressIPv6), |
|
| 126 |
- } |
|
| 127 |
- |
|
| 128 |
- expResult := map[netip.Addr]netip.Prefix{}
|
|
| 129 |
- for _, addr := range tc.expReqdAddrs {
|
|
| 130 |
- expResult[netip.MustParseAddr(strings.Split(addr, "/")[0])] = netip.MustParsePrefix(addr) |
|
| 131 |
- } |
|
| 132 |
- |
|
| 133 |
- reqd, err := getRequiredIPv6Addrs(config) |
|
| 134 |
- assert.Check(t, is.Nil(err)) |
|
| 135 |
- assert.Check(t, is.DeepEqual(reqd, expResult, |
|
| 136 |
- cmp.Comparer(func(a, b netip.Prefix) bool { return a == b })))
|
|
| 137 |
- }) |
|
| 138 |
- } |
|
| 139 |
-} |
|
| 140 |
- |
|
| 141 | 99 |
func TestProgramIPv6Addresses(t *testing.T) {
|
| 142 | 100 |
defer netnsutils.SetupTestOSContext(t)() |
| 143 | 101 |
|
| ... | ... |
@@ -159,11 +114,11 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
| 159 | 159 |
i := prepTestBridge(t, nc) |
| 160 | 160 |
|
| 161 | 161 |
// The bridge has no addresses, ask for a regular IPv6 network and expect it to |
| 162 |
- // be added to the bridge, with the default link local address. |
|
| 162 |
+ // be added to the bridge. |
|
| 163 | 163 |
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64") |
| 164 | 164 |
err := i.programIPv6Addresses(nc) |
| 165 | 165 |
assert.NilError(t, err) |
| 166 |
- checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
|
| 166 |
+ checkAddrs(i, nc, []string{"2000:3000::1/64"})
|
|
| 167 | 167 |
|
| 168 | 168 |
// Shrink the subnet of that regular address, the prefix length of the address |
| 169 | 169 |
// will not be modified - but it's informational-only, the address itself has |
| ... | ... |
@@ -171,7 +126,7 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
| 171 | 171 |
nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/80") |
| 172 | 172 |
err = i.programIPv6Addresses(nc) |
| 173 | 173 |
assert.NilError(t, err) |
| 174 |
- checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
|
| 174 |
+ checkAddrs(i, nc, []string{"2000:3000::1/64"})
|
|
| 175 | 175 |
|
| 176 | 176 |
// Ask for link-local only, by specifying an address with the Link Local prefix. |
| 177 | 177 |
// The regular address should be removed. |
| ... | ... |
@@ -180,9 +135,16 @@ func TestProgramIPv6Addresses(t *testing.T) {
|
| 180 | 180 |
assert.NilError(t, err) |
| 181 | 181 |
checkAddrs(i, nc, []string{"fe80::1/64"})
|
| 182 | 182 |
|
| 183 |
- // Swap the standard link local address for a nonstandard one. |
|
| 183 |
+ // Swap the standard link local address for a nonstandard one. The standard LL |
|
| 184 |
+ // address will not be removed. |
|
| 184 | 185 |
nc.AddressIPv6 = cidrToIPNet(t, "fe80:5555::1/55") |
| 185 | 186 |
err = i.programIPv6Addresses(nc) |
| 186 | 187 |
assert.NilError(t, err) |
| 187 | 188 |
checkAddrs(i, nc, []string{"fe80:5555::1/55", "fe80::1/64"})
|
| 189 |
+ |
|
| 190 |
+ // Back to the original address, expect the nonstandard LL address to be replaced. |
|
| 191 |
+ nc.AddressIPv6 = cidrToIPNet(t, "2000:3000::1/64") |
|
| 192 |
+ err = i.programIPv6Addresses(nc) |
|
| 193 |
+ assert.NilError(t, err) |
|
| 194 |
+ checkAddrs(i, nc, []string{"2000:3000::1/64", "fe80::1/64"})
|
|
| 188 | 195 |
} |
| ... | ... |
@@ -90,7 +90,7 @@ func TestLinkCreate(t *testing.T) {
|
| 90 | 90 |
|
| 91 | 91 |
ip6 := te.iface.addrv6.IP |
| 92 | 92 |
if !n.bridge.bridgeIPv6.Contains(ip6) {
|
| 93 |
- t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), bridgeIPv6.String())
|
|
| 93 |
+ t.Fatalf("IP %s is not a valid ip in the subnet %s", ip6.String(), n.bridge.bridgeIPv6.String())
|
|
| 94 | 94 |
} |
| 95 | 95 |
|
| 96 | 96 |
if !te.gw.Equal(n.bridge.bridgeIPv4.IP) {
|
| ... | ... |
@@ -43,14 +43,14 @@ func TestSetupIPv6(t *testing.T) {
|
| 43 | 43 |
|
| 44 | 44 |
var found bool |
| 45 | 45 |
for _, addr := range addrsv6 {
|
| 46 |
- if bridgeIPv6.String() == addr.IPNet.String() {
|
|
| 46 |
+ if config.AddressIPv6.String() == addr.IPNet.String() {
|
|
| 47 | 47 |
found = true |
| 48 | 48 |
break |
| 49 | 49 |
} |
| 50 | 50 |
} |
| 51 | 51 |
|
| 52 | 52 |
if !found {
|
| 53 |
- t.Fatalf("Bridge device does not have requested IPv6 address %v", bridgeIPv6)
|
|
| 53 |
+ t.Fatalf("Bridge device does not have requested IPv6 address %v", config.AddressIPv6)
|
|
| 54 | 54 |
} |
| 55 | 55 |
} |
| 56 | 56 |
|