When the Docker daemon restarts with live-restore enabled, containers
retain their network namespaces, but neighboring hosts may have stale
ARP/neighbor cache entries. This causes IPv6 connectivity issues because,
unlike IPv4, where gratuitous ARP is sent on interface setup, IPv6
relies on the Neighbor Discovery Protocol, which requires explicit Neighbor
Advertisement messages to update caches.
This change adds unsolicited ARP (for IPv4) and Neighbor Advertisement
(for IPv6) messages when restoring interfaces after a daemon restart,
mirroring the behavior that already exists in AddInterface for new
containers.
The fix also handles network drivers (such as SR-IOV and macvlan) that
don't store the MAC address in the endpoint configuration: when the stored
MAC address is empty, it is fetched from the actual link instead.
Signed-off-by: Paul Saab <ps@mu.org>
| ... | ... |
@@ -665,6 +665,17 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac |
| 665 | 665 |
defer span.End() |
| 666 | 666 |
|
| 667 | 667 |
mac := i.MacAddress() |
| 668 |
+ // If MAC is not stored in the interface struct, get it from the actual link. |
|
| 669 |
+ // This can happen with some network drivers (e.g., SR-IOV, macvlan) that don't |
|
| 670 |
+ // store the MAC in the endpoint configuration. |
|
| 671 |
+ if len(mac) == 0 {
|
|
| 672 |
+ link, err := nlh.LinkByIndex(ifIndex) |
|
| 673 |
+ if err != nil {
|
|
| 674 |
+ log.G(ctx).WithFields(log.Fields{"error": err, "ifi": ifIndex}).Warn("Failed to lookup link by index to determine MAC address; treating as no MAC to advertise")
|
|
| 675 |
+ } else if hw := link.Attrs().HardwareAddr; len(hw) > 0 {
|
|
| 676 |
+ mac = hw |
|
| 677 |
+ } |
|
| 678 |
+ } |
|
| 668 | 679 |
address4 := i.Address() |
| 669 | 680 |
address6 := i.AddressIPv6() |
| 670 | 681 |
ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
|
| ... | ... |
@@ -681,7 +692,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac |
| 681 | 681 |
log.G(ctx).Debug("No IP addresses to advertise")
|
| 682 | 682 |
return nil |
| 683 | 683 |
} |
| 684 |
- if mac == nil {
|
|
| 684 |
+ if len(mac) == 0 {
|
|
| 685 | 685 |
// Nothing to do - for example, a layer-3 ipvlan. |
| 686 | 686 |
log.G(ctx).Debug("No MAC address to advertise")
|
| 687 | 687 |
return nil |
| ... | ... |
@@ -691,7 +702,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac |
| 691 | 691 |
return nil |
| 692 | 692 |
} |
| 693 | 693 |
|
| 694 |
- arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex) |
|
| 694 |
+ arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex, mac) |
|
| 695 | 695 |
if arpSender == nil && naSender == nil {
|
| 696 | 696 |
return nil |
| 697 | 697 |
} |
| ... | ... |
@@ -740,9 +751,13 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac |
| 740 | 740 |
return errors.Join(errs...) |
| 741 | 741 |
} |
| 742 | 742 |
|
| 743 |
- // Send an initial message. If it fails, skip the resends. |
|
| 743 |
+ // Send an initial message. If it fails, log a warning and skip the resends, but |
|
| 744 |
+ // don't fail container creation or restore - unsolicited ARP/NA is an |
|
| 745 |
+ // optimization, neighbors will still discover addresses via normal ARP/NDP |
|
| 746 |
+ // solicitation. This can happen with L3 ipvlan which doesn't support multicast. |
|
| 744 | 747 |
if err := send(ctx); err != nil {
|
| 745 |
- return err |
|
| 748 |
+ log.G(ctx).WithError(err).Warn("Failed to send initial neighbor advertisement")
|
|
| 749 |
+ return nil |
|
| 746 | 750 |
} |
| 747 | 751 |
if i.advertiseAddrNMsgs == 1 {
|
| 748 | 752 |
return nil |
| ... | ... |
@@ -775,20 +790,20 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac |
| 775 | 775 |
return nil |
| 776 | 776 |
} |
| 777 | 777 |
|
| 778 |
-func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
|
|
| 778 |
+func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int, mac net.HardwareAddr) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
|
|
| 779 | 779 |
var ua *l2disco.UnsolARP |
| 780 | 780 |
var un *l2disco.UnsolNA |
| 781 | 781 |
if err := n.InvokeFunc(func() {
|
| 782 | 782 |
if address4 := i.Address(); address4 != nil {
|
| 783 | 783 |
var err error |
| 784 |
- ua, err = l2disco.NewUnsolARP(ctx, address4.IP, i.MacAddress(), ifIndex) |
|
| 784 |
+ ua, err = l2disco.NewUnsolARP(ctx, address4.IP, mac, ifIndex) |
|
| 785 | 785 |
if err != nil {
|
| 786 | 786 |
log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited ARP")
|
| 787 | 787 |
} |
| 788 | 788 |
} |
| 789 | 789 |
if address6 := i.AddressIPv6(); address6 != nil {
|
| 790 | 790 |
var err error |
| 791 |
- un, err = l2disco.NewUnsolNA(ctx, address6.IP, i.MacAddress(), ifIndex) |
|
| 791 |
+ un, err = l2disco.NewUnsolNA(ctx, address6.IP, mac, ifIndex) |
|
| 792 | 792 |
if err != nil {
|
| 793 | 793 |
log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited NA")
|
| 794 | 794 |
} |
| ... | ... |
@@ -397,10 +397,14 @@ func (n *Namespace) Destroy() error {
|
| 397 | 397 |
return nil |
| 398 | 398 |
} |
| 399 | 399 |
|
| 400 |
-// RestoreInterfaces restores the network namespace's interfaces. |
|
| 401 |
-func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error {
|
|
| 400 |
+// RestoreInterfaces restores the network namespace's interfaces and sends |
|
| 401 |
+// unsolicited ARP/NA messages to update neighbor caches. |
|
| 402 |
+func (n *Namespace) RestoreInterfaces(ctx context.Context, interfaces map[Iface][]IfaceOption) error {
|
|
| 402 | 403 |
// restore interfaces |
| 403 | 404 |
for iface, opts := range interfaces {
|
| 405 |
+ if err := ctx.Err(); err != nil {
|
|
| 406 |
+ return err |
|
| 407 |
+ } |
|
| 404 | 408 |
i, err := newInterface(n, iface.SrcName, iface.DstPrefix, iface.DstName, opts...) |
| 405 | 409 |
if err != nil {
|
| 406 | 410 |
return err |
| ... | ... |
@@ -459,6 +463,29 @@ func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error |
| 459 | 459 |
n.iFaces = append(n.iFaces, i) |
| 460 | 460 |
n.mu.Unlock() |
| 461 | 461 |
} |
| 462 |
+ |
|
| 463 |
+ // Send unsolicited ARP/NA messages to update neighbor caches with the |
|
| 464 |
+ // MAC address associated with the interface's IP addresses. This is |
|
| 465 |
+ // necessary after a daemon restart because other hosts may have stale |
|
| 466 |
+ // neighbor cache entries. |
|
| 467 |
+ if i.dstName != "" {
|
|
| 468 |
+ log.G(ctx).WithFields(log.Fields{
|
|
| 469 |
+ "interface": i.dstName, |
|
| 470 |
+ "ipv4": i.address, |
|
| 471 |
+ "ipv6": i.addressIPv6, |
|
| 472 |
+ }).Debug("Sending neighbor advertisements during restore")
|
|
| 473 |
+ link, err := n.nlHandle.LinkByName(i.dstName) |
|
| 474 |
+ if err != nil {
|
|
| 475 |
+ log.G(ctx).WithFields(log.Fields{"error": err, "interface": i.dstName}).Warn("Failed to get link for neighbor advertisement during restore")
|
|
| 476 |
+ continue |
|
| 477 |
+ } |
|
| 478 |
+ ifIndex := link.Attrs().Index |
|
| 479 |
+ waitForBridgePort(ctx, ns.NlHandle(), link) |
|
| 480 |
+ mcastRouteOk := waitForMcastRoute(ctx, ifIndex, i, n.nlHandle) |
|
| 481 |
+ if err := n.advertiseAddrs(ctx, ifIndex, i, n.nlHandle, mcastRouteOk); err != nil {
|
|
| 482 |
+ log.G(ctx).WithError(err).WithField("interface", i.dstName).Warn("Failed to send neighbor advertisement during restore")
|
|
| 483 |
+ } |
|
| 484 |
+ } |
|
| 462 | 485 |
} |
| 463 | 486 |
return nil |
| 464 | 487 |
} |
| ... | ... |
@@ -259,7 +259,7 @@ func (sb *Sandbox) releaseOSSbox() error {
|
| 259 | 259 |
return osSbox.Destroy() |
| 260 | 260 |
} |
| 261 | 261 |
|
| 262 |
-func (sb *Sandbox) restoreOslSandbox() error {
|
|
| 262 |
+func (sb *Sandbox) restoreOslSandbox(ctx context.Context) error {
|
|
| 263 | 263 |
var routes []*types.StaticRoute |
| 264 | 264 |
|
| 265 | 265 |
// restore osl sandbox |
| ... | ... |
@@ -271,7 +271,7 @@ func (sb *Sandbox) restoreOslSandbox() error {
|
| 271 | 271 |
ep.mu.Unlock() |
| 272 | 272 |
|
| 273 | 273 |
if i == nil {
|
| 274 |
- log.G(context.TODO()).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
|
|
| 274 |
+ log.G(ctx).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
|
|
| 275 | 275 |
continue |
| 276 | 276 |
} |
| 277 | 277 |
|
| ... | ... |
@@ -298,7 +298,9 @@ func (sb *Sandbox) restoreOslSandbox() error {
|
| 298 | 298 |
} |
| 299 | 299 |
} |
| 300 | 300 |
|
| 301 |
- if err := sb.osSbox.RestoreInterfaces(interfaces); err != nil {
|
|
| 301 |
+ // Use WithoutCancel so that restore completes even if the parent context is |
|
| 302 |
+ // cancelled - we don't want to leave containers with partially restored networking. |
|
| 303 |
+ if err := sb.osSbox.RestoreInterfaces(context.WithoutCancel(ctx), interfaces); err != nil {
|
|
| 302 | 304 |
return err |
| 303 | 305 |
} |
| 304 | 306 |
if len(routes) > 0 {
|
| ... | ... |
@@ -256,7 +256,7 @@ func (c *Controller) sandboxRestore(activeSandboxes map[string]any) error {
|
| 256 | 256 |
|
| 257 | 257 |
// reconstruct osl sandbox field |
| 258 | 258 |
if !sb.config.useDefaultSandBox {
|
| 259 |
- if err := sb.restoreOslSandbox(); err != nil {
|
|
| 259 |
+ if err := sb.restoreOslSandbox(ctx); err != nil {
|
|
| 260 | 260 |
log.G(ctx).WithError(err).Error("Failed to populate fields for osl sandbox")
|
| 261 | 261 |
continue |
| 262 | 262 |
} |
| ... | ... |
@@ -24,7 +24,7 @@ func (sb *Sandbox) releaseOSSbox() error {
|
| 24 | 24 |
return nil |
| 25 | 25 |
} |
| 26 | 26 |
|
| 27 |
-func (sb *Sandbox) restoreOslSandbox() error {
|
|
| 27 |
+func (sb *Sandbox) restoreOslSandbox(_ context.Context) error {
|
|
| 28 | 28 |
// not implemented on Windows (Sandbox.osSbox is always nil) |
| 29 | 29 |
return nil |
| 30 | 30 |
} |
| ... | ... |
@@ -1897,6 +1897,128 @@ func TestAdvertiseAddresses(t *testing.T) {
|
| 1897 | 1897 |
} |
| 1898 | 1898 |
} |
| 1899 | 1899 |
|
| 1900 |
+// TestAdvertiseAddressesLiveRestore verifies that unsolicited ARP/NA messages are |
|
| 1901 |
+// sent when the daemon restarts with live-restore enabled. This ensures that |
|
| 1902 |
+// neighbor caches on other hosts are updated with the container's MAC address |
|
| 1903 |
+// after a daemon restart. |
|
| 1904 |
+func TestAdvertiseAddressesLiveRestore(t *testing.T) {
|
|
| 1905 |
+ skip.If(t, testEnv.IsRootless, "can't listen for ARP/NA messages in rootlesskit's namespace") |
|
| 1906 |
+ |
|
| 1907 |
+ ctx := setupTest(t) |
|
| 1908 |
+ d := daemon.New(t) |
|
| 1909 |
+ d.StartWithBusybox(ctx, t, "--live-restore") |
|
| 1910 |
+ defer d.Stop(t) |
|
| 1911 |
+ c := d.NewClientT(t) |
|
| 1912 |
+ defer c.Close() |
|
| 1913 |
+ |
|
| 1914 |
+ const netName = "dsnet-lr" |
|
| 1915 |
+ const brName = "br-advaddrlr" |
|
| 1916 |
+ network.CreateNoError(ctx, t, c, netName, |
|
| 1917 |
+ network.WithOption(bridge.BridgeName, brName), |
|
| 1918 |
+ network.WithIPv6(), |
|
| 1919 |
+ network.WithIPAM("172.23.23.0/24", "172.23.23.1"),
|
|
| 1920 |
+ network.WithIPAM("fd4c:f70b:973d::/64", "fd4c:f70b:973d::1"),
|
|
| 1921 |
+ ) |
|
| 1922 |
+ defer network.RemoveNoError(ctx, t, c, netName) |
|
| 1923 |
+ |
|
| 1924 |
+ // Create ctr1 which will be used to verify neighbor cache updates. |
|
| 1925 |
+ ctr1Id := container.Run(ctx, t, c, container.WithName("ctr1-lr"), container.WithNetworkMode(netName))
|
|
| 1926 |
+ defer c.ContainerRemove(ctx, ctr1Id, client.ContainerRemoveOptions{Force: true})
|
|
| 1927 |
+ |
|
| 1928 |
+ // Create ctr2 with fixed IP addresses. |
|
| 1929 |
+ const ctr2Name = "ctr2-lr" |
|
| 1930 |
+ const ctr2Addr4 = "172.23.23.22" |
|
| 1931 |
+ const ctr2Addr6 = "fd4c:f70b:973d::2222" |
|
| 1932 |
+ ctr2Id := container.Run(ctx, t, c, |
|
| 1933 |
+ container.WithName(ctr2Name), |
|
| 1934 |
+ container.WithNetworkMode(netName), |
|
| 1935 |
+ container.WithIPv4(netName, ctr2Addr4), |
|
| 1936 |
+ container.WithIPv6(netName, ctr2Addr6), |
|
| 1937 |
+ ) |
|
| 1938 |
+ defer c.ContainerRemove(ctx, ctr2Id, client.ContainerRemoveOptions{Force: true})
|
|
| 1939 |
+ |
|
| 1940 |
+ ctr2MAC := container.Inspect(ctx, t, c, ctr2Id).NetworkSettings.Networks[netName].MacAddress |
|
| 1941 |
+ |
|
| 1942 |
+ // Ping from ctr1 to ctr2 to populate ctr1's neighbor caches. |
|
| 1943 |
+ pingRes := container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
|
|
| 1944 |
+ assert.Assert(t, is.Equal(pingRes.ExitCode, 0)) |
|
| 1945 |
+ pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
|
|
| 1946 |
+ assert.Assert(t, is.Equal(pingRes.ExitCode, 0)) |
|
| 1947 |
+ |
|
| 1948 |
+ // Verify ctr1 has neighbor entries for ctr2. |
|
| 1949 |
+ ctr1Neighs := container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
|
|
| 1950 |
+ assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0)) |
|
| 1951 |
+ t.Logf("ctr1 neighbours before restart:\n%s", ctr1Neighs.Combined())
|
|
| 1952 |
+ |
|
| 1953 |
+ // Wait for initial ARP/NA retransmits from container creation to settle. |
|
| 1954 |
+ // The daemon sends unsolicited ARP/NA messages for a couple of seconds after |
|
| 1955 |
+ // AddInterface, so we need to wait before starting to listen to avoid counting |
|
| 1956 |
+ // those messages instead of the ones sent during restore. |
|
| 1957 |
+ t.Log("Waiting for initial ARP/NA retransmits to settle...")
|
|
| 1958 |
+ time.Sleep(5 * time.Second) |
|
| 1959 |
+ |
|
| 1960 |
+ // Now start listening for ARP/NA messages. |
|
| 1961 |
+ stopARPListen := network.CollectBcastARPs(t, brName) |
|
| 1962 |
+ defer stopARPListen() |
|
| 1963 |
+ stopICMP6Listen := network.CollectICMP6(t, brName) |
|
| 1964 |
+ defer stopICMP6Listen() |
|
| 1965 |
+ |
|
| 1966 |
+ // Restart the daemon - this should trigger RestoreInterfaces which sends ARP/NA. |
|
| 1967 |
+ d.Restart(t, "--live-restore") |
|
| 1968 |
+ |
|
| 1969 |
+ // Give time for ARP/NA messages to be sent after restart. |
|
| 1970 |
+ t.Log("Sleeping for 5s to collect ARP/NA messages after daemon restart...")
|
|
| 1971 |
+ time.Sleep(5 * time.Second) |
|
| 1972 |
+ |
|
| 1973 |
+ // Verify that ARP/NA messages were sent for ctr2's addresses. |
|
| 1974 |
+ arps := stopARPListen() |
|
| 1975 |
+ var arpCount int |
|
| 1976 |
+ for i, p := range arps {
|
|
| 1977 |
+ ha, pa, err := network.UnpackUnsolARP(p) |
|
| 1978 |
+ if err != nil {
|
|
| 1979 |
+ t.Logf("ARP %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
|
|
| 1980 |
+ continue |
|
| 1981 |
+ } |
|
| 1982 |
+ t.Logf("ARP %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
|
|
| 1983 |
+ if pa == netip.MustParseAddr(ctr2Addr4) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
|
|
| 1984 |
+ arpCount++ |
|
| 1985 |
+ t.Logf("---> found ARP for ctr2")
|
|
| 1986 |
+ } |
|
| 1987 |
+ } |
|
| 1988 |
+ assert.Check(t, arpCount >= 1, "expected at least 1 ARP message for ctr2 after live-restore, got %d", arpCount) |
|
| 1989 |
+ |
|
| 1990 |
+ icmps := stopICMP6Listen() |
|
| 1991 |
+ var naCount int |
|
| 1992 |
+ for i, p := range icmps {
|
|
| 1993 |
+ ha, pa, err := network.UnpackUnsolNA(p) |
|
| 1994 |
+ if err != nil {
|
|
| 1995 |
+ t.Logf("ICMP6 %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
|
|
| 1996 |
+ continue |
|
| 1997 |
+ } |
|
| 1998 |
+ t.Logf("ICMP6 %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
|
|
| 1999 |
+ if pa == netip.MustParseAddr(ctr2Addr6) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
|
|
| 2000 |
+ naCount++ |
|
| 2001 |
+ t.Logf("---> found NA for ctr2")
|
|
| 2002 |
+ } |
|
| 2003 |
+ } |
|
| 2004 |
+ assert.Check(t, naCount >= 1, "expected at least 1 NA message for ctr2 after live-restore, got %d", naCount) |
|
| 2005 |
+ |
|
| 2006 |
+ // Verify ctr1 still has valid neighbor entries (connectivity should work). |
|
| 2007 |
+ ctr1Neighs = container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
|
|
| 2008 |
+ assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0)) |
|
| 2009 |
+ t.Logf("ctr1 neighbours after restart:\n%s", ctr1Neighs.Combined())
|
|
| 2010 |
+ |
|
| 2011 |
+ // Verify connectivity still works after restart. |
|
| 2012 |
+ pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
|
|
| 2013 |
+ assert.Assert(t, is.Equal(pingRes.ExitCode, 0)) |
|
| 2014 |
+ pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
|
|
| 2015 |
+ assert.Assert(t, is.Equal(pingRes.ExitCode, 0)) |
|
| 2016 |
+ |
|
| 2017 |
+ if t.Failed() {
|
|
| 2018 |
+ d.TailLogsT(t, 100) |
|
| 2019 |
+ } |
|
| 2020 |
+} |
|
| 2021 |
+ |
|
| 1900 | 2022 |
// TestNetworkInspectGateway checks that gateways reported in inspect output are parseable as addresses. |
| 1901 | 2023 |
func TestNetworkInspectGateway(t *testing.T) {
|
| 1902 | 2024 |
ctx := setupTest(t) |