Browse code

libnetwork: send neighbor advertisements on restore

When the Docker daemon restarts with live-restore enabled, containers
retain their network namespaces but neighboring hosts may have stale
ARP/neighbor cache entries. This causes IPv6 connectivity issues because,
unlike IPv4, where gratuitous ARP is sent on interface setup, IPv6
relies on the Neighbor Discovery Protocol, which requires explicit Neighbor
Advertisement messages to update caches.

This change adds unsolicited ARP (for IPv4) and Neighbor Advertisement
(for IPv6) messages when restoring interfaces after a daemon restart,
mirroring the behavior that already exists in AddInterface for new
containers.

The fix also handles network drivers (such as SR-IOV and macvlan) that
don't store the MAC address in the endpoint configuration by fetching
it from the actual link when needed.

Signed-off-by: Paul Saab <ps@mu.org>

Paul Saab authored on 2026/02/03 10:02:21
Showing 6 changed files
... ...
@@ -665,6 +665,17 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
665 665
 	defer span.End()
666 666
 
667 667
 	mac := i.MacAddress()
668
+	// If MAC is not stored in the interface struct, get it from the actual link.
669
+	// This can happen with some network drivers (e.g., SR-IOV, macvlan) that don't
670
+	// store the MAC in the endpoint configuration.
671
+	if len(mac) == 0 {
672
+		link, err := nlh.LinkByIndex(ifIndex)
673
+		if err != nil {
674
+			log.G(ctx).WithFields(log.Fields{"error": err, "ifi": ifIndex}).Warn("Failed to lookup link by index to determine MAC address; treating as no MAC to advertise")
675
+		} else if hw := link.Attrs().HardwareAddr; len(hw) > 0 {
676
+			mac = hw
677
+		}
678
+	}
668 679
 	address4 := i.Address()
669 680
 	address6 := i.AddressIPv6()
670 681
 	ctx = log.WithLogger(ctx, log.G(ctx).WithFields(log.Fields{
... ...
@@ -681,7 +692,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
681 681
 		log.G(ctx).Debug("No IP addresses to advertise")
682 682
 		return nil
683 683
 	}
684
-	if mac == nil {
684
+	if len(mac) == 0 {
685 685
 		// Nothing to do - for example, a layer-3 ipvlan.
686 686
 		log.G(ctx).Debug("No MAC address to advertise")
687 687
 		return nil
... ...
@@ -691,7 +702,7 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
691 691
 		return nil
692 692
 	}
693 693
 
694
-	arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex)
694
+	arpSender, naSender := n.prepAdvertiseAddrs(ctx, i, ifIndex, mac)
695 695
 	if arpSender == nil && naSender == nil {
696 696
 		return nil
697 697
 	}
... ...
@@ -740,9 +751,13 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
740 740
 		return errors.Join(errs...)
741 741
 	}
742 742
 
743
-	// Send an initial message. If it fails, skip the resends.
743
+	// Send an initial message. If it fails, log a warning but don't fail the caller
744
+	// (container creation or restore) - unsolicited ARP/NA is an optimization, and
745
+	// neighbors will still resolve addresses via normal ARP/NDP solicitation. This
746
+	// can happen with L3 ipvlan, which doesn't support multicast.
744 747
 	if err := send(ctx); err != nil {
745
-		return err
748
+		log.G(ctx).WithError(err).Warn("Failed to send initial neighbor advertisement")
749
+		return nil
746 750
 	}
747 751
 	if i.advertiseAddrNMsgs == 1 {
748 752
 		return nil
... ...
@@ -775,20 +790,20 @@ func (n *Namespace) advertiseAddrs(ctx context.Context, ifIndex int, i *Interfac
775 775
 	return nil
776 776
 }
777 777
 
778
-func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
778
+func (n *Namespace) prepAdvertiseAddrs(ctx context.Context, i *Interface, ifIndex int, mac net.HardwareAddr) (*l2disco.UnsolARP, *l2disco.UnsolNA) {
779 779
 	var ua *l2disco.UnsolARP
780 780
 	var un *l2disco.UnsolNA
781 781
 	if err := n.InvokeFunc(func() {
782 782
 		if address4 := i.Address(); address4 != nil {
783 783
 			var err error
784
-			ua, err = l2disco.NewUnsolARP(ctx, address4.IP, i.MacAddress(), ifIndex)
784
+			ua, err = l2disco.NewUnsolARP(ctx, address4.IP, mac, ifIndex)
785 785
 			if err != nil {
786 786
 				log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited ARP")
787 787
 			}
788 788
 		}
789 789
 		if address6 := i.AddressIPv6(); address6 != nil {
790 790
 			var err error
791
-			un, err = l2disco.NewUnsolNA(ctx, address6.IP, i.MacAddress(), ifIndex)
791
+			un, err = l2disco.NewUnsolNA(ctx, address6.IP, mac, ifIndex)
792 792
 			if err != nil {
793 793
 				log.G(ctx).WithError(err).Warn("Failed to prepare unsolicited NA")
794 794
 			}
... ...
@@ -397,10 +397,14 @@ func (n *Namespace) Destroy() error {
397 397
 	return nil
398 398
 }
399 399
 
400
-// RestoreInterfaces restores the network namespace's interfaces.
401
-func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error {
400
+// RestoreInterfaces restores the network namespace's interfaces and sends
401
+// unsolicited ARP/NA messages to update neighbor caches.
402
+func (n *Namespace) RestoreInterfaces(ctx context.Context, interfaces map[Iface][]IfaceOption) error {
402 403
 	// restore interfaces
403 404
 	for iface, opts := range interfaces {
405
+		if err := ctx.Err(); err != nil {
406
+			return err
407
+		}
404 408
 		i, err := newInterface(n, iface.SrcName, iface.DstPrefix, iface.DstName, opts...)
405 409
 		if err != nil {
406 410
 			return err
... ...
@@ -459,6 +463,29 @@ func (n *Namespace) RestoreInterfaces(interfaces map[Iface][]IfaceOption) error
459 459
 			n.iFaces = append(n.iFaces, i)
460 460
 			n.mu.Unlock()
461 461
 		}
462
+
463
+		// Send unsolicited ARP/NA messages to update neighbor caches with the
464
+		// MAC address associated with the interface's IP addresses. This is
465
+		// necessary after a daemon restart because other hosts may have stale
466
+		// neighbor cache entries.
467
+		if i.dstName != "" {
468
+			log.G(ctx).WithFields(log.Fields{
469
+				"interface": i.dstName,
470
+				"ipv4":      i.address,
471
+				"ipv6":      i.addressIPv6,
472
+			}).Debug("Sending neighbor advertisements during restore")
473
+			link, err := n.nlHandle.LinkByName(i.dstName)
474
+			if err != nil {
475
+				log.G(ctx).WithFields(log.Fields{"error": err, "interface": i.dstName}).Warn("Failed to get link for neighbor advertisement during restore")
476
+				continue
477
+			}
478
+			ifIndex := link.Attrs().Index
479
+			waitForBridgePort(ctx, ns.NlHandle(), link)
480
+			mcastRouteOk := waitForMcastRoute(ctx, ifIndex, i, n.nlHandle)
481
+			if err := n.advertiseAddrs(ctx, ifIndex, i, n.nlHandle, mcastRouteOk); err != nil {
482
+				log.G(ctx).WithError(err).WithField("interface", i.dstName).Warn("Failed to send neighbor advertisement during restore")
483
+			}
484
+		}
462 485
 	}
463 486
 	return nil
464 487
 }
... ...
@@ -259,7 +259,7 @@ func (sb *Sandbox) releaseOSSbox() error {
259 259
 	return osSbox.Destroy()
260 260
 }
261 261
 
262
-func (sb *Sandbox) restoreOslSandbox() error {
262
+func (sb *Sandbox) restoreOslSandbox(ctx context.Context) error {
263 263
 	var routes []*types.StaticRoute
264 264
 
265 265
 	// restore osl sandbox
... ...
@@ -271,7 +271,7 @@ func (sb *Sandbox) restoreOslSandbox() error {
271 271
 		ep.mu.Unlock()
272 272
 
273 273
 		if i == nil {
274
-			log.G(context.TODO()).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
274
+			log.G(ctx).Errorf("error restoring endpoint %s for container %s", ep.Name(), sb.ContainerID())
275 275
 			continue
276 276
 		}
277 277
 
... ...
@@ -298,7 +298,9 @@ func (sb *Sandbox) restoreOslSandbox() error {
298 298
 		}
299 299
 	}
300 300
 
301
-	if err := sb.osSbox.RestoreInterfaces(interfaces); err != nil {
301
+	// Use WithoutCancel so that restore completes even if the parent context is
302
+	// cancelled - we don't want to leave containers with partially restored networking.
303
+	if err := sb.osSbox.RestoreInterfaces(context.WithoutCancel(ctx), interfaces); err != nil {
302 304
 		return err
303 305
 	}
304 306
 	if len(routes) > 0 {
... ...
@@ -256,7 +256,7 @@ func (c *Controller) sandboxRestore(activeSandboxes map[string]any) error {
256 256
 
257 257
 		// reconstruct osl sandbox field
258 258
 		if !sb.config.useDefaultSandBox {
259
-			if err := sb.restoreOslSandbox(); err != nil {
259
+			if err := sb.restoreOslSandbox(ctx); err != nil {
260 260
 				log.G(ctx).WithError(err).Error("Failed to populate fields for osl sandbox")
261 261
 				continue
262 262
 			}
... ...
@@ -24,7 +24,7 @@ func (sb *Sandbox) releaseOSSbox() error {
24 24
 	return nil
25 25
 }
26 26
 
27
-func (sb *Sandbox) restoreOslSandbox() error {
27
+func (sb *Sandbox) restoreOslSandbox(_ context.Context) error {
28 28
 	// not implemented on Windows (Sandbox.osSbox is always nil)
29 29
 	return nil
30 30
 }
... ...
@@ -1897,6 +1897,128 @@ func TestAdvertiseAddresses(t *testing.T) {
1897 1897
 	}
1898 1898
 }
1899 1899
 
1900
+// TestAdvertiseAddressesLiveRestore verifies that unsolicited ARP/NA messages are
1901
+// sent when the daemon restarts with live-restore enabled. This ensures that
1902
+// neighbor caches on other hosts are updated with the container's MAC address
1903
+// after a daemon restart.
1904
+func TestAdvertiseAddressesLiveRestore(t *testing.T) {
1905
+	skip.If(t, testEnv.IsRootless, "can't listen for ARP/NA messages in rootlesskit's namespace")
1906
+
1907
+	ctx := setupTest(t)
1908
+	d := daemon.New(t)
1909
+	d.StartWithBusybox(ctx, t, "--live-restore")
1910
+	defer d.Stop(t)
1911
+	c := d.NewClientT(t)
1912
+	defer c.Close()
1913
+
1914
+	const netName = "dsnet-lr"
1915
+	const brName = "br-advaddrlr"
1916
+	network.CreateNoError(ctx, t, c, netName,
1917
+		network.WithOption(bridge.BridgeName, brName),
1918
+		network.WithIPv6(),
1919
+		network.WithIPAM("172.23.23.0/24", "172.23.23.1"),
1920
+		network.WithIPAM("fd4c:f70b:973d::/64", "fd4c:f70b:973d::1"),
1921
+	)
1922
+	defer network.RemoveNoError(ctx, t, c, netName)
1923
+
1924
+	// Create ctr1 which will be used to verify neighbor cache updates.
1925
+	ctr1Id := container.Run(ctx, t, c, container.WithName("ctr1-lr"), container.WithNetworkMode(netName))
1926
+	defer c.ContainerRemove(ctx, ctr1Id, client.ContainerRemoveOptions{Force: true})
1927
+
1928
+	// Create ctr2 with fixed IP addresses.
1929
+	const ctr2Name = "ctr2-lr"
1930
+	const ctr2Addr4 = "172.23.23.22"
1931
+	const ctr2Addr6 = "fd4c:f70b:973d::2222"
1932
+	ctr2Id := container.Run(ctx, t, c,
1933
+		container.WithName(ctr2Name),
1934
+		container.WithNetworkMode(netName),
1935
+		container.WithIPv4(netName, ctr2Addr4),
1936
+		container.WithIPv6(netName, ctr2Addr6),
1937
+	)
1938
+	defer c.ContainerRemove(ctx, ctr2Id, client.ContainerRemoveOptions{Force: true})
1939
+
1940
+	ctr2MAC := container.Inspect(ctx, t, c, ctr2Id).NetworkSettings.Networks[netName].MacAddress
1941
+
1942
+	// Ping from ctr1 to ctr2 to populate ctr1's neighbor caches.
1943
+	pingRes := container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
1944
+	assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
1945
+	pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
1946
+	assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
1947
+
1948
+	// Log ctr1's neighbor entries before the restart (for debugging; the contents are not asserted).
1949
+	ctr1Neighs := container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
1950
+	assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0))
1951
+	t.Logf("ctr1 neighbours before restart:\n%s", ctr1Neighs.Combined())
1952
+
1953
+	// Wait for initial ARP/NA retransmits from container creation to settle.
1954
+	// The daemon sends unsolicited ARP/NA messages for a couple of seconds after
1955
+	// AddInterface, so we need to wait before starting to listen to avoid counting
1956
+	// those messages instead of the ones sent during restore.
1957
+	t.Log("Waiting for initial ARP/NA retransmits to settle...")
1958
+	time.Sleep(5 * time.Second)
1959
+
1960
+	// Now start listening for ARP/NA messages.
1961
+	stopARPListen := network.CollectBcastARPs(t, brName)
1962
+	defer stopARPListen()
1963
+	stopICMP6Listen := network.CollectICMP6(t, brName)
1964
+	defer stopICMP6Listen()
1965
+
1966
+	// Restart the daemon - this should trigger RestoreInterfaces which sends ARP/NA.
1967
+	d.Restart(t, "--live-restore")
1968
+
1969
+	// Give time for ARP/NA messages to be sent after restart.
1970
+	t.Log("Sleeping for 5s to collect ARP/NA messages after daemon restart...")
1971
+	time.Sleep(5 * time.Second)
1972
+
1973
+	// Verify that ARP/NA messages were sent for ctr2's addresses.
1974
+	arps := stopARPListen()
1975
+	var arpCount int
1976
+	for i, p := range arps {
1977
+		ha, pa, err := network.UnpackUnsolARP(p)
1978
+		if err != nil {
1979
+			t.Logf("ARP %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
1980
+			continue
1981
+		}
1982
+		t.Logf("ARP %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
1983
+		if pa == netip.MustParseAddr(ctr2Addr4) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
1984
+			arpCount++
1985
+			t.Logf("---> found ARP for ctr2")
1986
+		}
1987
+	}
1988
+	assert.Check(t, arpCount >= 1, "expected at least 1 ARP message for ctr2 after live-restore, got %d", arpCount)
1989
+
1990
+	icmps := stopICMP6Listen()
1991
+	var naCount int
1992
+	for i, p := range icmps {
1993
+		ha, pa, err := network.UnpackUnsolNA(p)
1994
+		if err != nil {
1995
+			t.Logf("ICMP6 %d: %s: %s: %s", i+1, p.ReceivedAt.Format("15:04:05.000"), hex.EncodeToString(p.Data), err)
1996
+			continue
1997
+		}
1998
+		t.Logf("ICMP6 %d: %s '%s' is at '%s'", i+1, p.ReceivedAt.Format("15:04:05.000"), pa, ha)
1999
+		if pa == netip.MustParseAddr(ctr2Addr6) && slices.Compare(ha, net.HardwareAddr(ctr2MAC)) == 0 {
2000
+			naCount++
2001
+			t.Logf("---> found NA for ctr2")
2002
+		}
2003
+	}
2004
+	assert.Check(t, naCount >= 1, "expected at least 1 NA message for ctr2 after live-restore, got %d", naCount)
2005
+
2006
+	// Log ctr1's neighbor entries after the restart; connectivity is checked by the pings below.
2007
+	ctr1Neighs = container.ExecT(ctx, t, c, ctr1Id, []string{"ip", "neigh", "show"})
2008
+	assert.Assert(t, is.Equal(ctr1Neighs.ExitCode, 0))
2009
+	t.Logf("ctr1 neighbours after restart:\n%s", ctr1Neighs.Combined())
2010
+
2011
+	// Verify connectivity still works after restart.
2012
+	pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-4", "-c1", ctr2Name})
2013
+	assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
2014
+	pingRes = container.ExecT(ctx, t, c, ctr1Id, []string{"ping", "-6", "-c1", ctr2Name})
2015
+	assert.Assert(t, is.Equal(pingRes.ExitCode, 0))
2016
+
2017
+	if t.Failed() {
2018
+		d.TailLogsT(t, 100)
2019
+	}
2020
+}
2021
+
1900 2022
 // TestNetworkInspectGateway checks that gateways reported in inspect output are parseable as addresses.
1901 2023
 func TestNetworkInspectGateway(t *testing.T) {
1902 2024
 	ctx := setupTest(t)