Browse code

libnet/d/bridge: mv portmapper to libnet/pms/{nat,routed}

Signed-off-by: Albin Kerouanton <albinker@gmail.com>

Albin Kerouanton authored on 2025/07/02 21:59:26
Showing 14 changed files
... ...
@@ -1458,9 +1458,10 @@ func (daemon *Daemon) networkOptions(conf *config.Config, pg plugingetter.Plugin
1458 1458
 		nwconfig.OptionLabels(conf.Labels),
1459 1459
 		nwconfig.OptionNetworkControlPlaneMTU(conf.NetworkControlPlaneMTU),
1460 1460
 		nwconfig.OptionFirewallBackend(conf.FirewallBackend),
1461
-		driverOptions(conf),
1462 1461
 	}
1463 1462
 
1463
+	options = append(options, networkPlatformOptions(conf)...)
1464
+
1464 1465
 	defaultAddressPools := ipamutils.GetLocalScopeDefaultNetworks()
1465 1466
 	if len(conf.NetworkConfig.DefaultAddressPools.Value()) > 0 {
1466 1467
 		defaultAddressPools = conf.NetworkConfig.DefaultAddressPools.Value()
... ...
@@ -924,20 +924,23 @@ func setHostGatewayIP(controller *libnetwork.Controller, config *config.Config)
924 924
 	}
925 925
 }
926 926
 
927
-func driverOptions(config *config.Config) nwconfig.Option {
928
-	return nwconfig.OptionDriverConfig("bridge", options.Generic{
929
-		netlabel.GenericData: options.Generic{
930
-			"EnableIPForwarding":       config.BridgeConfig.EnableIPForward,
931
-			"DisableFilterForwardDrop": config.BridgeConfig.DisableFilterForwardDrop,
932
-			"EnableIPTables":           config.BridgeConfig.EnableIPTables,
933
-			"EnableIP6Tables":          config.BridgeConfig.EnableIP6Tables,
934
-			"EnableUserlandProxy":      config.EnableUserlandProxy,
935
-			"UserlandProxyPath":        config.UserlandProxyPath,
936
-			"Hairpin":                  !config.EnableUserlandProxy || config.UserlandProxyPath == "",
937
-			"AllowDirectRouting":       config.BridgeConfig.AllowDirectRouting,
938
-			"Rootless":                 config.Rootless,
939
-		},
940
-	})
927
+// networkPlatformOptions returns a slice of platform-specific libnetwork
928
+// options.
929
+func networkPlatformOptions(conf *config.Config) []nwconfig.Option {
930
+	return []nwconfig.Option{
931
+		nwconfig.OptionRootless(conf.Rootless),
932
+		nwconfig.OptionUserlandProxy(conf.EnableUserlandProxy, conf.UserlandProxyPath),
933
+		nwconfig.OptionDriverConfig("bridge", options.Generic{
934
+			netlabel.GenericData: options.Generic{
935
+				"EnableIPForwarding":       conf.BridgeConfig.EnableIPForward,
936
+				"DisableFilterForwardDrop": conf.BridgeConfig.DisableFilterForwardDrop,
937
+				"EnableIPTables":           conf.BridgeConfig.EnableIPTables,
938
+				"EnableIP6Tables":          conf.BridgeConfig.EnableIP6Tables,
939
+				"Hairpin":                  !conf.EnableUserlandProxy || conf.UserlandProxyPath == "",
940
+				"AllowDirectRouting":       conf.BridgeConfig.AllowDirectRouting,
941
+			},
942
+		}),
943
+	}
941 944
 }
942 945
 
943 946
 type defBrOptsV4 struct {
... ...
@@ -523,7 +523,7 @@ func (daemon *Daemon) conditionalUnmountOnCleanup(container *container.Container
523 523
 	return daemon.Unmount(container)
524 524
 }
525 525
 
526
-func driverOptions(_ *config.Config) nwconfig.Option {
526
+func networkPlatformOptions(_ *config.Config) []nwconfig.Option {
527 527
 	return nil
528 528
 }
529 529
 
... ...
@@ -43,6 +43,9 @@ type Config struct {
43 43
 	ActiveSandboxes        map[string]any
44 44
 	PluginGetter           plugingetter.PluginGetter
45 45
 	FirewallBackend        string
46
+	Rootless               bool
47
+	EnableUserlandProxy    bool
48
+	UserlandProxyPath      string
46 49
 }
47 50
 
48 51
 // New creates a new Config and initializes it with the given Options.
... ...
@@ -162,3 +165,20 @@ func OptionFirewallBackend(val string) Option {
162 162
 		c.FirewallBackend = val
163 163
 	}
164 164
 }
165
+
166
+// OptionRootless returns an option setter that indicates whether the daemon is
167
+// running in rootless mode.
168
+func OptionRootless(rootless bool) Option {
169
+	return func(c *Config) {
170
+		c.Rootless = rootless
171
+	}
172
+}
173
+
174
+// OptionUserlandProxy returns an option setter that indicates whether the
175
+// userland proxy is enabled, and sets the path to the proxy binary.
176
+func OptionUserlandProxy(enabled bool, proxyPath string) Option {
177
+	return func(c *Config) {
178
+		c.EnableUserlandProxy = enabled
179
+		c.UserlandProxyPath = proxyPath
180
+	}
181
+}
... ...
@@ -24,7 +24,6 @@ import (
24 24
 	"github.com/docker/docker/daemon/libnetwork/drvregistry"
25 25
 	"github.com/docker/docker/daemon/libnetwork/internal/netiputil"
26 26
 	"github.com/docker/docker/daemon/libnetwork/internal/nftables"
27
-	"github.com/docker/docker/daemon/libnetwork/internal/rlkclient"
28 27
 	"github.com/docker/docker/daemon/libnetwork/iptables"
29 28
 	"github.com/docker/docker/daemon/libnetwork/netlabel"
30 29
 	"github.com/docker/docker/daemon/libnetwork/netutils"
... ...
@@ -51,7 +50,6 @@ const (
51 51
 	vethPrefix                 = "veth"
52 52
 	vethLen                    = len(vethPrefix) + 7
53 53
 	defaultContainerVethPrefix = "eth"
54
-	maxAllocatePortAttempts    = 10
55 54
 )
56 55
 
57 56
 const (
... ...
@@ -74,13 +72,10 @@ type configuration struct {
74 74
 	DisableFilterForwardDrop bool
75 75
 	EnableIPTables           bool
76 76
 	EnableIP6Tables          bool
77
-	EnableUserlandProxy      bool
78
-	UserlandProxyPath        string
79 77
 	// Hairpin indicates whether packets sent from a container to a host port
80 78
 	// published by another container on the same bridge network should be
81 79
 	// hairpinned.
82 80
 	Hairpin            bool
83
-	Rootless           bool
84 81
 	AllowDirectRouting bool
85 82
 }
86 83
 
... ...
@@ -161,25 +156,14 @@ type bridgeNetwork struct {
161 161
 	sync.Mutex
162 162
 }
163 163
 
164
-type portDriverClient interface {
165
-	ChildHostIP(hostIP netip.Addr) netip.Addr
166
-	AddPort(ctx context.Context, proto string, hostIP, childIP netip.Addr, hostPort int) (func() error, error)
167
-}
168
-
169
-// Allow unit tests to supply a dummy RootlessKit port driver client.
170
-var newPortDriverClient = func(ctx context.Context) (portDriverClient, error) {
171
-	return rlkclient.NewPortDriverClient(ctx)
172
-}
173
-
174 164
 type driver struct {
175
-	config           configuration
176
-	networks         map[string]*bridgeNetwork
177
-	store            *datastore.Store
178
-	nlh              nlwrap.Handle
179
-	portDriverClient portDriverClient
180
-	configNetwork    sync.Mutex
181
-	firewaller       firewaller.Firewaller
182
-	portmappers      *drvregistry.PortMappers
165
+	config        configuration
166
+	networks      map[string]*bridgeNetwork
167
+	store         *datastore.Store
168
+	nlh           nlwrap.Handle
169
+	configNetwork sync.Mutex
170
+	firewaller    firewaller.Firewaller
171
+	portmappers   *drvregistry.PortMappers
183 172
 	sync.Mutex
184 173
 }
185 174
 
... ...
@@ -476,15 +460,6 @@ func (n *bridgeNetwork) gwMode(v firewaller.IPVersion) gwMode {
476 476
 	return n.config.GwModeIPv6
477 477
 }
478 478
 
479
-func (n *bridgeNetwork) userlandProxyPath() string {
480
-	n.Lock()
481
-	defer n.Unlock()
482
-	if n.driver == nil {
483
-		return ""
484
-	}
485
-	return n.driver.userlandProxyPath()
486
-}
487
-
488 479
 func (n *bridgeNetwork) hairpin() bool {
489 480
 	n.Lock()
490 481
 	defer n.Unlock()
... ...
@@ -494,13 +469,13 @@ func (n *bridgeNetwork) hairpin() bool {
494 494
 	return n.driver.config.Hairpin
495 495
 }
496 496
 
497
-func (n *bridgeNetwork) getPortDriverClient() portDriverClient {
497
+func (n *bridgeNetwork) portMappers() *drvregistry.PortMappers {
498 498
 	n.Lock()
499 499
 	defer n.Unlock()
500 500
 	if n.driver == nil {
501 501
 		return nil
502 502
 	}
503
-	return n.driver.getPortDriverClient()
503
+	return n.driver.portmappers
504 504
 }
505 505
 
506 506
 func (n *bridgeNetwork) getEndpoint(eid string) (*bridgeEndpoint, error) {
... ...
@@ -546,17 +521,7 @@ func (d *driver) configure(option map[string]interface{}) error {
546 546
 		return err
547 547
 	}
548 548
 
549
-	var pdc portDriverClient
550
-	if config.Rootless {
551
-		var err error
552
-		pdc, err = newPortDriverClient(context.TODO())
553
-		if err != nil {
554
-			return err
555
-		}
556
-	}
557
-
558 549
 	d.Lock()
559
-	d.portDriverClient = pdc
560 550
 	d.config = config
561 551
 	d.Unlock()
562 552
 
... ...
@@ -604,22 +569,6 @@ func (d *driver) getNetwork(id string) (*bridgeNetwork, error) {
604 604
 	return nil, types.NotFoundErrorf("network not found: %s", id)
605 605
 }
606 606
 
607
-func (d *driver) userlandProxyPath() string {
608
-	d.Lock()
609
-	defer d.Unlock()
610
-
611
-	if d.config.EnableUserlandProxy {
612
-		return d.config.UserlandProxyPath
613
-	}
614
-	return ""
615
-}
616
-
617
-func (d *driver) getPortDriverClient() portDriverClient {
618
-	d.Lock()
619
-	defer d.Unlock()
620
-	return d.portDriverClient
621
-}
622
-
623 607
 func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error) {
624 608
 	var (
625 609
 		err    error
... ...
@@ -1639,7 +1588,7 @@ func (ep *bridgeEndpoint) trimPortBindings(ctx context.Context, n *bridgeNetwork
1639 1639
 		return nil, nil
1640 1640
 	}
1641 1641
 
1642
-	if err := releasePortBindings(toDrop, n.firewallerNetwork); err != nil {
1642
+	if err := n.unmapPBs(ctx, toDrop); err != nil {
1643 1643
 		log.G(ctx).WithFields(log.Fields{
1644 1644
 			"error": err,
1645 1645
 			"gw4":   pbmReq.ipv4,
... ...
@@ -8,7 +8,6 @@ import (
8 8
 	"maps"
9 9
 	"net"
10 10
 	"net/netip"
11
-	"os/exec"
12 11
 	"slices"
13 12
 	"strconv"
14 13
 	"testing"
... ...
@@ -796,21 +795,16 @@ func testQueryEndpointInfo(t *testing.T, ulPxyEnabled bool) {
796 796
 	defer netnsutils.SetupTestOSContext(t)()
797 797
 	useStubFirewaller(t)
798 798
 
799
-	d := newDriver(storeutils.NewTempStore(t), &drvregistry.PortMappers{})
799
+	pms := drvregistry.PortMappers{}
800
+	pm := &stubPortMapper{}
801
+	err := pms.Register("nat", pm)
802
+	assert.NilError(t, err)
803
+
804
+	d := newDriver(storeutils.NewTempStore(t), &pms)
800 805
 	portallocator.Get().ReleaseAll()
801 806
 
802
-	var proxyBinary string
803
-	var err error
804
-	if ulPxyEnabled {
805
-		proxyBinary, err = exec.LookPath("docker-proxy")
806
-		if err != nil {
807
-			t.Fatalf("failed to lookup userland-proxy binary: %v", err)
808
-		}
809
-	}
810 807
 	config := &configuration{
811
-		EnableIPTables:      true,
812
-		EnableUserlandProxy: ulPxyEnabled,
813
-		UserlandProxyPath:   proxyBinary,
808
+		EnableIPTables: true,
814 809
 	}
815 810
 	genericOption := make(map[string]interface{})
816 811
 	genericOption[netlabel.GenericData] = config
... ...
@@ -865,15 +859,15 @@ func testQueryEndpointInfo(t *testing.T, ulPxyEnabled bool) {
865 865
 	if !ok {
866 866
 		t.Fatal("Endpoint operational data does not contain port mapping data")
867 867
 	}
868
-	pm, ok := pmd.([]types.PortBinding)
868
+	pbs, ok := pmd.([]types.PortBinding)
869 869
 	if !ok {
870 870
 		t.Fatal("Unexpected format for port mapping in endpoint operational data")
871 871
 	}
872
-	if len(ep.portMapping) != len(pm) {
872
+	if len(ep.portMapping) != len(pbs) {
873 873
 		t.Fatal("Incomplete data for port mapping in endpoint operational data")
874 874
 	}
875 875
 	for i, pb := range ep.portMapping {
876
-		if !comparePortBinding(&pb.PortBinding, &pm[i]) {
876
+		if !comparePortBinding(&pb.PortBinding, &pbs[i]) {
877 877
 			t.Fatal("Unexpected data for port mapping in endpoint operational data")
878 878
 		}
879 879
 	}
... ...
@@ -8,25 +8,15 @@ import (
8 8
 	"errors"
9 9
 	"fmt"
10 10
 	"net"
11
-	"net/netip"
12
-	"os"
13 11
 	"slices"
14
-	"strconv"
15
-	"syscall"
16 12
 
17 13
 	"github.com/containerd/log"
18
-	"github.com/docker/docker/daemon/libnetwork/drivers/bridge/internal/firewaller"
19
-	"github.com/docker/docker/daemon/libnetwork/internal/rlkclient"
20 14
 	"github.com/docker/docker/daemon/libnetwork/netutils"
21
-	"github.com/docker/docker/daemon/libnetwork/portallocator"
22
-	"github.com/docker/docker/daemon/libnetwork/portmapper"
23 15
 	"github.com/docker/docker/daemon/libnetwork/portmapperapi"
24 16
 	"github.com/docker/docker/daemon/libnetwork/types"
17
+	"github.com/docker/docker/internal/sliceutil"
25 18
 )
26 19
 
27
-// Allow unit tests to supply a dummy StartProxy.
28
-var startProxy = portmapper.StartProxy
29
-
30 20
 // addPortMappings takes cfg, the configuration for port mappings, selects host
31 21
 // ports when ranges are given, binds host ports to check they're available and
32 22
 // reserve them, starts docker-proxy if required, and sets up iptables
... ...
@@ -50,20 +40,23 @@ func (n *bridgeNetwork) addPortMappings(
50 50
 		defHostIP = addr4
51 51
 	}
52 52
 
53
+	pms := n.portMappers()
54
+
53 55
 	bindings := make([]portmapperapi.PortBinding, 0, len(cfg)*2)
54 56
 	defer func() {
55 57
 		if retErr != nil {
56
-			if err := releasePortBindings(bindings, n.firewallerNetwork); err != nil {
57
-				log.G(ctx).Warnf("Release port bindings: %s", err.Error())
58
+			if err := n.unmapPBs(ctx, bindings); err != nil {
59
+				log.G(ctx).WithFields(log.Fields{
60
+					"bindings": bindings,
61
+					"error":    err,
62
+					"origErr":  retErr,
63
+				}).Warn("Failed to unmap port bindings after error")
58 64
 			}
59 65
 		}
60 66
 	}()
61 67
 
62 68
 	bindingReqs := n.sortAndNormPBs(ctx, ep, cfg, defHostIP, pbmReq)
63 69
 
64
-	proxyPath := n.userlandProxyPath()
65
-	pdc := n.getPortDriverClient()
66
-
67 70
 	// toBind accumulates port bindings that should be allocated the same host port
68 71
 	// (if required by NAT config). If the host address is unspecified, and defHostIP
69 72
 	// is 0.0.0.0, one iteration of the loop may generate bindings for v4 and v6. If
... ...
@@ -76,52 +69,30 @@ func (n *bridgeNetwork) addPortMappings(
76 76
 	var toBind []portmapperapi.PortBindingReq
77 77
 	for i, c := range bindingReqs {
78 78
 		toBind = append(toBind, c)
79
-		if i < len(bindingReqs)-1 && c.DisableNAT == bindingReqs[i+1].DisableNAT && needSamePort(c, bindingReqs[i+1]) {
79
+		if i < len(bindingReqs)-1 && c.Mapper == bindingReqs[i+1].Mapper && needSamePort(c, bindingReqs[i+1]) {
80 80
 			// This port binding matches the next, apart from host IP. So, continue
81 81
 			// collecting bindings, then allocate the same host port for all addresses.
82 82
 			continue
83 83
 		}
84 84
 
85
-		var newB []portmapperapi.PortBinding
86
-		var err error
87
-		if c.DisableNAT {
88
-			newB, err = setupForwardedPorts(ctx, toBind, n.firewallerNetwork)
89
-		} else {
90
-			newB, err = bindHostPorts(ctx, toBind, proxyPath, pdc, n.firewallerNetwork)
85
+		pm, err := pms.Get(c.Mapper)
86
+		if err != nil {
87
+			return nil, err
91 88
 		}
89
+
90
+		newB, err := pm.MapPorts(ctx, toBind, n.firewallerNetwork)
92 91
 		if err != nil {
93 92
 			return nil, err
94 93
 		}
95
-		bindings = append(bindings, newB...)
94
+		bindings = append(bindings, sliceutil.Map(newB, func(b portmapperapi.PortBinding) portmapperapi.PortBinding {
95
+			b.Mapper = c.Mapper
96
+			return b
97
+		})...)
96 98
 
97 99
 		// Reset toBind now the ports are bound.
98 100
 		toBind = toBind[:0]
99 101
 	}
100 102
 
101
-	// Start userland proxy processes.
102
-	if proxyPath != "" {
103
-		for i := range bindings {
104
-			if bindings[i].BoundSocket == nil || bindings[i].RootlesskitUnsupported || bindings[i].StopProxy != nil {
105
-				continue
106
-			}
107
-			var err error
108
-			bindings[i].StopProxy, err = startProxy(
109
-				bindings[i].ChildPortBinding(), proxyPath, bindings[i].BoundSocket,
110
-			)
111
-			if err != nil {
112
-				return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w",
113
-					bindings[i].PortBinding, err)
114
-			}
115
-			if err := bindings[i].BoundSocket.Close(); err != nil {
116
-				log.G(ctx).WithFields(log.Fields{
117
-					"error":   err,
118
-					"mapping": bindings[i].PortBinding,
119
-				}).Warnf("failed to close proxy socket")
120
-			}
121
-			bindings[i].BoundSocket = nil
122
-		}
123
-	}
124
-
125 103
 	return bindings, nil
126 104
 }
127 105
 
... ...
@@ -283,7 +254,10 @@ func configurePortBindingIPv4(
283 283
 	// Unmap the addresses if they're IPv4-mapped IPv6.
284 284
 	bnd.HostIP = bnd.HostIP.To4()
285 285
 	bnd.IP = containerIPv4.To4()
286
-	bnd.DisableNAT = disableNAT
286
+	bnd.Mapper = "nat"
287
+	if disableNAT {
288
+		bnd.Mapper = "routed"
289
+	}
287 290
 	return bnd, true
288 291
 }
289 292
 
... ...
@@ -340,234 +314,11 @@ func configurePortBindingIPv6(
340 340
 	}
341 341
 
342 342
 	bnd.IP = containerIP
343
-	bnd.DisableNAT = disableNAT
344
-	return bnd, true
345
-}
346
-
347
-func setChildHostIP(pdc portDriverClient, req portmapperapi.PortBindingReq) portmapperapi.PortBindingReq {
348
-	if pdc == nil {
349
-		req.ChildHostIP = req.HostIP
350
-		return req
351
-	}
352
-	hip, _ := netip.AddrFromSlice(req.HostIP)
353
-	req.ChildHostIP = pdc.ChildHostIP(hip).AsSlice()
354
-	return req
355
-}
356
-
357
-// setupForwardedPorts sets up firewall rules to allow direct remote access to
358
-// the container's ports in cfg.
359
-func setupForwardedPorts(ctx context.Context, cfg []portmapperapi.PortBindingReq, fwn firewaller.Network) ([]portmapperapi.PortBinding, error) {
360
-	if len(cfg) == 0 {
361
-		return nil, nil
362
-	}
363
-
364
-	res := make([]portmapperapi.PortBinding, 0, len(cfg))
365
-	bindings := make([]types.PortBinding, 0, len(cfg))
366
-	for _, c := range cfg {
367
-		pb := portmapperapi.PortBinding{PortBinding: c.GetCopy()}
368
-		if pb.HostPort != 0 || pb.HostPortEnd != 0 {
369
-			log.G(ctx).WithFields(log.Fields{"mapping": pb}).Infof(
370
-				"Host port ignored, because NAT is disabled")
371
-			pb.HostPort = 0
372
-			pb.HostPortEnd = 0
373
-		}
374
-		res = append(res, pb)
375
-		bindings = append(bindings, pb.PortBinding)
376
-	}
377
-
378
-	if err := fwn.AddPorts(ctx, bindings); err != nil {
379
-		return nil, err
380
-	}
381
-
382
-	return res, nil
383
-}
384
-
385
-// bindHostPorts allocates and binds host ports for the given cfg. The
386
-// caller is responsible for ensuring that all entries in cfg map the same proto,
387
-// container port, and host port range (their host addresses must differ).
388
-func bindHostPorts(
389
-	ctx context.Context,
390
-	cfg []portmapperapi.PortBindingReq,
391
-	proxyPath string,
392
-	pdc portDriverClient,
393
-	fwn firewaller.Network,
394
-) ([]portmapperapi.PortBinding, error) {
395
-	if len(cfg) == 0 {
396
-		return nil, nil
397
-	}
398
-	// Ensure that all of cfg's entries have the same proto and ports.
399
-	proto, port, hostPort, hostPortEnd := cfg[0].Proto, cfg[0].Port, cfg[0].HostPort, cfg[0].HostPortEnd
400
-	for _, c := range cfg[1:] {
401
-		if c.Proto != proto || c.Port != port || c.HostPort != hostPort || c.HostPortEnd != hostPortEnd {
402
-			return nil, types.InternalErrorf("port binding mismatch %d/%s:%d-%d, %d/%s:%d-%d",
403
-				port, proto, hostPort, hostPortEnd,
404
-				port, c.Proto, c.HostPort, c.HostPortEnd)
405
-		}
406
-	}
407
-
408
-	// Try up to maxAllocatePortAttempts times to get a port that's not already allocated.
409
-	var err error
410
-	for i := 0; i < maxAllocatePortAttempts; i++ {
411
-		var b []portmapperapi.PortBinding
412
-		b, err = attemptBindHostPorts(ctx, cfg, proto, hostPort, hostPortEnd, proxyPath, pdc, fwn)
413
-		if err == nil {
414
-			return b, nil
415
-		}
416
-		// There is no point in immediately retrying to map an explicitly chosen port.
417
-		if hostPort != 0 && hostPort == hostPortEnd {
418
-			log.G(ctx).WithError(err).Warnf("Failed to allocate and map port")
419
-			break
420
-		}
421
-		log.G(ctx).WithFields(log.Fields{
422
-			"error":   err,
423
-			"attempt": i + 1,
424
-		}).Warn("Failed to allocate and map port")
425
-	}
426
-	return nil, err
427
-}
428
-
429
-// attemptBindHostPorts allocates host ports for each NAT port mapping, and
430
-// reserves those ports by binding them.
431
-//
432
-// If the allocator doesn't have an available port in the required range, or the
433
-// port can't be bound (perhaps because another process has already bound it),
434
-// all resources are released and an error is returned. When ports are
435
-// successfully reserved, a PortBinding is returned for each mapping.
436
-func attemptBindHostPorts(
437
-	ctx context.Context,
438
-	cfg []portmapperapi.PortBindingReq,
439
-	proto types.Protocol,
440
-	hostPortStart, hostPortEnd uint16,
441
-	proxyPath string,
442
-	pdc portDriverClient,
443
-	fwn firewaller.Network,
444
-) (_ []portmapperapi.PortBinding, retErr error) {
445
-	var err error
446
-	var port int
447
-
448
-	addrs := make([]net.IP, 0, len(cfg))
449
-	for i := range cfg {
450
-		cfg[i] = setChildHostIP(pdc, cfg[i])
451
-		addrs = append(addrs, cfg[i].ChildHostIP)
452
-	}
453
-
454
-	pa := portallocator.NewOSAllocator()
455
-	port, socks, err := pa.RequestPortsInRange(addrs, proto, int(hostPortStart), int(hostPortEnd))
456
-	if err != nil {
457
-		return nil, err
458
-	}
459
-	defer func() {
460
-		if retErr != nil {
461
-			pa.ReleasePorts(addrs, proto, port)
462
-		}
463
-	}()
464
-
465
-	if len(socks) != len(cfg) {
466
-		for _, sock := range socks {
467
-			if err := sock.Close(); err != nil {
468
-				log.G(ctx).WithError(err).Warn("Failed to close socket")
469
-			}
470
-		}
471
-		return nil, types.InternalErrorf("port allocator returned %d sockets for %d port bindings", len(socks), len(cfg))
343
+	bnd.Mapper = "nat"
344
+	if disableNAT {
345
+		bnd.Mapper = "routed"
472 346
 	}
473
-
474
-	res := make([]portmapperapi.PortBinding, 0, len(cfg))
475
-	defer func() {
476
-		if retErr != nil {
477
-			if err := releasePortBindings(res, fwn); err != nil {
478
-				log.G(ctx).WithError(err).Warn("Failed to release port bindings")
479
-			}
480
-		}
481
-	}()
482
-
483
-	for i := range cfg {
484
-		pb := portmapperapi.PortBinding{
485
-			PortBinding: cfg[i].PortBinding.GetCopy(),
486
-			BoundSocket: socks[i],
487
-			ChildHostIP: cfg[i].ChildHostIP,
488
-		}
489
-		pb.PortBinding.HostPort = uint16(port)
490
-		pb.PortBinding.HostPortEnd = pb.HostPort
491
-		res = append(res, pb)
492
-	}
493
-
494
-	if err := configPortDriver(ctx, res, pdc); err != nil {
495
-		return nil, err
496
-	}
497
-	if err := fwn.AddPorts(ctx, mergeChildHostIPs(res)); err != nil {
498
-		return nil, err
499
-	}
500
-	// Now the firewall rules are set up, it's safe to listen on the socket. (Listening
501
-	// earlier could result in dropped connections if the proxy becomes unreachable due
502
-	// to NAT rules sending packets directly to the container.)
503
-	//
504
-	// If not starting the proxy, nothing will ever accept a connection on the
505
-	// socket. Listen here anyway because SO_REUSEADDR is set, so bind() won't notice
506
-	// the problem if a port's bound to both INADDR_ANY and a specific address. (Also
507
-	// so the binding shows up in "netstat -at".)
508
-	if err := listenBoundPorts(res, proxyPath); err != nil {
509
-		return nil, err
510
-	}
511
-	return res, nil
512
-}
513
-
514
-// configPortDriver passes the port binding's details to rootlesskit, and updates the
515
-// port binding with callbacks to remove the rootlesskit config (or marks the binding as
516
-// unsupported by rootlesskit).
517
-func configPortDriver(ctx context.Context, pbs []portmapperapi.PortBinding, pdc portDriverClient) error {
518
-	for i := range pbs {
519
-		b := pbs[i]
520
-		if pdc != nil && b.HostPort != 0 {
521
-			var err error
522
-			hip, ok := netip.AddrFromSlice(b.HostIP)
523
-			if !ok {
524
-				return fmt.Errorf("invalid host IP address in %s", b)
525
-			}
526
-			chip, ok := netip.AddrFromSlice(b.ChildHostIP)
527
-			if !ok {
528
-				return fmt.Errorf("invalid child host IP address %s in %s", b.ChildHostIP, b)
529
-			}
530
-			pbs[i].PortDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
531
-			if err != nil {
532
-				var pErr *rlkclient.ProtocolUnsupportedError
533
-				if errors.As(err, &pErr) {
534
-					log.G(ctx).WithFields(log.Fields{
535
-						"error": pErr,
536
-					}).Warnf("discarding request for %q", net.JoinHostPort(hip.String(), strconv.Itoa(int(b.HostPort))))
537
-					pbs[i].RootlesskitUnsupported = true
538
-					continue
539
-				}
540
-				return err
541
-			}
542
-		}
543
-	}
544
-	return nil
545
-}
546
-
547
-func listenBoundPorts(pbs []portmapperapi.PortBinding, proxyPath string) error {
548
-	for i := range pbs {
549
-		if pbs[i].BoundSocket == nil || pbs[i].RootlesskitUnsupported || pbs[i].Proto == types.UDP {
550
-			continue
551
-		}
552
-		rc, err := pbs[i].BoundSocket.SyscallConn()
553
-		if err != nil {
554
-			return fmt.Errorf("raw conn not available on %s socket: %w", pbs[i].Proto, err)
555
-		}
556
-		if errC := rc.Control(func(fd uintptr) {
557
-			somaxconn := 0
558
-			// SCTP sockets do not support somaxconn=0
559
-			if proxyPath != "" || pbs[i].Proto == types.SCTP {
560
-				somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
561
-			}
562
-			err = syscall.Listen(int(fd), somaxconn)
563
-		}); errC != nil {
564
-			return fmt.Errorf("failed to Control %s socket: %w", pbs[i].Proto, err)
565
-		}
566
-		if err != nil {
567
-			return fmt.Errorf("failed to listen on %s socket: %w", pbs[i].Proto, err)
568
-		}
569
-	}
570
-	return nil
347
+	return bnd, true
571 348
 }
572 349
 
573 350
 // releasePorts attempts to release all port bindings, does not stop on failure
... ...
@@ -578,36 +329,25 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
578 578
 	ep.portBindingState = portBindingMode{}
579 579
 	n.Unlock()
580 580
 
581
-	return releasePortBindings(pbs, n.firewallerNetwork)
581
+	return n.unmapPBs(context.TODO(), pbs)
582 582
 }
583 583
 
584
-func releasePortBindings(pbs []portmapperapi.PortBinding, fwn firewaller.Network) error {
584
+func (n *bridgeNetwork) unmapPBs(ctx context.Context, bindings []portmapperapi.PortBinding) error {
585
+	pms := n.portMappers()
586
+
585 587
 	var errs []error
586
-	for _, pb := range pbs {
587
-		if pb.BoundSocket != nil {
588
-			if err := pb.BoundSocket.Close(); err != nil {
589
-				errs = append(errs, fmt.Errorf("failed to close socket for port mapping %s: %w", pb, err))
590
-			}
591
-		}
592
-		if pb.PortDriverRemove != nil {
593
-			if err := pb.PortDriverRemove(); err != nil {
594
-				errs = append(errs, err)
595
-			}
596
-		}
597
-		if pb.StopProxy != nil {
598
-			if err := pb.StopProxy(); err != nil && !errors.Is(err, os.ErrProcessDone) {
599
-				errs = append(errs, fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, err))
600
-			}
588
+	for _, b := range bindings {
589
+		pm, err := pms.Get(b.Mapper)
590
+		if err != nil {
591
+			errs = append(errs, fmt.Errorf("unmapping port binding %s: %w", b.PortBinding, err))
592
+			continue
601 593
 		}
602
-	}
603
-	if err := fwn.DelPorts(context.TODO(), mergeChildHostIPs(pbs)); err != nil {
604
-		errs = append(errs, err)
605
-	}
606
-	for _, pb := range pbs {
607
-		if pb.HostPort > 0 {
608
-			portallocator.Get().ReleasePort(pb.ChildHostIP, pb.Proto.String(), int(pb.HostPort))
594
+
595
+		if err := pm.UnmapPorts(ctx, []portmapperapi.PortBinding{b}, n.firewallerNetwork); err != nil {
596
+			errs = append(errs, fmt.Errorf("unmapping port binding %s: %w", b.PortBinding, err))
609 597
 		}
610 598
 	}
599
+
611 600
 	return errors.Join(errs...)
612 601
 }
613 602
 
... ...
@@ -1,3 +1,6 @@
1
+// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
2
+//go:build go1.23
3
+
1 4
 package bridge
2 5
 
3 6
 import (
... ...
@@ -7,6 +10,7 @@ import (
7 7
 	"net"
8 8
 	"net/netip"
9 9
 	"os"
10
+	"slices"
10 11
 	"strconv"
11 12
 	"strings"
12 13
 	"syscall"
... ...
@@ -19,7 +23,10 @@ import (
19 19
 	"github.com/docker/docker/daemon/libnetwork/ns"
20 20
 	"github.com/docker/docker/daemon/libnetwork/portallocator"
21 21
 	"github.com/docker/docker/daemon/libnetwork/portmapperapi"
22
+	"github.com/docker/docker/daemon/libnetwork/portmappers/nat"
23
+	"github.com/docker/docker/daemon/libnetwork/portmappers/routed"
22 24
 	"github.com/docker/docker/daemon/libnetwork/types"
25
+	"github.com/docker/docker/internal/sliceutil"
23 26
 	"github.com/docker/docker/internal/testutils/netnsutils"
24 27
 	"github.com/docker/docker/internal/testutils/storeutils"
25 28
 	"github.com/sirupsen/logrus"
... ...
@@ -32,7 +39,12 @@ func TestPortMappingConfig(t *testing.T) {
32 32
 	defer netnsutils.SetupTestOSContext(t)()
33 33
 	useStubFirewaller(t)
34 34
 
35
-	d := newDriver(storeutils.NewTempStore(t), &drvregistry.PortMappers{})
35
+	pms := drvregistry.PortMappers{}
36
+	pm := &stubPortMapper{}
37
+	err := pms.Register("nat", pm)
38
+	assert.NilError(t, err)
39
+
40
+	d := newDriver(storeutils.NewTempStore(t), &pms)
36 41
 
37 42
 	config := &configuration{
38 43
 		EnableIPTables: true,
... ...
@@ -61,7 +73,7 @@ func TestPortMappingConfig(t *testing.T) {
61 61
 	}
62 62
 
63 63
 	ipdList4 := getIPv4Data(t)
64
-	err := d.CreateNetwork(context.Background(), "dummy", netOptions, nil, ipdList4, getIPv6Data(t))
64
+	err = d.CreateNetwork(context.Background(), "dummy", netOptions, nil, ipdList4, getIPv6Data(t))
65 65
 	if err != nil {
66 66
 		t.Fatalf("Failed to create bridge: %v", err)
67 67
 	}
... ...
@@ -117,7 +129,12 @@ func TestPortMappingV6Config(t *testing.T) {
117 117
 		t.Fatalf("Could not bring loopback iface up: %v", err)
118 118
 	}
119 119
 
120
-	d := newDriver(storeutils.NewTempStore(t), &drvregistry.PortMappers{})
120
+	pms := drvregistry.PortMappers{}
121
+	pm := &stubPortMapper{}
122
+	err := pms.Register("nat", pm)
123
+	assert.NilError(t, err)
124
+
125
+	d := newDriver(storeutils.NewTempStore(t), &pms)
121 126
 
122 127
 	config := &configuration{
123 128
 		EnableIPTables:  true,
... ...
@@ -147,7 +164,7 @@ func TestPortMappingV6Config(t *testing.T) {
147 147
 
148 148
 	ipdList4 := getIPv4Data(t)
149 149
 	ipdList6 := getIPv6Data(t)
150
-	err := d.CreateNetwork(context.Background(), "dummy", netOptions, nil, ipdList4, ipdList6)
150
+	err = d.CreateNetwork(context.Background(), "dummy", netOptions, nil, ipdList4, ipdList6)
151 151
 	if err != nil {
152 152
 		t.Fatalf("Failed to create bridge: %v", err)
153 153
 	}
... ...
@@ -196,30 +213,6 @@ func loopbackUp() error {
196 196
 	return nlHandle.LinkSetUp(iface)
197 197
 }
198 198
 
199
-func TestBindHostPortsError(t *testing.T) {
200
-	cfg := []portmapperapi.PortBindingReq{
201
-		{
202
-			PortBinding: types.PortBinding{
203
-				Proto:       types.TCP,
204
-				Port:        80,
205
-				HostPort:    8080,
206
-				HostPortEnd: 8080,
207
-			},
208
-		},
209
-		{
210
-			PortBinding: types.PortBinding{
211
-				Proto:       types.TCP,
212
-				Port:        80,
213
-				HostPort:    8080,
214
-				HostPortEnd: 8081,
215
-			},
216
-		},
217
-	}
218
-	pbs, err := bindHostPorts(context.Background(), cfg, "", nil, nil)
219
-	assert.Check(t, is.Error(err, "port binding mismatch 80/tcp:8080-8080, 80/tcp:8080-8081"))
220
-	assert.Check(t, is.Nil(pbs))
221
-}
222
-
223 199
 func newIPNet(t *testing.T, cidr string) *net.IPNet {
224 200
 	t.Helper()
225 201
 	ip, ipNet, err := net.ParseCIDR(cidr)
... ...
@@ -242,7 +235,7 @@ func TestAddPortMappings(t *testing.T) {
242 242
 		gwMode6      gwMode
243 243
 		cfg          []portmapperapi.PortBindingReq
244 244
 		defHostIP    net.IP
245
-		proxyPath    string
245
+		enableProxy  bool
246 246
 		hairpin      bool
247 247
 		busyPortIPv4 int
248 248
 		rootless     bool
... ...
@@ -267,7 +260,7 @@ func TestAddPortMappings(t *testing.T) {
267 267
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
268 268
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
269 269
 			},
270
-			proxyPath: "/dummy/path/to/proxy",
270
+			enableProxy: true,
271 271
 			expPBs: []types.PortBinding{
272 272
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
273 273
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -276,24 +269,24 @@ func TestAddPortMappings(t *testing.T) {
276 276
 			},
277 277
 		},
278 278
 		{
279
-			name:      "specific host port",
280
-			epAddrV4:  ctrIP4,
281
-			epAddrV6:  ctrIP6,
282
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
283
-			proxyPath: "/dummy/path/to/proxy",
279
+			name:        "specific host port",
280
+			epAddrV4:    ctrIP4,
281
+			epAddrV6:    ctrIP6,
282
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
283
+			enableProxy: true,
284 284
 			expPBs: []types.PortBinding{
285 285
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
286 286
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
287 287
 			},
288 288
 		},
289 289
 		{
290
-			name:      "nat explicitly enabled",
291
-			epAddrV4:  ctrIP4,
292
-			epAddrV6:  ctrIP6,
293
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
294
-			gwMode4:   gwModeNAT,
295
-			gwMode6:   gwModeNAT,
296
-			proxyPath: "/dummy/path/to/proxy",
290
+			name:        "nat explicitly enabled",
291
+			epAddrV4:    ctrIP4,
292
+			epAddrV6:    ctrIP6,
293
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
294
+			gwMode4:     gwModeNAT,
295
+			gwMode6:     gwModeNAT,
296
+			enableProxy: true,
297 297
 			expPBs: []types.PortBinding{
298 298
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
299 299
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
... ...
@@ -304,27 +297,27 @@ func TestAddPortMappings(t *testing.T) {
304 304
 			epAddrV4:     ctrIP4,
305 305
 			epAddrV6:     ctrIP6,
306 306
 			cfg:          []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
307
-			proxyPath:    "/dummy/path/to/proxy",
307
+			enableProxy:  true,
308 308
 			busyPortIPv4: 8080,
309 309
 			expErr:       "failed to bind host port 0.0.0.0:8080/tcp: address already in use",
310 310
 		},
311 311
 		{
312
-			name:      "ipv4 mapped container address with specific host port",
313
-			epAddrV4:  ctrIP4Mapped,
314
-			epAddrV6:  ctrIP6,
315
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
316
-			proxyPath: "/dummy/path/to/proxy",
312
+			name:        "ipv4 mapped container address with specific host port",
313
+			epAddrV4:    ctrIP4Mapped,
314
+			epAddrV6:    ctrIP6,
315
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}}},
316
+			enableProxy: true,
317 317
 			expPBs: []types.PortBinding{
318 318
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
319 319
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
320 320
 			},
321 321
 		},
322 322
 		{
323
-			name:      "ipv4 mapped host address with specific host port",
324
-			epAddrV4:  ctrIP4,
325
-			epAddrV6:  ctrIP6,
326
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}},
327
-			proxyPath: "/dummy/path/to/proxy",
323
+			name:        "ipv4 mapped host address with specific host port",
324
+			epAddrV4:    ctrIP4,
325
+			epAddrV6:    ctrIP6,
326
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}}},
327
+			enableProxy: true,
328 328
 			expPBs: []types.PortBinding{
329 329
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080},
330 330
 			},
... ...
@@ -334,7 +327,7 @@ func TestAddPortMappings(t *testing.T) {
334 334
 			epAddrV4:     ctrIP4,
335 335
 			epAddrV6:     ctrIP6,
336 336
 			cfg:          []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}}},
337
-			proxyPath:    "/dummy/path/to/proxy",
337
+			enableProxy:  true,
338 338
 			busyPortIPv4: 8080,
339 339
 			expPBs: []types.PortBinding{
340 340
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081},
... ...
@@ -349,7 +342,7 @@ func TestAddPortMappings(t *testing.T) {
349 349
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081}},
350 350
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081}},
351 351
 			},
352
-			proxyPath:    "/dummy/path/to/proxy",
352
+			enableProxy:  true,
353 353
 			busyPortIPv4: 8080,
354 354
 			expPBs: []types.PortBinding{
355 355
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081},
... ...
@@ -368,7 +361,7 @@ func TestAddPortMappings(t *testing.T) {
368 368
 				{PortBinding: types.PortBinding{Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083}},
369 369
 				{PortBinding: types.PortBinding{Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083}},
370 370
 			},
371
-			proxyPath:    "/dummy/path/to/proxy",
371
+			enableProxy:  true,
372 372
 			busyPortIPv4: 8082,
373 373
 			expPBs: []types.PortBinding{
374 374
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
... ...
@@ -394,7 +387,7 @@ func TestAddPortMappings(t *testing.T) {
394 394
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082}},
395 395
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082}},
396 396
 			},
397
-			proxyPath:    "/dummy/path/to/proxy",
397
+			enableProxy:  true,
398 398
 			busyPortIPv4: 8081,
399 399
 			expErr:       "failed to bind host port 0.0.0.0:8081",
400 400
 		},
... ...
@@ -405,7 +398,7 @@ func TestAddPortMappings(t *testing.T) {
405 405
 				{PortBinding: types.PortBinding{Proto: types.TCP, HostIP: net.IPv4zero, Port: 80}},
406 406
 				{PortBinding: types.PortBinding{Proto: types.TCP, HostIP: net.IPv6zero, Port: 80}},
407 407
 			},
408
-			proxyPath: "/dummy/path/to/proxy",
408
+			enableProxy: true,
409 409
 			expPBs: []types.PortBinding{
410 410
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort},
411 411
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -417,7 +410,7 @@ func TestAddPortMappings(t *testing.T) {
417 417
 			cfg: []portmapperapi.PortBindingReq{
418 418
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
419 419
 			},
420
-			proxyPath:   "/dummy/path/to/proxy",
420
+			enableProxy: true,
421 421
 			noProxy6To4: true,
422 422
 			expPBs: []types.PortBinding{
423 423
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: firstEphemPort},
... ...
@@ -436,45 +429,45 @@ func TestAddPortMappings(t *testing.T) {
436 436
 			},
437 437
 		},
438 438
 		{
439
-			name:      "default host ip is nonzero v4",
440
-			epAddrV4:  ctrIP4,
441
-			epAddrV6:  ctrIP6,
442
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
443
-			proxyPath: "/dummy/path/to/proxy",
444
-			defHostIP: newIPNet(t, "127.0.0.1/8").IP,
439
+			name:        "default host ip is nonzero v4",
440
+			epAddrV4:    ctrIP4,
441
+			epAddrV6:    ctrIP6,
442
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
443
+			enableProxy: true,
444
+			defHostIP:   newIPNet(t, "127.0.0.1/8").IP,
445 445
 			expPBs: []types.PortBinding{
446 446
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
447 447
 			},
448 448
 		},
449 449
 		{
450
-			name:      "default host ip is nonzero IPv4-mapped IPv6",
451
-			epAddrV4:  ctrIP4,
452
-			epAddrV6:  ctrIP6,
453
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
454
-			proxyPath: "/dummy/path/to/proxy",
455
-			defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP,
450
+			name:        "default host ip is nonzero IPv4-mapped IPv6",
451
+			epAddrV4:    ctrIP4,
452
+			epAddrV6:    ctrIP6,
453
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
454
+			enableProxy: true,
455
+			defHostIP:   newIPNet(t, "::ffff:127.0.0.1/72").IP,
456 456
 			expPBs: []types.PortBinding{
457 457
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
458 458
 			},
459 459
 		},
460 460
 		{
461
-			name:      "default host ip is v6",
462
-			epAddrV4:  ctrIP4,
463
-			epAddrV6:  ctrIP6,
464
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
465
-			proxyPath: "/dummy/path/to/proxy",
466
-			defHostIP: net.IPv6zero,
461
+			name:        "default host ip is v6",
462
+			epAddrV4:    ctrIP4,
463
+			epAddrV6:    ctrIP6,
464
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
465
+			enableProxy: true,
466
+			defHostIP:   net.IPv6zero,
467 467
 			expPBs: []types.PortBinding{
468 468
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort},
469 469
 			},
470 470
 		},
471 471
 		{
472
-			name:      "default host ip is nonzero v6",
473
-			epAddrV4:  ctrIP4,
474
-			epAddrV6:  ctrIP6,
475
-			cfg:       []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
476
-			proxyPath: "/dummy/path/to/proxy",
477
-			defHostIP: newIPNet(t, "::1/128").IP,
472
+			name:        "default host ip is nonzero v6",
473
+			epAddrV4:    ctrIP4,
474
+			epAddrV6:    ctrIP6,
475
+			cfg:         []portmapperapi.PortBindingReq{{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}}},
476
+			enableProxy: true,
477
+			defHostIP:   newIPNet(t, "::1/128").IP,
478 478
 			expPBs: []types.PortBinding{
479 479
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort},
480 480
 			},
... ...
@@ -487,17 +480,17 @@ func TestAddPortMappings(t *testing.T) {
487 487
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80, HostPort: 8080}},
488 488
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22, HostPort: 2222}},
489 489
 			},
490
-			proxyPath: "/dummy/path/to/proxy",
490
+			enableProxy: true,
491 491
 			expPBs: []types.PortBinding{
492 492
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222},
493 493
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222},
494 494
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080},
495 495
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080},
496 496
 			},
497
-			expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
498
-				"failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
499
-				"failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
500
-				"failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
497
+			expReleaseErr: "unmapping port binding 0.0.0.0:2222:172.19.0.2:22/tcp: failed to stop userland proxy: can't stop now\n" +
498
+				"unmapping port binding [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: failed to stop userland proxy: can't stop now\n" +
499
+				"unmapping port binding 0.0.0.0:8080:172.19.0.2:80/tcp: failed to stop userland proxy: can't stop now\n" +
500
+				"unmapping port binding [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: failed to stop userland proxy: can't stop now",
501 501
 		},
502 502
 		{
503 503
 			name:     "disable nat6",
... ...
@@ -507,8 +500,8 @@ func TestAddPortMappings(t *testing.T) {
507 507
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
508 508
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
509 509
 			},
510
-			proxyPath: "/dummy/path/to/proxy",
511
-			gwMode6:   gwModeRouted,
510
+			enableProxy: true,
511
+			gwMode6:     gwModeRouted,
512 512
 			expPBs: []types.PortBinding{
513 513
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
514 514
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
... ...
@@ -524,9 +517,9 @@ func TestAddPortMappings(t *testing.T) {
524 524
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
525 525
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
526 526
 			},
527
-			proxyPath: "/dummy/path/to/proxy",
528
-			gwMode6:   gwModeRouted,
529
-			defHostIP: net.IPv6loopback,
527
+			enableProxy: true,
528
+			gwMode6:     gwModeRouted,
529
+			defHostIP:   net.IPv6loopback,
530 530
 			expPBs: []types.PortBinding{
531 531
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
532 532
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero},
... ...
@@ -540,8 +533,8 @@ func TestAddPortMappings(t *testing.T) {
540 540
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
541 541
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
542 542
 			},
543
-			proxyPath: "/dummy/path/to/proxy",
544
-			gwMode4:   gwModeRouted,
543
+			enableProxy: true,
544
+			gwMode4:     gwModeRouted,
545 545
 			expPBs: []types.PortBinding{
546 546
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
547 547
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -557,9 +550,9 @@ func TestAddPortMappings(t *testing.T) {
557 557
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
558 558
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
559 559
 			},
560
-			proxyPath: "/dummy/path/to/proxy",
561
-			gwMode4:   gwModeRouted,
562
-			gwMode6:   gwModeRouted,
560
+			enableProxy: true,
561
+			gwMode4:     gwModeRouted,
562
+			gwMode6:     gwModeRouted,
563 563
 			expPBs: []types.PortBinding{
564 564
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
565 565
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
... ...
@@ -587,12 +580,12 @@ func TestAddPortMappings(t *testing.T) {
587 587
 			expLogs: []string{"Cannot map from default host binding address to an IPv4-only container because the userland proxy is disabled"},
588 588
 		},
589 589
 		{
590
-			name:      "routed mode specific address",
591
-			epAddrV4:  ctrIP4,
592
-			epAddrV6:  ctrIP6,
593
-			gwMode4:   gwModeRouted,
594
-			gwMode6:   gwModeRouted,
595
-			proxyPath: "/dummy/path/to/proxy",
590
+			name:        "routed mode specific address",
591
+			epAddrV4:    ctrIP4,
592
+			epAddrV6:    ctrIP6,
593
+			gwMode4:     gwModeRouted,
594
+			gwMode6:     gwModeRouted,
595
+			enableProxy: true,
596 596
 			cfg: []portmapperapi.PortBindingReq{
597 597
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22, HostIP: newIPNet(t, "127.0.0.1/8").IP}},
598 598
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22, HostIP: net.IPv6loopback}},
... ...
@@ -607,12 +600,12 @@ func TestAddPortMappings(t *testing.T) {
607 607
 			},
608 608
 		},
609 609
 		{
610
-			name:      "routed4 nat6 with ipv4 default binding",
611
-			epAddrV4:  ctrIP4,
612
-			epAddrV6:  ctrIP6,
613
-			gwMode4:   gwModeRouted,
614
-			defHostIP: newIPNet(t, "127.0.0.1/8").IP,
615
-			proxyPath: "/dummy/path/to/proxy",
610
+			name:        "routed4 nat6 with ipv4 default binding",
611
+			epAddrV4:    ctrIP4,
612
+			epAddrV6:    ctrIP6,
613
+			gwMode4:     gwModeRouted,
614
+			defHostIP:   newIPNet(t, "127.0.0.1/8").IP,
615
+			enableProxy: true,
616 616
 			cfg: []portmapperapi.PortBindingReq{
617 617
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
618 618
 			},
... ...
@@ -621,12 +614,12 @@ func TestAddPortMappings(t *testing.T) {
621 621
 			},
622 622
 		},
623 623
 		{
624
-			name:      "routed4 nat6 with ipv6 default binding",
625
-			epAddrV4:  ctrIP4,
626
-			epAddrV6:  ctrIP6,
627
-			gwMode4:   gwModeRouted,
628
-			defHostIP: net.IPv6loopback,
629
-			proxyPath: "/dummy/path/to/proxy",
624
+			name:        "routed4 nat6 with ipv6 default binding",
625
+			epAddrV4:    ctrIP4,
626
+			epAddrV6:    ctrIP6,
627
+			gwMode4:     gwModeRouted,
628
+			defHostIP:   net.IPv6loopback,
629
+			enableProxy: true,
630 630
 			cfg: []portmapperapi.PortBindingReq{
631 631
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
632 632
 			},
... ...
@@ -672,7 +665,7 @@ func TestAddPortMappings(t *testing.T) {
672 672
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346}},
673 673
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 12345, HostPort: 12345}},
674 674
 			},
675
-			proxyPath: "/dummy/path/to/proxy",
675
+			enableProxy: true,
676 676
 			expPBs: []types.PortBinding{
677 677
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345},
678 678
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345},
... ...
@@ -693,8 +686,8 @@ func TestAddPortMappings(t *testing.T) {
693 693
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 22}},
694 694
 				{PortBinding: types.PortBinding{Proto: types.TCP, Port: 80}},
695 695
 			},
696
-			proxyPath: "/dummy/path/to/proxy",
697
-			rootless:  true,
696
+			enableProxy: true,
697
+			rootless:    true,
698 698
 			expPBs: []types.PortBinding{
699 699
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
700 700
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -727,17 +720,12 @@ func TestAddPortMappings(t *testing.T) {
727 727
 			useStubFirewaller(t)
728 728
 
729 729
 			// Mock the startProxy function used by the code under test.
730
-			origStartProxy := startProxy
731
-			defer func() { startProxy = origStartProxy }()
732 730
 			proxies := map[proxyCall]bool{} // proxy -> is not stopped
733
-			startProxy = func(pb types.PortBinding,
734
-				proxyPath string,
735
-				listenSock *os.File,
736
-			) (stop func() error, retErr error) {
731
+			startProxy := func(pb types.PortBinding, listenSock *os.File) (stop func() error, retErr error) {
737 732
 				if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil {
738 733
 					return nil, errors.New("busy port")
739 734
 				}
740
-				c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath)
735
+				c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port))
741 736
 				if _, ok := proxies[c]; ok {
742 737
 					return nil, fmt.Errorf("duplicate proxy: %#v", c)
743 738
 				}
... ...
@@ -754,13 +742,6 @@ func TestAddPortMappings(t *testing.T) {
754 754
 				}, nil
755 755
 			}
756 756
 
757
-			// Mock the RootlessKit port driver.
758
-			origNewPortDriverClient := newPortDriverClient
759
-			defer func() { newPortDriverClient = origNewPortDriverClient }()
760
-			newPortDriverClient = func(ctx context.Context) (portDriverClient, error) {
761
-				return newMockPortDriverClient(ctx)
762
-			}
763
-
764 757
 			if len(tc.hostAddrs) > 0 {
765 758
 				dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}}
766 759
 				err := netlink.LinkAdd(dummyLink)
... ...
@@ -783,6 +764,21 @@ func TestAddPortMappings(t *testing.T) {
783 783
 				defer ul.Close()
784 784
 			}
785 785
 
786
+			var pdc nat.PortDriverClient
787
+			if tc.rootless {
788
+				pdc = newMockPortDriverClient()
789
+			}
790
+
791
+			pms := &drvregistry.PortMappers{}
792
+			err := nat.Register(pms, nat.Config{
793
+				RlkClient:   pdc,
794
+				EnableProxy: tc.enableProxy,
795
+				StartProxy:  startProxy,
796
+			})
797
+			assert.NilError(t, err)
798
+			err = routed.Register(pms)
799
+			assert.NilError(t, err)
800
+
786 801
 			n := &bridgeNetwork{
787 802
 				config: &networkConfiguration{
788 803
 					BridgeName: "dummybridge",
... ...
@@ -792,26 +788,22 @@ func TestAddPortMappings(t *testing.T) {
792 792
 					GwModeIPv6: tc.gwMode6,
793 793
 				},
794 794
 				bridge: &bridgeInterface{},
795
-				driver: newDriver(storeutils.NewTempStore(t), &drvregistry.PortMappers{}),
795
+				driver: newDriver(storeutils.NewTempStore(t), pms),
796 796
 			}
797 797
 			genericOption := map[string]interface{}{
798 798
 				netlabel.GenericData: &configuration{
799
-					EnableIPTables:      true,
800
-					EnableIP6Tables:     true,
801
-					EnableUserlandProxy: tc.proxyPath != "",
802
-					UserlandProxyPath:   tc.proxyPath,
803
-					Hairpin:             tc.hairpin,
804
-					Rootless:            tc.rootless,
799
+					EnableIPTables:  true,
800
+					EnableIP6Tables: true,
801
+					Hairpin:         tc.hairpin,
805 802
 				},
806 803
 			}
807
-			err := n.driver.configure(genericOption)
804
+			err = n.driver.configure(genericOption)
808 805
 			assert.NilError(t, err)
809 806
 			fwn, err := n.newFirewallerNetwork(context.Background())
810 807
 			assert.NilError(t, err)
811 808
 			assert.Check(t, fwn != nil, "no firewaller network")
812 809
 			n.firewallerNetwork = fwn
813 810
 
814
-			assert.Check(t, is.Equal(n.driver.portDriverClient == nil, !tc.rootless))
815 811
 			expChildIP := func(hostIP net.IP) net.IP {
816 812
 				if !tc.rootless {
817 813
 					return hostIP
... ...
@@ -860,7 +852,7 @@ func TestAddPortMappings(t *testing.T) {
860 860
 			assert.Assert(t, is.Len(pbs, len(tc.expPBs)))
861 861
 
862 862
 			fw := n.driver.firewaller.(*firewaller.StubFirewaller)
863
-			assert.Check(t, is.Equal(fw.Hairpin, tc.proxyPath == ""))
863
+			assert.Check(t, is.Equal(fw.Hairpin, !tc.enableProxy))
864 864
 			assert.Check(t, fw.IPv4)
865 865
 			assert.Check(t, fw.IPv6)
866 866
 
... ...
@@ -903,7 +895,7 @@ func TestAddPortMappings(t *testing.T) {
903 903
 			}
904 904
 
905 905
 			// Check a docker-proxy was started and stopped for each expected port binding.
906
-			if tc.proxyPath != "" {
906
+			if tc.enableProxy {
907 907
 				expProxies := map[proxyCall]bool{}
908 908
 				for _, expPB := range tc.expPBs {
909 909
 					hip := expChildIP(expPB.HostIP)
... ...
@@ -913,7 +905,7 @@ func TestAddPortMappings(t *testing.T) {
913 913
 					}
914 914
 					p := newProxyCall(expPB.Proto.String(),
915 915
 						hip, int(expPB.HostPort),
916
-						expPB.IP, int(expPB.Port), tc.proxyPath)
916
+						expPB.IP, int(expPB.Port))
917 917
 					expProxies[p] = tc.expReleaseErr != ""
918 918
 				}
919 919
 				assert.Check(t, is.DeepEqual(expProxies, proxies))
... ...
@@ -921,8 +913,8 @@ func TestAddPortMappings(t *testing.T) {
921 921
 
922 922
 			// Check the port driver has seen the expected port mappings and no others,
923 923
 			// and that they have all been closed.
924
-			if n.driver.portDriverClient != nil {
925
-				pdc := n.driver.portDriverClient.(*mockPortDriverClient)
924
+			if pdc != nil {
925
+				pdc := pdc.(*mockPortDriverClient)
926 926
 				expPorts := map[mockPortDriverPort]bool{}
927 927
 				for _, expPB := range tc.expPBs {
928 928
 					if expPB.HostPort == 0 {
... ...
@@ -943,18 +935,16 @@ func TestAddPortMappings(t *testing.T) {
943 943
 }
944 944
 
945 945
 // Type for tracking calls to StartProxy.
946
-type proxyCall struct{ proto, host, container, proxyPath string }
946
+type proxyCall struct{ proto, host, container string }
947 947
 
948 948
 func newProxyCall(proto string,
949 949
 	hostIP net.IP, hostPort int,
950 950
 	containerIP net.IP, containerPort int,
951
-	proxyPath string,
952 951
 ) proxyCall {
953 952
 	return proxyCall{
954 953
 		proto:     proto,
955 954
 		host:      fmt.Sprintf("%v:%v", hostIP, hostPort),
956 955
 		container: fmt.Sprintf("%v:%v", containerIP, containerPort),
957
-		proxyPath: proxyPath,
958 956
 	}
959 957
 }
960 958
 
... ...
@@ -975,10 +965,10 @@ type mockPortDriverClient struct {
975 975
 	openPorts map[mockPortDriverPort]bool
976 976
 }
977 977
 
978
-func newMockPortDriverClient(_ context.Context) (*mockPortDriverClient, error) {
978
+func newMockPortDriverClient() *mockPortDriverClient {
979 979
 	return &mockPortDriverClient{
980 980
 		openPorts: map[mockPortDriverPort]bool{},
981
-	}, nil
981
+	}
982 982
 }
983 983
 
984 984
 func (c *mockPortDriverClient) ChildHostIP(hostIP netip.Addr) netip.Addr {
... ...
@@ -1002,3 +992,33 @@ func (c *mockPortDriverClient) AddPort(_ context.Context, proto string, hostIP,
1002 1002
 		return nil
1003 1003
 	}, nil
1004 1004
 }
1005
+
1006
+// stubPortMapper is a stub port mapper for tests. reqs holds every non-empty
1007
+// batch of requests passed to MapPorts, in call order; mapped holds the
1008
+// bindings returned by MapPorts that UnmapPorts has not removed yet.
1009
+type stubPortMapper struct {
1010
+	reqs   [][]portmapperapi.PortBindingReq
1011
+	mapped []portmapperapi.PortBinding
1012
+}
1013
+
1014
+// MapPorts records reqs and returns one binding per request, each carrying
1015
+// only the request's embedded PortBinding. An empty reqs is not recorded.
1016
+func (pm *stubPortMapper) MapPorts(_ context.Context, reqs []portmapperapi.PortBindingReq, _ portmapperapi.Firewaller) ([]portmapperapi.PortBinding, error) {
1017
+	if len(reqs) == 0 {
1018
+		return []portmapperapi.PortBinding{}, nil
1019
+	}
1020
+	pm.reqs = append(pm.reqs, reqs)
1021
+	pbs := sliceutil.Map(reqs, func(req portmapperapi.PortBindingReq) portmapperapi.PortBinding {
1022
+		return portmapperapi.PortBinding{PortBinding: req.PortBinding}
1023
+	})
1024
+	pm.mapped = append(pm.mapped, pbs...)
1025
+	return pbs, nil
1026
+}
1027
+
1028
+// UnmapPorts removes each binding in reqs from pm.mapped. It stops at, and
1029
+// returns an error for, the first binding that is not currently mapped.
1030
+func (pm *stubPortMapper) UnmapPorts(_ context.Context, reqs []portmapperapi.PortBinding, _ portmapperapi.Firewaller) error {
1031
+	for _, req := range reqs {
1032
+		idx := slices.IndexFunc(pm.mapped, func(pb portmapperapi.PortBinding) bool {
1033
+			return pb.Equal(&req.PortBinding)
1034
+		})
1035
+		if idx == -1 {
1036
+			return fmt.Errorf("stubPortMapper.UnmapPorts: pb doesn't exist %v", req)
1037
+		}
1038
+		// slices.Delete removes the half-open range [idx, idx+1); the range
1039
+		// (idx, idx) is empty and would leave the binding in pm.mapped.
1040
+		pm.mapped = slices.Delete(pm.mapped, idx, idx+1)
1041
+	}
1042
+	return nil
1043
+}
... ...
@@ -3,6 +3,7 @@ package libnetwork
3 3
 import (
4 4
 	"context"
5 5
 	"fmt"
6
+	"os"
6 7
 
7 8
 	"github.com/docker/docker/daemon/libnetwork/config"
8 9
 	"github.com/docker/docker/daemon/libnetwork/datastore"
... ...
@@ -14,6 +15,11 @@ import (
14 14
 	"github.com/docker/docker/daemon/libnetwork/drivers/null"
15 15
 	"github.com/docker/docker/daemon/libnetwork/drivers/overlay"
16 16
 	"github.com/docker/docker/daemon/libnetwork/drvregistry"
17
+	"github.com/docker/docker/daemon/libnetwork/internal/rlkclient"
18
+	"github.com/docker/docker/daemon/libnetwork/portmapper"
19
+	"github.com/docker/docker/daemon/libnetwork/portmappers/nat"
20
+	"github.com/docker/docker/daemon/libnetwork/portmappers/routed"
21
+	"github.com/docker/docker/daemon/libnetwork/types"
17 22
 )
18 23
 
19 24
 func registerNetworkDrivers(r driverapi.Registerer, store *datastore.Store, pms *drvregistry.PortMappers, driverConfig func(string) map[string]interface{}) error {
... ...
@@ -45,5 +51,28 @@ func registerNetworkDrivers(r driverapi.Registerer, store *datastore.Store, pms
45 45
 }
46 46
 
47 47
 func registerPortMappers(ctx context.Context, r *drvregistry.PortMappers, cfg *config.Config) error {
48
+	var pdc *rlkclient.PortDriverClient
49
+	if cfg.Rootless {
50
+		var err error
51
+		pdc, err = rlkclient.NewPortDriverClient(ctx)
52
+		if err != nil {
53
+			return fmt.Errorf("failed to create port driver client: %w", err)
54
+		}
55
+	}
56
+
57
+	if err := nat.Register(r, nat.Config{
58
+		RlkClient: pdc,
59
+		StartProxy: func(pb types.PortBinding, file *os.File) (func() error, error) {
60
+			return portmapper.StartProxy(pb, cfg.UserlandProxyPath, file)
61
+		},
62
+		EnableProxy: cfg.EnableUserlandProxy && cfg.UserlandProxyPath != "",
63
+	}); err != nil {
64
+		return fmt.Errorf("registering nat portmapper: %w", err)
65
+	}
66
+
67
+	if err := routed.Register(r); err != nil {
68
+		return fmt.Errorf("registering routed portmapper: %w", err)
69
+	}
70
+
48 71
 	return nil
49 72
 }
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"net"
6 6
 	"net/netip"
7 7
 	"os"
8
+	"strings"
8 9
 
9 10
 	"github.com/docker/docker/daemon/libnetwork/types"
10 11
 )
... ...
@@ -36,14 +37,12 @@ type PortMapper interface {
36 36
 
37 37
 type PortBindingReq struct {
38 38
 	types.PortBinding
39
+	// Mapper is the name of the port mapper used to process this PortBindingReq.
40
+	Mapper string
39 41
 	// ChildHostIP is a temporary field used to pass the host IP address as
40 42
 	// seen from the daemon. (It'll be removed once the portmapper API is
41 43
 	// implemented).
42 44
 	ChildHostIP net.IP `json:"-"`
43
-	// DisableNAT is a temporary field used to indicate whether the port is
44
-	// mapped on the host or not. (It'll be removed once the portmapper API is
45
-	// implemented).
46
-	DisableNAT bool `json:"-"`
47 45
 }
48 46
 
49 47
 // Compare defines an ordering over PortBindingReq such that bindings that
... ...
@@ -58,11 +57,8 @@ type PortBindingReq struct {
58 58
 //   - same host ports or ranges are adjacent, then
59 59
 //   - ordered by container IP (then host IP, if set).
60 60
 func (pbReq PortBindingReq) Compare(other PortBindingReq) int {
61
-	if pbReq.DisableNAT != other.DisableNAT {
62
-		if pbReq.DisableNAT {
63
-			return 1 // NAT disabled bindings come last
64
-		}
65
-		return -1
61
+	if pbReq.Mapper != other.Mapper {
62
+		return strings.Compare(pbReq.Mapper, other.Mapper)
66 63
 	}
67 64
 	// Exact host port < host port range.
68 65
 	aIsRange := pbReq.HostPort == 0 || pbReq.HostPort != pbReq.HostPortEnd
... ...
@@ -97,6 +93,8 @@ func (pbReq PortBindingReq) Compare(other PortBindingReq) int {
97 97
 
98 98
 type PortBinding struct {
99 99
 	types.PortBinding
100
+	// Mapper is the name of the port mapper used to process this PortBinding.
101
+	Mapper string
100 102
 	// BoundSocket is used to reserve a host port for the binding. If the
101 103
 	// userland proxy is in-use, it's passed to the proxy when the proxy is
102 104
 	// started, then it's closed and set to nil here.
... ...
@@ -24,7 +24,7 @@ func TestPortBindingReqsCompare(t *testing.T) {
24 24
 	assert.Check(t, pb.Compare(pb) == 0) //nolint:gocritic // ignore "dupArg: suspicious method call with the same argument and receiver (gocritic)"
25 25
 
26 26
 	pbA, pbB = pb, pb
27
-	pbB.DisableNAT = true
27
+	pbB.Mapper = "routed"
28 28
 	assert.Check(t, pbA.Compare(pbB) < 0)
29 29
 	assert.Check(t, pbB.Compare(pbA) > 0)
30 30
 
31 31
new file mode 100644
... ...
@@ -0,0 +1,325 @@
0
+package nat
1
+
2
+import (
3
+	"context"
4
+	"errors"
5
+	"fmt"
6
+	"net"
7
+	"net/netip"
8
+	"os"
9
+	"strconv"
10
+	"syscall"
11
+
12
+	"github.com/containerd/log"
13
+	"github.com/docker/docker/daemon/libnetwork/internal/rlkclient"
14
+	"github.com/docker/docker/daemon/libnetwork/portallocator"
15
+	"github.com/docker/docker/daemon/libnetwork/portmapperapi"
16
+	"github.com/docker/docker/daemon/libnetwork/types"
17
+)
18
+
19
const (
	// driverName is the name this port-mapper is registered under.
	driverName = "nat"
	// maxAllocatePortAttempts bounds how many times MapPorts tries to
	// allocate and bind a host port before giving up.
	maxAllocatePortAttempts = 10
)
23
+
24
// PortDriverClient is the subset of the rootlesskit port driver client that
// the port mapper relies on.
type PortDriverClient interface {
	// ChildHostIP returns the address to use in place of hostIP; the result is
	// stored as the ChildHostIP of a port binding request.
	ChildHostIP(hostIP netip.Addr) netip.Addr
	// AddPort asks the port driver to map hostPort for proto between hostIP
	// and childIP. On success it returns a function that removes the mapping.
	AddPort(
		ctx context.Context,
		proto string,
		hostIP, childIP netip.Addr,
		hostPort int,
	) (func() error, error)
}
28
+
29
+type proxyStarter func(types.PortBinding, *os.File) (func() error, error)
30
+
31
+// Register the "nat" port-mapper with libnetwork.
32
+func Register(r portmapperapi.Registerer, cfg Config) error {
33
+	return r.Register(driverName, NewPortMapper(cfg))
34
+}
35
+
36
+type PortMapper struct {
37
+	// pdc is used to interact with rootlesskit port driver.
38
+	pdc         PortDriverClient
39
+	startProxy  proxyStarter
40
+	enableProxy bool
41
+}
42
+
43
+type Config struct {
44
+	// RlkClient is called by MapPorts to determine the ChildHostIP and ask
45
+	// rootlesskit to map ports in its netns.
46
+	RlkClient   PortDriverClient
47
+	StartProxy  proxyStarter
48
+	EnableProxy bool
49
+}
50
+
51
+func NewPortMapper(cfg Config) PortMapper {
52
+	return PortMapper{
53
+		pdc:         cfg.RlkClient,
54
+		startProxy:  cfg.StartProxy,
55
+		enableProxy: cfg.EnableProxy,
56
+	}
57
+}
58
+
59
+// MapPorts allocates and binds host ports for the given cfg. The caller is
60
+// responsible for ensuring that all entries in cfg map the same proto,
61
+// container port, and host port range (their host addresses must differ).
62
+func (pm PortMapper) MapPorts(ctx context.Context, cfg []portmapperapi.PortBindingReq, fwn portmapperapi.Firewaller) ([]portmapperapi.PortBinding, error) {
63
+	if len(cfg) == 0 {
64
+		return nil, nil
65
+	}
66
+	// Ensure that all of cfg's entries have the same proto and ports.
67
+	proto, port, hostPort, hostPortEnd := cfg[0].Proto, cfg[0].Port, cfg[0].HostPort, cfg[0].HostPortEnd
68
+	for _, c := range cfg[1:] {
69
+		if c.Proto != proto || c.Port != port || c.HostPort != hostPort || c.HostPortEnd != hostPortEnd {
70
+			return nil, types.InternalErrorf("port binding mismatch %d/%s:%d-%d, %d/%s:%d-%d",
71
+				port, proto, hostPort, hostPortEnd,
72
+				port, c.Proto, c.HostPort, c.HostPortEnd)
73
+		}
74
+	}
75
+
76
+	// Try up to maxAllocatePortAttempts times to get a port that's not already allocated.
77
+	var bindings []portmapperapi.PortBinding
78
+	var err error
79
+	for i := 0; i < maxAllocatePortAttempts; i++ {
80
+		bindings, err = pm.attemptBindHostPorts(ctx, cfg, proto, hostPort, hostPortEnd, fwn)
81
+		if err == nil {
82
+			break
83
+		}
84
+		// There is no point in immediately retrying to map an explicitly chosen port.
85
+		if hostPort != 0 && hostPort == hostPortEnd {
86
+			log.G(ctx).WithError(err).Warnf("Failed to allocate and map port")
87
+			return nil, err
88
+		}
89
+		log.G(ctx).WithFields(log.Fields{
90
+			"error":   err,
91
+			"attempt": i + 1,
92
+		}).Warn("Failed to allocate and map port")
93
+	}
94
+
95
+	if err != nil {
96
+		// If the retry budget is exhausted and no free port could be found, return
97
+		// the latest error.
98
+		return nil, err
99
+	}
100
+
101
+	// Start userland proxy processes.
102
+	if pm.enableProxy {
103
+		for i := range bindings {
104
+			if bindings[i].BoundSocket == nil || bindings[i].RootlesskitUnsupported || bindings[i].StopProxy != nil {
105
+				continue
106
+			}
107
+			var err error
108
+			bindings[i].StopProxy, err = pm.startProxy(
109
+				bindings[i].ChildPortBinding(), bindings[i].BoundSocket,
110
+			)
111
+			if err != nil {
112
+				return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w",
113
+					bindings[i].PortBinding, err)
114
+			}
115
+			if err := bindings[i].BoundSocket.Close(); err != nil {
116
+				log.G(ctx).WithFields(log.Fields{
117
+					"error":   err,
118
+					"mapping": bindings[i].PortBinding,
119
+				}).Warnf("failed to close proxy socket")
120
+			}
121
+			bindings[i].BoundSocket = nil
122
+		}
123
+	}
124
+
125
+	return bindings, nil
126
+}
127
+
128
+func (pm PortMapper) UnmapPorts(ctx context.Context, pbs []portmapperapi.PortBinding, fwn portmapperapi.Firewaller) error {
129
+	var errs []error
130
+	for _, pb := range pbs {
131
+		if pb.BoundSocket != nil {
132
+			if err := pb.BoundSocket.Close(); err != nil {
133
+				errs = append(errs, fmt.Errorf("failed to close socket for port mapping %s: %w", pb, err))
134
+			}
135
+		}
136
+		if pb.PortDriverRemove != nil {
137
+			if err := pb.PortDriverRemove(); err != nil {
138
+				errs = append(errs, err)
139
+			}
140
+		}
141
+		if pb.StopProxy != nil {
142
+			if err := pb.StopProxy(); err != nil && !errors.Is(err, os.ErrProcessDone) {
143
+				errs = append(errs, fmt.Errorf("failed to stop userland proxy: %w", err))
144
+			}
145
+		}
146
+	}
147
+	if err := fwn.DelPorts(ctx, mergeChildHostIPs(pbs)); err != nil {
148
+		errs = append(errs, err)
149
+	}
150
+	for _, pb := range pbs {
151
+		portallocator.Get().ReleasePort(pb.ChildHostIP, pb.Proto.String(), int(pb.HostPort))
152
+	}
153
+	return errors.Join(errs...)
154
+}
155
+
156
+// attemptBindHostPorts allocates host ports for each NAT port mapping, and
157
+// reserves those ports by binding them.
158
+//
159
+// If the allocator doesn't have an available port in the required range, or the
160
+// port can't be bound (perhaps because another process has already bound it),
161
+// all resources are released and an error is returned. When ports are
162
+// successfully reserved, a PortBinding is returned for each mapping.
163
+func (pm PortMapper) attemptBindHostPorts(
164
+	ctx context.Context,
165
+	cfg []portmapperapi.PortBindingReq,
166
+	proto types.Protocol,
167
+	hostPortStart, hostPortEnd uint16,
168
+	fwn portmapperapi.Firewaller,
169
+) (_ []portmapperapi.PortBinding, retErr error) {
170
+	var err error
171
+	var port int
172
+
173
+	addrs := make([]net.IP, 0, len(cfg))
174
+	for i := range cfg {
175
+		cfg[i] = setChildHostIP(pm.pdc, cfg[i])
176
+		addrs = append(addrs, cfg[i].ChildHostIP)
177
+	}
178
+
179
+	pa := portallocator.NewOSAllocator()
180
+	port, socks, err := pa.RequestPortsInRange(addrs, proto, int(hostPortStart), int(hostPortEnd))
181
+	if err != nil {
182
+		return nil, err
183
+	}
184
+	defer func() {
185
+		if retErr != nil {
186
+			pa.ReleasePorts(addrs, proto, port)
187
+		}
188
+	}()
189
+
190
+	if len(socks) != len(cfg) {
191
+		for _, sock := range socks {
192
+			if err := sock.Close(); err != nil {
193
+				log.G(ctx).WithError(err).Warn("Failed to close socket")
194
+			}
195
+		}
196
+		return nil, types.InternalErrorf("port allocator returned %d sockets for %d port bindings", len(socks), len(cfg))
197
+	}
198
+
199
+	res := make([]portmapperapi.PortBinding, 0, len(cfg))
200
+	defer func() {
201
+		if retErr != nil {
202
+			if err := pm.UnmapPorts(ctx, res, fwn); err != nil {
203
+				log.G(ctx).WithFields(log.Fields{
204
+					"pbs":   res,
205
+					"error": err,
206
+				}).Warn("Failed to release port bindings")
207
+			}
208
+		}
209
+	}()
210
+
211
+	for i := range cfg {
212
+		pb := portmapperapi.PortBinding{
213
+			PortBinding: cfg[i].PortBinding.GetCopy(),
214
+			BoundSocket: socks[i],
215
+			ChildHostIP: cfg[i].ChildHostIP,
216
+		}
217
+		pb.PortBinding.HostPort = uint16(port)
218
+		pb.PortBinding.HostPortEnd = pb.HostPort
219
+		res = append(res, pb)
220
+	}
221
+
222
+	if err := configPortDriver(ctx, res, pm.pdc); err != nil {
223
+		return nil, err
224
+	}
225
+	if err := fwn.AddPorts(ctx, mergeChildHostIPs(res)); err != nil {
226
+		return nil, err
227
+	}
228
+	// Now the firewall rules are set up, it's safe to listen on the socket. (Listening
229
+	// earlier could result in dropped connections if the proxy becomes unreachable due
230
+	// to NAT rules sending packets directly to the container.)
231
+	//
232
+	// If not starting the proxy, nothing will ever accept a connection on the
233
+	// socket. Listen here anyway because SO_REUSEADDR is set, so bind() won't notice
234
+	// the problem if a port's bound to both INADDR_ANY and a specific address. (Also
235
+	// so the binding shows up in "netstat -at".)
236
+	if err := listenBoundPorts(res, pm.enableProxy); err != nil {
237
+		return nil, err
238
+	}
239
+	return res, nil
240
+}
241
+
242
+func setChildHostIP(pdc PortDriverClient, req portmapperapi.PortBindingReq) portmapperapi.PortBindingReq {
243
+	if pdc == nil {
244
+		req.ChildHostIP = req.HostIP
245
+		return req
246
+	}
247
+	hip, _ := netip.AddrFromSlice(req.HostIP)
248
+	req.ChildHostIP = pdc.ChildHostIP(hip).AsSlice()
249
+	return req
250
+}
251
+
252
+// mergeChildHostIPs take a slice of PortBinding and returns a slice of
253
+// types.PortBinding, where the HostIP in each of the results has the
254
+// value of ChildHostIP from the input (if present).
255
+func mergeChildHostIPs(pbs []portmapperapi.PortBinding) []types.PortBinding {
256
+	res := make([]types.PortBinding, 0, len(pbs))
257
+	for _, b := range pbs {
258
+		pb := b.PortBinding
259
+		if b.ChildHostIP != nil {
260
+			pb.HostIP = b.ChildHostIP
261
+		}
262
+		res = append(res, pb)
263
+	}
264
+	return res
265
+}
266
+
267
+// configPortDriver passes the port binding's details to rootlesskit, and updates the
268
+// port binding with callbacks to remove the rootlesskit config (or marks the binding as
269
+// unsupported by rootlesskit).
270
+func configPortDriver(ctx context.Context, pbs []portmapperapi.PortBinding, pdc PortDriverClient) error {
271
+	for i := range pbs {
272
+		b := pbs[i]
273
+		if pdc != nil && b.HostPort != 0 {
274
+			var err error
275
+			hip, ok := netip.AddrFromSlice(b.HostIP)
276
+			if !ok {
277
+				return fmt.Errorf("invalid host IP address in %s", b)
278
+			}
279
+			chip, ok := netip.AddrFromSlice(b.ChildHostIP)
280
+			if !ok {
281
+				return fmt.Errorf("invalid child host IP address %s in %s", b.ChildHostIP, b)
282
+			}
283
+			pbs[i].PortDriverRemove, err = pdc.AddPort(ctx, b.Proto.String(), hip, chip, int(b.HostPort))
284
+			if err != nil {
285
+				var pErr *rlkclient.ProtocolUnsupportedError
286
+				if errors.As(err, &pErr) {
287
+					log.G(ctx).WithFields(log.Fields{
288
+						"error": pErr,
289
+					}).Warnf("discarding request for %q", net.JoinHostPort(hip.String(), strconv.Itoa(int(b.HostPort))))
290
+					pbs[i].RootlesskitUnsupported = true
291
+					continue
292
+				}
293
+				return err
294
+			}
295
+		}
296
+	}
297
+	return nil
298
+}
299
+
300
+func listenBoundPorts(pbs []portmapperapi.PortBinding, proxyEnabled bool) error {
301
+	for i := range pbs {
302
+		if pbs[i].BoundSocket == nil || pbs[i].RootlesskitUnsupported || pbs[i].Proto == types.UDP {
303
+			continue
304
+		}
305
+		rc, err := pbs[i].BoundSocket.SyscallConn()
306
+		if err != nil {
307
+			return fmt.Errorf("raw conn not available on %d socket: %w", pbs[i].Proto, err)
308
+		}
309
+		if errC := rc.Control(func(fd uintptr) {
310
+			somaxconn := 0
311
+			// SCTP sockets do not support somaxconn=0
312
+			if proxyEnabled || pbs[i].Proto == types.SCTP {
313
+				somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
314
+			}
315
+			err = syscall.Listen(int(fd), somaxconn)
316
+		}); errC != nil {
317
+			return fmt.Errorf("failed to Control %s socket: %w", pbs[i].Proto, err)
318
+		}
319
+		if err != nil {
320
+			return fmt.Errorf("failed to listen on %s socket: %w", pbs[i].Proto, err)
321
+		}
322
+	}
323
+	return nil
324
+}
0 325
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+package nat
1
+
2
+import (
3
+	"context"
4
+	"testing"
5
+
6
+	"github.com/docker/docker/daemon/libnetwork/portmapperapi"
7
+	"github.com/docker/docker/daemon/libnetwork/types"
8
+	"gotest.tools/v3/assert"
9
+	is "gotest.tools/v3/assert/cmp"
10
+)
11
+
12
+func TestBindHostPortsError(t *testing.T) {
13
+	cfg := []portmapperapi.PortBindingReq{
14
+		{
15
+			PortBinding: types.PortBinding{
16
+				Proto:       types.TCP,
17
+				Port:        80,
18
+				HostPort:    8080,
19
+				HostPortEnd: 8080,
20
+			},
21
+		},
22
+		{
23
+			PortBinding: types.PortBinding{
24
+				Proto:       types.TCP,
25
+				Port:        80,
26
+				HostPort:    8080,
27
+				HostPortEnd: 8081,
28
+			},
29
+		},
30
+	}
31
+	pm := &PortMapper{}
32
+	pbs, err := pm.MapPorts(context.Background(), cfg, nil)
33
+	assert.Check(t, is.Error(err, "port binding mismatch 80/tcp:8080-8080, 80/tcp:8080-8081"))
34
+	assert.Check(t, is.Nil(pbs))
35
+}
0 36
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+// FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16:
1
+//go:build go1.23
2
+
3
+package routed
4
+
5
+import (
6
+	"context"
7
+
8
+	"github.com/containerd/log"
9
+	"github.com/docker/docker/daemon/libnetwork/portmapperapi"
10
+	"github.com/docker/docker/daemon/libnetwork/types"
11
+	"github.com/docker/docker/internal/sliceutil"
12
+)
13
+
14
// driverName is the name this port-mapper is registered under.
const driverName = "routed"
15
+
16
+// Register the "routed" port-mapper with libnetwork.
17
+func Register(r portmapperapi.Registerer) error {
18
+	return r.Register(driverName, NewPortMapper())
19
+}
20
+
21
// PortMapper is the "routed" port-mapper. It carries no state: its methods
// only add and remove firewall rules through the Firewaller they are given.
type PortMapper struct{}
22
+
23
+func NewPortMapper() PortMapper {
24
+	return PortMapper{}
25
+}
26
+
27
+// MapPorts sets up firewall rules to allow direct remote access to pbs.
28
+func (pm PortMapper) MapPorts(ctx context.Context, reqs []portmapperapi.PortBindingReq, fwn portmapperapi.Firewaller) ([]portmapperapi.PortBinding, error) {
29
+	if len(reqs) == 0 {
30
+		return nil, nil
31
+	}
32
+
33
+	res := make([]portmapperapi.PortBinding, 0, len(reqs))
34
+	bindings := make([]types.PortBinding, 0, len(reqs))
35
+	for _, c := range reqs {
36
+		pb := portmapperapi.PortBinding{PortBinding: c.GetCopy()}
37
+		if pb.HostPort != 0 || pb.HostPortEnd != 0 {
38
+			log.G(ctx).WithFields(log.Fields{"mapping": pb}).Infof(
39
+				"Host port ignored, because NAT is disabled")
40
+			pb.HostPort = 0
41
+			pb.HostPortEnd = 0
42
+		}
43
+		res = append(res, pb)
44
+		bindings = append(bindings, pb.PortBinding)
45
+	}
46
+
47
+	if err := fwn.AddPorts(ctx, bindings); err != nil {
48
+		return nil, err
49
+	}
50
+
51
+	return res, nil
52
+}
53
+
54
+// UnmapPorts removes firewall rules allowing direct remote access to the pbs.
55
+func (pm PortMapper) UnmapPorts(ctx context.Context, pbs []portmapperapi.PortBinding, fwn portmapperapi.Firewaller) error {
56
+	return fwn.DelPorts(ctx, sliceutil.Map(pbs, func(pb portmapperapi.PortBinding) types.PortBinding {
57
+		return pb.PortBinding
58
+	}))
59
+}