Browse code

Create docker-proxy TCP/UDP listener sockets in the daemon

Before commit 4f09af6, when allocating host ports for a new
port mapping, iptables rules were set up then docker-proxy was
started. If the host port was already in-use, docker-proxy exited
with an error, and the iptables rules were removed. That could
potentially interfere with a non-docker service that was already
using the host port for something unrelated.

Commit 4f09af6 swapped that problem for a different one... in
order to check that a port was available before creating iptables
rules, it attempted to start docker-proxy first. If it failed, it
could then try a different host port, without interfering with
any other service. The problem with that is docker-proxy would
start listening before the iptables rules were in place, so it
could accept connections then become unusable because new NAT
rules diverted packets directly to the container. This would leave
the client with a broken connection, causing at least a delay
while it figured that out and reconnected.

This change creates and binds the socket in the daemon, before
creating iptables rules. If the bind fails, it may try a different
port. When or if the bind succeeds, iptables rules are created,
then the daemon calls listen on the socket. If docker-proxy is
needed, the socket is handed over to it at that point.

In rootless mode, the ports have to be bound to an address in the
rootless network namespace (where dockerd is running). DNAT rules
now use the same address.

If docker-proxy is not needed ("--userland-proxy=false"), the daemon
still listens on TCP sockets as the old dummyProxy would have done.
This makes the socket show up in "netstat" output.

The dummyProxy is no longer needed on Linux. Its job was to bind the
host ports if docker-proxy was disabled, but that's now already
handled by binding the sockets early.

This change doesn't affect SCTP, because it's not currently possible
for docker-proxy to convert the file descriptor into an SCTPListener.
So, docker-proxy is still started early, and the window for lost
connections remains.

If the user has an old docker-proxy in their path and it's given a
listener socket with '-use-listen-fd', it'll fail because of the
unknown option. In this case, the daemon's error message suggests
checking $PATH.

Signed-off-by: Rob Murray <rob.murray@docker.com>

Rob Murray authored on 2024/07/08 21:50:40
Showing 7 changed files
... ...
@@ -704,7 +704,7 @@ func (s *DockerDaemonSuite) TestDaemonIP(c *testing.T) {
704 704
 
705 705
 	out, err := d.Cmd("run", "-d", "-p", "8000:8000", "busybox", "top")
706 706
 	assert.Assert(c, err != nil, "Running a container must fail with an invalid --ip option")
707
-	assert.Equal(c, strings.Contains(out, "Error starting userland proxy"), true)
707
+	assert.Check(c, is.Contains(out, "failed to bind host port for 192.170.1.1"))
708 708
 
709 709
 	ifName := "dummy"
710 710
 	createInterface(c, "dummy", ifName, ipStr)
... ...
@@ -12,6 +12,8 @@ import (
12 12
 	"os"
13 13
 	"slices"
14 14
 	"strconv"
15
+	"syscall"
16
+	"unsafe"
15 17
 
16 18
 	"github.com/containerd/log"
17 19
 	"github.com/docker/docker/libnetwork/iptables"
... ...
@@ -19,10 +21,15 @@ import (
19 19
 	"github.com/docker/docker/libnetwork/portallocator"
20 20
 	"github.com/docker/docker/libnetwork/portmapper"
21 21
 	"github.com/docker/docker/libnetwork/types"
22
+	"github.com/ishidawataru/sctp"
22 23
 )
23 24
 
24 25
 type portBinding struct {
25 26
 	types.PortBinding
27
+	// boundSocket is used to reserve a host port for the binding. If the
28
+	// userland proxy is in-use, it's passed to the proxy when the proxy is
29
+	// started, then it's closed and set to nil here.
30
+	boundSocket *os.File
26 31
 	// childHostIP is the host IP address, as seen from the daemon. This
27 32
 	// is normally the same as PortBinding.HostIP but, in rootless mode, it
28 33
 	// will be an address in the rootless network namespace. RootlessKit
... ...
@@ -41,21 +48,33 @@ type portBinding struct {
41 41
 	stopProxy func() error
42 42
 }
43 43
 
44
+// childPortBinding is pb.PortBinding, with the host address the daemon
45
+// will see - which, in rootless mode, will be an address in the RootlessKit's
46
+// child namespace (see portBinding.childHostIP).
47
+func (pb portBinding) childPortBinding() types.PortBinding {
48
+	res := pb.PortBinding
49
+	res.HostIP = pb.childHostIP
50
+	return res
51
+}
52
+
44 53
 type portBindingReq struct {
45 54
 	types.PortBinding
46 55
 	childHostIP net.IP
47 56
 	disableNAT  bool
48 57
 }
49 58
 
59
+// Allow unit tests to supply a dummy StartProxy.
60
+var startProxy = portmapper.StartProxy
61
+
50 62
 // addPortMappings takes cfg, the configuration for port mappings, selects host
51
-// ports when ranges are given, starts docker-proxy or its dummy to reserve
52
-// host ports, and sets up iptables NAT/forwarding rules as necessary. If
53
-// anything goes wrong, it will undo any work it's done and return an error.
54
-// Otherwise, the returned slice of portBinding has an entry per address
55
-// family (if cfg describes a mapping for 'any' host address, it's expanded
56
-// into mappings for IPv4 and IPv6, because that's how the mapping is presented
57
-// in 'inspect'). HostPort and HostPortEnd in each returned portBinding are set
58
-// to the selected and reserved port.
63
+// ports when ranges are given, binds host ports to check they're available and
64
+// reserve them, starts docker-proxy if required, and sets up iptables
65
+// NAT/forwarding rules as necessary. If anything goes wrong, it will undo any
66
+// work it's done and return an error. Otherwise, the returned slice of
67
+// portBinding has an entry per address family (if cfg describes a mapping for
68
+// 'any' host address, it's expanded into mappings for IPv4 and IPv6, because
69
+// that's how the mapping is presented in 'inspect'). HostPort and HostPortEnd in
70
+// each returned portBinding are set to the selected and reserved port.
59 71
 func (n *bridgeNetwork) addPortMappings(
60 72
 	epAddrV4, epAddrV6 *net.IPNet,
61 73
 	cfg []types.PortBinding,
... ...
@@ -134,15 +153,14 @@ func (n *bridgeNetwork) addPortMappings(
134 134
 			continue
135 135
 		}
136 136
 
137
-		// Allocate a host port, and reserve it by starting docker-proxy for each host
138
-		// address in toBind.
137
+		// Allocate and bind a host port.
139 138
 		newB, err := bindHostPorts(toBind, proxyPath)
140 139
 		if err != nil {
141 140
 			return nil, err
142 141
 		}
143 142
 		bindings = append(bindings, newB...)
144 143
 
145
-		// Reset the collection of bindings now they're bound.
144
+		// Reset toBind now the ports are bound.
146 145
 		toBind = toBind[:0]
147 146
 	}
148 147
 
... ...
@@ -168,6 +186,53 @@ func (n *bridgeNetwork) addPortMappings(
168 168
 		}
169 169
 	}
170 170
 
171
+	// Now the iptables rules are set up, it's safe to start the userland proxy.
172
+	// (If it was started before the iptables rules were created, it may have
173
+	// accepted a connection, then become unreachable due to NAT rules sending
174
+	// packets directly to the container.)
175
+	// If not starting the proxy, nothing will ever accept a connection on the
176
+	// socket. But, listen anyway so that the binding shows up in "netstat -at".
177
+	somaxconn := 0
178
+	if proxyPath != "" {
179
+		somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
180
+	}
181
+	for i := range bindings {
182
+		if bindings[i].boundSocket == nil {
183
+			continue
184
+		}
185
+		if bindings[i].Proto == types.TCP {
186
+			rc, err := bindings[i].boundSocket.SyscallConn()
187
+			if err != nil {
188
+				return nil, fmt.Errorf("raw conn not available on TCP socket: %w", err)
189
+			}
190
+			if errC := rc.Control(func(fd uintptr) {
191
+				err = syscall.Listen(int(fd), somaxconn)
192
+			}); errC != nil {
193
+				return nil, fmt.Errorf("failed to Control TCP socket: %w", err)
194
+			}
195
+			if err != nil {
196
+				return nil, fmt.Errorf("failed to listen on TCP socket: %w", err)
197
+			}
198
+		}
199
+		if proxyPath != "" {
200
+			var err error
201
+			bindings[i].stopProxy, err = startProxy(
202
+				bindings[i].childPortBinding(), proxyPath, bindings[i].boundSocket,
203
+			)
204
+			if err != nil {
205
+				return nil, fmt.Errorf("failed to start userland proxy for port mapping %s: %w",
206
+					bindings[i].PortBinding, err)
207
+			}
208
+			if err := bindings[i].boundSocket.Close(); err != nil {
209
+				log.G(context.TODO()).WithFields(log.Fields{
210
+					"error":   err,
211
+					"mapping": bindings[i].PortBinding,
212
+				}).Warnf("failed to close proxy socket")
213
+			}
214
+			bindings[i].boundSocket = nil
215
+		}
216
+	}
217
+
171 218
 	return bindings, nil
172 219
 }
173 220
 
... ...
@@ -366,7 +431,7 @@ func setChildHostIP(pdc portDriverClient, req portBindingReq) portBindingReq {
366 366
 	return req
367 367
 }
368 368
 
369
-// bindHostPorts allocates ports and starts docker-proxy for the given cfg. The
369
+// bindHostPorts allocates and binds host ports for the given cfg. The
370 370
 // caller is responsible for ensuring that all entries in cfg map the same proto,
371 371
 // container port, and host port range (their host addresses must differ).
372 372
 func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error) {
... ...
@@ -401,17 +466,13 @@ func bindHostPorts(cfg []portBindingReq, proxyPath string) ([]portBinding, error
401 401
 	return nil, err
402 402
 }
403 403
 
404
-// Allow unit tests to supply a dummy StartProxy.
405
-var startProxy = portmapper.StartProxy
406
-
407 404
 // attemptBindHostPorts allocates host ports for each port mapping that requires
408
-// one, and reserves those ports by starting docker-proxy.
405
+// one, and reserves those ports by binding them.
409 406
 //
410 407
 // If the allocator doesn't have an available port in the required range, or the
411
-// docker-proxy process doesn't start (perhaps because another process has
412
-// already bound the port), all resources are released and an error is returned.
413
-// When ports are successfully reserved, a portBinding is returned for each
414
-// mapping.
408
+// port can't be bound (perhaps because another process has already bound it),
409
+// all resources are released and an error is returned. When ports are
410
+// successfully reserved, a portBinding is returned for each mapping.
415 411
 //
416 412
 // If NAT is disabled for any of the bindings, no host port reservation is
417 413
 // needed. These bindings are included in results, as the container port itself
... ...
@@ -428,7 +489,7 @@ func attemptBindHostPorts(
428 428
 	addrs := make([]net.IP, 0, len(cfg))
429 429
 	for _, c := range cfg {
430 430
 		if !c.disableNAT {
431
-			addrs = append(addrs, c.HostIP)
431
+			addrs = append(addrs, c.childHostIP)
432 432
 		}
433 433
 	}
434 434
 
... ...
@@ -448,31 +509,177 @@ func attemptBindHostPorts(
448 448
 	}
449 449
 
450 450
 	res := make([]portBinding, 0, len(cfg))
451
+	defer func() {
452
+		if retErr != nil {
453
+			for _, pb := range res {
454
+				if pb.boundSocket != nil {
455
+					if err := pb.boundSocket.Close(); err != nil {
456
+						log.G(context.TODO()).Warnf("Failed to close port binding for %s: %s", pb, err)
457
+					}
458
+				}
459
+				// TODO(robmry) - this is only needed because the userland proxy may have
460
+				//  been started for SCTP. If a bound socket is passed to the proxy after
461
+				//  iptables rules have been configured (as it is for TCP/UDP), remove this.
462
+				if pb.stopProxy != nil {
463
+					if err := pb.stopProxy(); err != nil {
464
+						log.G(context.TODO()).Warnf("Failed to stop proxy for %s: %s", pb, err)
465
+					}
466
+				}
467
+			}
468
+		}
469
+	}()
470
+
451 471
 	for _, c := range cfg {
452
-		pb := portBinding{PortBinding: c.GetCopy()}
472
+		var pb portBinding
453 473
 		if c.disableNAT {
474
+			pb = portBinding{PortBinding: c.GetCopy()}
454 475
 			pb.HostPort = 0
476
+			pb.HostPortEnd = 0
455 477
 		} else {
456
-			pb.stopProxy, err = startProxy(c.Proto.String(), c.childHostIP, port, c.IP, int(c.Port), proxyPath)
478
+			switch proto {
479
+			case "tcp":
480
+				pb, err = bindTCPOrUDP(c, port, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
481
+			case "udp":
482
+				pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP)
483
+			case "sctp":
484
+				if proxyPath == "" {
485
+					pb, err = bindSCTP(c, port)
486
+				} else {
487
+					// TODO(robmry) - it's not currently possible to pass a bound SCTP port
488
+					//  to the userland proxy, because the proxy is not able to convert the
489
+					//  file descriptor into an sctp.SCTPListener (fd is an unexported member
490
+					//  of the struct, and ListenSCTP is the only constructor).
491
+					//  So, it is possible for the proxy to start listening and accept
492
+					//  connections before iptables rules are created that would bypass
493
+					//  the proxy for external connections.
494
+					//  Remove this and pb.stopProxy() from the cleanup function above if
495
+					//  this is fixed.
496
+					pb, err = startSCTPProxy(c, port, proxyPath)
497
+				}
498
+			default:
499
+				return nil, fmt.Errorf("Unknown addr type: %s", proto)
500
+			}
457 501
 			if err != nil {
458
-				return nil, fmt.Errorf("failed to bind port %s:%d/%s: %w", c.HostIP, port, c.Proto, err)
502
+				return nil, err
459 503
 			}
460
-			defer func() {
461
-				if retErr != nil {
462
-					if err := pb.stopProxy(); err != nil {
463
-						log.G(context.TODO()).Warnf("Failed to stop userland proxy for port mapping %s: %s", pb, err)
464
-					}
465
-				}
466
-			}()
467
-			pb.HostPort = uint16(port)
468 504
 		}
469
-		pb.HostPortEnd = pb.HostPort
470
-		pb.childHostIP = c.childHostIP
471 505
 		res = append(res, pb)
472 506
 	}
473 507
 	return res, nil
474 508
 }
475 509
 
510
+func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retErr error) {
511
+	pb := portBinding{PortBinding: cfg.PortBinding.GetCopy()}
512
+	pb.HostPort = uint16(port)
513
+	pb.HostPortEnd = pb.HostPort
514
+	pb.childHostIP = cfg.childHostIP
515
+
516
+	var domain int
517
+	var sa syscall.Sockaddr
518
+	if hip := cfg.childHostIP.To4(); hip != nil {
519
+		domain = syscall.AF_INET
520
+		sa4 := syscall.SockaddrInet4{Port: port}
521
+		copy(sa4.Addr[:], hip)
522
+		sa = &sa4
523
+	} else {
524
+		domain = syscall.AF_INET6
525
+		sa6 := syscall.SockaddrInet6{Port: port}
526
+		copy(sa6.Addr[:], cfg.childHostIP)
527
+		sa = &sa6
528
+	}
529
+
530
+	sd, err := syscall.Socket(domain, typ|syscall.SOCK_CLOEXEC, proto)
531
+	if err != nil {
532
+		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
533
+	}
534
+	defer func() {
535
+		if retErr != nil {
536
+			syscall.Close(sd)
537
+		}
538
+	}()
539
+
540
+	if domain == syscall.AF_INET6 {
541
+		syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
542
+	}
543
+	if err := syscall.Bind(sd, sa); err != nil {
544
+		if cfg.HostPort == cfg.HostPortEnd {
545
+			return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err)
546
+		}
547
+		return portBinding{}, fmt.Errorf("failed to bind host port %d for %s: %w", port, cfg, err)
548
+	}
549
+
550
+	pb.boundSocket = os.NewFile(uintptr(sd), "listener")
551
+	if pb.boundSocket == nil {
552
+		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
553
+	}
554
+	return pb, nil
555
+}
556
+
557
+// bindSCTP is based on sctp.ListenSCTP. The socket is created and bound, but
558
+// does not start listening.
559
+func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) {
560
+	pb := portBinding{PortBinding: cfg.GetCopy()}
561
+	pb.HostPort = uint16(port)
562
+	pb.HostPortEnd = pb.HostPort
563
+	pb.childHostIP = cfg.childHostIP
564
+
565
+	domain := syscall.AF_INET
566
+	if cfg.childHostIP.To4() == nil {
567
+		domain = syscall.AF_INET6
568
+	}
569
+
570
+	sd, err := syscall.Socket(domain, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, syscall.IPPROTO_SCTP)
571
+	if err != nil {
572
+		return portBinding{}, fmt.Errorf("failed to create socket for userland proxy for %s: %w", cfg, err)
573
+	}
574
+	defer func() {
575
+		if retErr != nil {
576
+			syscall.Close(sd)
577
+		}
578
+	}()
579
+
580
+	if domain == syscall.AF_INET6 {
581
+		syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1)
582
+	}
583
+
584
+	options := sctp.InitMsg{NumOstreams: sctp.SCTP_MAX_STREAM}
585
+	if _, _, errno := syscall.Syscall6(syscall.SYS_SETSOCKOPT,
586
+		uintptr(sd),
587
+		sctp.SOL_SCTP,
588
+		sctp.SCTP_INITMSG,
589
+		uintptr(unsafe.Pointer(&options)),
590
+		unsafe.Sizeof(options),
591
+		0); errno != 0 {
592
+		return portBinding{}, errno
593
+	}
594
+
595
+	if err := sctp.SCTPBind(sd,
596
+		&sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: cfg.childHostIP}}, Port: int(cfg.HostPort)},
597
+		sctp.SCTP_BINDX_ADD_ADDR); err != nil {
598
+		return portBinding{}, fmt.Errorf("failed to bind socket for userland proxy for %s: %w", cfg, err)
599
+	}
600
+
601
+	pb.boundSocket = os.NewFile(uintptr(sd), "listener")
602
+	if pb.boundSocket == nil {
603
+		return portBinding{}, fmt.Errorf("failed to convert socket for userland proxy for %s", cfg)
604
+	}
605
+	return pb, nil
606
+}
607
+
608
+func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) {
609
+	pb := portBinding{PortBinding: cfg.GetCopy()}
610
+	pb.HostPort = uint16(port)
611
+	pb.HostPortEnd = pb.HostPort
612
+	pb.childHostIP = cfg.childHostIP
613
+
614
+	var err error
615
+	pb.stopProxy, err = startProxy(pb.childPortBinding(), proxyPath, nil)
616
+	if err != nil {
617
+		return portBinding{}, err
618
+	}
619
+	return pb, nil
620
+}
621
+
476 622
 // releasePorts attempts to release all port bindings, does not stop on failure
477 623
 func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
478 624
 	n.Lock()
... ...
@@ -486,14 +693,20 @@ func (n *bridgeNetwork) releasePorts(ep *bridgeEndpoint) error {
486 486
 func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
487 487
 	var errs []error
488 488
 	for _, pb := range pbs {
489
-		var errPD, errP error
489
+		var errS, errPD, errP error
490
+		if pb.boundSocket != nil {
491
+			errS = pb.boundSocket.Close()
492
+			if errS != nil {
493
+				errS = fmt.Errorf("failed to close socket for port mapping %s: %w", pb, errS)
494
+			}
495
+		}
490 496
 		if pb.portDriverRemove != nil {
491 497
 			errPD = pb.portDriverRemove()
492 498
 		}
493 499
 		if pb.stopProxy != nil {
494 500
 			errP = pb.stopProxy()
495 501
 			if errP != nil {
496
-				errP = fmt.Errorf("failed to stop docker-proxy for port mapping %s: %w", pb, errP)
502
+				errP = fmt.Errorf("failed to stop userland proxy for port mapping %s: %w", pb, errP)
497 503
 			}
498 504
 		}
499 505
 		errN := n.setPerPortIptables(pb, false)
... ...
@@ -501,9 +714,9 @@ func (n *bridgeNetwork) releasePortBindings(pbs []portBinding) error {
501 501
 			errN = fmt.Errorf("failed to remove iptables rules for port mapping %s: %w", pb, errN)
502 502
 		}
503 503
 		if pb.HostPort > 0 {
504
-			portallocator.Get().ReleasePort(pb.HostIP, pb.Proto.String(), int(pb.HostPort))
504
+			portallocator.Get().ReleasePort(pb.childHostIP, pb.Proto.String(), int(pb.HostPort))
505 505
 		}
506
-		errs = append(errs, errPD, errP, errN)
506
+		errs = append(errs, errS, errPD, errP, errN)
507 507
 	}
508 508
 	return errors.Join(errs...)
509 509
 }
... ...
@@ -545,8 +758,8 @@ func setPerPortNAT(b portBinding, ipv iptables.IPVersion, proxyPath string, brid
545 545
 	// want "0.0.0.0/0". "0/0" is correctly interpreted as "any
546 546
 	// value" by both iptables and ip6tables.
547 547
 	hostIP := "0/0"
548
-	if !b.HostIP.IsUnspecified() {
549
-		hostIP = b.HostIP.String()
548
+	if !b.childHostIP.IsUnspecified() {
549
+		hostIP = b.childHostIP.String()
550 550
 	}
551 551
 	args := []string{
552 552
 		"-p", b.Proto.String(),
... ...
@@ -6,8 +6,10 @@ import (
6 6
 	"fmt"
7 7
 	"net"
8 8
 	"net/netip"
9
+	"os"
9 10
 	"strconv"
10 11
 	"strings"
12
+	"syscall"
11 13
 	"testing"
12 14
 
13 15
 	"github.com/docker/docker/internal/testutils/netnsutils"
... ...
@@ -16,6 +18,7 @@ import (
16 16
 	"github.com/docker/docker/libnetwork/ns"
17 17
 	"github.com/docker/docker/libnetwork/portallocator"
18 18
 	"github.com/docker/docker/libnetwork/types"
19
+	"github.com/vishvananda/netlink"
19 20
 	"gotest.tools/v3/assert"
20 21
 	is "gotest.tools/v3/assert/cmp"
21 22
 )
... ...
@@ -423,6 +426,7 @@ func TestAddPortMappings(t *testing.T) {
423 423
 		proxyPath    string
424 424
 		busyPortIPv4 int
425 425
 		rootless     bool
426
+		hostAddrs    []string
426 427
 
427 428
 		expErr          string
428 429
 		expPBs          []types.PortBinding
... ...
@@ -441,6 +445,7 @@ func TestAddPortMappings(t *testing.T) {
441 441
 				{Proto: types.TCP, Port: 22},
442 442
 				{Proto: types.TCP, Port: 80},
443 443
 			},
444
+			proxyPath: "/dummy/path/to/proxy",
444 445
 			expPBs: []types.PortBinding{
445 446
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
446 447
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -449,22 +454,24 @@ func TestAddPortMappings(t *testing.T) {
449 449
 			},
450 450
 		},
451 451
 		{
452
-			name:     "specific host port",
453
-			epAddrV4: ctrIP4,
454
-			epAddrV6: ctrIP6,
455
-			cfg:      []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
452
+			name:      "specific host port",
453
+			epAddrV4:  ctrIP4,
454
+			epAddrV6:  ctrIP6,
455
+			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
456
+			proxyPath: "/dummy/path/to/proxy",
456 457
 			expPBs: []types.PortBinding{
457 458
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
458 459
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
459 460
 			},
460 461
 		},
461 462
 		{
462
-			name:     "nat explicitly enabled",
463
-			epAddrV4: ctrIP4,
464
-			epAddrV6: ctrIP6,
465
-			cfg:      []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
466
-			gwMode4:  gwModeNAT,
467
-			gwMode6:  gwModeNAT,
463
+			name:      "nat explicitly enabled",
464
+			epAddrV4:  ctrIP4,
465
+			epAddrV6:  ctrIP6,
466
+			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
467
+			gwMode4:   gwModeNAT,
468
+			gwMode6:   gwModeNAT,
469
+			proxyPath: "/dummy/path/to/proxy",
468 470
 			expPBs: []types.PortBinding{
469 471
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
470 472
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
... ...
@@ -475,24 +482,27 @@ func TestAddPortMappings(t *testing.T) {
475 475
 			epAddrV4:     ctrIP4,
476 476
 			epAddrV6:     ctrIP6,
477 477
 			cfg:          []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
478
+			proxyPath:    "/dummy/path/to/proxy",
478 479
 			busyPortIPv4: 8080,
479
-			expErr:       "failed to bind port 0.0.0.0:8080/tcp: busy port",
480
+			expErr:       "failed to bind host port for 0.0.0.0:8080:172.19.0.2:80/tcp: address already in use",
480 481
 		},
481 482
 		{
482
-			name:     "ipv4 mapped container address with specific host port",
483
-			epAddrV4: ctrIP4Mapped,
484
-			epAddrV6: ctrIP6,
485
-			cfg:      []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
483
+			name:      "ipv4 mapped container address with specific host port",
484
+			epAddrV4:  ctrIP4Mapped,
485
+			epAddrV6:  ctrIP6,
486
+			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080}},
487
+			proxyPath: "/dummy/path/to/proxy",
486 488
 			expPBs: []types.PortBinding{
487 489
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
488 490
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8080},
489 491
 			},
490 492
 		},
491 493
 		{
492
-			name:     "ipv4 mapped host address with specific host port",
493
-			epAddrV4: ctrIP4,
494
-			epAddrV6: ctrIP6,
495
-			cfg:      []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
494
+			name:      "ipv4 mapped host address with specific host port",
495
+			epAddrV4:  ctrIP4,
496
+			epAddrV6:  ctrIP6,
497
+			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "::ffff:127.0.0.1/128").IP, HostPort: 8080}},
498
+			proxyPath: "/dummy/path/to/proxy",
496 499
 			expPBs: []types.PortBinding{
497 500
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/32").IP, HostPort: 8080, HostPortEnd: 8080},
498 501
 			},
... ...
@@ -502,6 +512,7 @@ func TestAddPortMappings(t *testing.T) {
502 502
 			epAddrV4:     ctrIP4,
503 503
 			epAddrV6:     ctrIP6,
504 504
 			cfg:          []types.PortBinding{{Proto: types.TCP, Port: 80, HostPort: 8080, HostPortEnd: 8081}},
505
+			proxyPath:    "/dummy/path/to/proxy",
505 506
 			busyPortIPv4: 8080,
506 507
 			expPBs: []types.PortBinding{
507 508
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081, HostPortEnd: 8081},
... ...
@@ -516,6 +527,7 @@ func TestAddPortMappings(t *testing.T) {
516 516
 				{Proto: types.TCP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8081},
517 517
 				{Proto: types.TCP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080, HostPortEnd: 8081},
518 518
 			},
519
+			proxyPath:    "/dummy/path/to/proxy",
519 520
 			busyPortIPv4: 8080,
520 521
 			expPBs: []types.PortBinding{
521 522
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8081},
... ...
@@ -534,6 +546,7 @@ func TestAddPortMappings(t *testing.T) {
534 534
 				{Proto: types.UDP, Port: 81, HostPort: 8080, HostPortEnd: 8083},
535 535
 				{Proto: types.UDP, Port: 82, HostPort: 8080, HostPortEnd: 8083},
536 536
 			},
537
+			proxyPath:    "/dummy/path/to/proxy",
537 538
 			busyPortIPv4: 8082,
538 539
 			expPBs: []types.PortBinding{
539 540
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080, HostPortEnd: 8080},
... ...
@@ -559,8 +572,9 @@ func TestAddPortMappings(t *testing.T) {
559 559
 				{Proto: types.TCP, Port: 81, HostPort: 8080, HostPortEnd: 8082},
560 560
 				{Proto: types.TCP, Port: 82, HostPort: 8080, HostPortEnd: 8082},
561 561
 			},
562
+			proxyPath:    "/dummy/path/to/proxy",
562 563
 			busyPortIPv4: 8081,
563
-			expErr:       "failed to bind port 0.0.0.0:8081/tcp: busy port",
564
+			expErr:       "failed to bind host port 8081 for 0.0.0.0:8080-8082:172.19.0.2:82/tcp",
564 565
 		},
565 566
 		{
566 567
 			name:     "map host ipv6 to ipv4 container with proxy",
... ...
@@ -591,9 +605,10 @@ func TestAddPortMappings(t *testing.T) {
591 591
 			epAddrV4:  ctrIP4,
592 592
 			epAddrV6:  ctrIP6,
593 593
 			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80}},
594
-			defHostIP: newIPNet(t, "10.11.12.13/24").IP,
594
+			proxyPath: "/dummy/path/to/proxy",
595
+			defHostIP: newIPNet(t, "127.0.0.1/8").IP,
595 596
 			expPBs: []types.PortBinding{
596
-				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
597
+				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
597 598
 			},
598 599
 		},
599 600
 		{
... ...
@@ -601,9 +616,10 @@ func TestAddPortMappings(t *testing.T) {
601 601
 			epAddrV4:  ctrIP4,
602 602
 			epAddrV6:  ctrIP6,
603 603
 			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80}},
604
-			defHostIP: newIPNet(t, "::ffff:10.11.12.13/120").IP,
604
+			proxyPath: "/dummy/path/to/proxy",
605
+			defHostIP: newIPNet(t, "::ffff:127.0.0.1/72").IP,
605 606
 			expPBs: []types.PortBinding{
606
-				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "10.11.12.13/24").IP, HostPort: firstEphemPort},
607
+				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: newIPNet(t, "127.0.0.1/8").IP, HostPort: firstEphemPort},
607 608
 			},
608 609
 		},
609 610
 		{
... ...
@@ -611,6 +627,7 @@ func TestAddPortMappings(t *testing.T) {
611 611
 			epAddrV4:  ctrIP4,
612 612
 			epAddrV6:  ctrIP6,
613 613
 			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80}},
614
+			proxyPath: "/dummy/path/to/proxy",
614 615
 			defHostIP: net.IPv6zero,
615 616
 			expPBs: []types.PortBinding{
616 617
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -621,6 +638,7 @@ func TestAddPortMappings(t *testing.T) {
621 621
 			epAddrV4:  ctrIP4,
622 622
 			epAddrV6:  ctrIP6,
623 623
 			cfg:       []types.PortBinding{{Proto: types.TCP, Port: 80}},
624
+			proxyPath: "/dummy/path/to/proxy",
624 625
 			defHostIP: newIPNet(t, "::1/128").IP,
625 626
 			expPBs: []types.PortBinding{
626 627
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: newIPNet(t, "::1/128").IP, HostPort: firstEphemPort},
... ...
@@ -634,16 +652,17 @@ func TestAddPortMappings(t *testing.T) {
634 634
 				{Proto: types.TCP, Port: 80, HostPort: 8080},
635 635
 				{Proto: types.TCP, Port: 22, HostPort: 2222},
636 636
 			},
637
+			proxyPath: "/dummy/path/to/proxy",
637 638
 			expPBs: []types.PortBinding{
638 639
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: 2222},
639 640
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: 2222},
640 641
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 80, HostIP: net.IPv4zero, HostPort: 8080},
641 642
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 80, HostIP: net.IPv6zero, HostPort: 8080},
642 643
 			},
643
-			expReleaseErr: "failed to stop docker-proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
644
-				"failed to stop docker-proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
645
-				"failed to stop docker-proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
646
-				"failed to stop docker-proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
644
+			expReleaseErr: "failed to stop userland proxy for port mapping 0.0.0.0:2222:172.19.0.2:22/tcp: can't stop now\n" +
645
+				"failed to stop userland proxy for port mapping [::]:2222:[fdf8:b88e:bb5c:3483::2]:22/tcp: can't stop now\n" +
646
+				"failed to stop userland proxy for port mapping 0.0.0.0:8080:172.19.0.2:80/tcp: can't stop now\n" +
647
+				"failed to stop userland proxy for port mapping [::]:8080:[fdf8:b88e:bb5c:3483::2]:80/tcp: can't stop now",
647 648
 		},
648 649
 		{
649 650
 			name:     "disable nat6",
... ...
@@ -653,7 +672,8 @@ func TestAddPortMappings(t *testing.T) {
653 653
 				{Proto: types.TCP, Port: 22},
654 654
 				{Proto: types.TCP, Port: 80},
655 655
 			},
656
-			gwMode6: gwModeRouted,
656
+			proxyPath: "/dummy/path/to/proxy",
657
+			gwMode6:   gwModeRouted,
657 658
 			expPBs: []types.PortBinding{
658 659
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero, HostPort: firstEphemPort},
659 660
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
... ...
@@ -669,7 +689,8 @@ func TestAddPortMappings(t *testing.T) {
669 669
 				{Proto: types.TCP, Port: 22},
670 670
 				{Proto: types.TCP, Port: 80},
671 671
 			},
672
-			gwMode4: gwModeRouted,
672
+			proxyPath: "/dummy/path/to/proxy",
673
+			gwMode4:   gwModeRouted,
673 674
 			expPBs: []types.PortBinding{
674 675
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
675 676
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero, HostPort: firstEphemPort},
... ...
@@ -685,8 +706,9 @@ func TestAddPortMappings(t *testing.T) {
685 685
 				{Proto: types.TCP, Port: 22},
686 686
 				{Proto: types.TCP, Port: 80},
687 687
 			},
688
-			gwMode4: gwModeRouted,
689
-			gwMode6: gwModeRouted,
688
+			proxyPath: "/dummy/path/to/proxy",
689
+			gwMode4:   gwModeRouted,
690
+			gwMode6:   gwModeRouted,
690 691
 			expPBs: []types.PortBinding{
691 692
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 22, HostIP: net.IPv4zero},
692 693
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 22, HostIP: net.IPv6zero},
... ...
@@ -695,9 +717,10 @@ func TestAddPortMappings(t *testing.T) {
695 695
 			},
696 696
 		},
697 697
 		{
698
-			name:     "same ports for matching mappings with different host addresses",
699
-			epAddrV4: ctrIP4,
700
-			epAddrV6: ctrIP6,
698
+			name:      "same ports for matching mappings with different host addresses",
699
+			epAddrV4:  ctrIP4,
700
+			epAddrV6:  ctrIP6,
701
+			hostAddrs: []string{"192.168.1.2/24", "fd0c:9167:5b11::2/64", "fd0c:9167:5b11::3/64"},
701 702
 			cfg: []types.PortBinding{
702 703
 				// These two should both get the same host port.
703 704
 				{Proto: types.TCP, Port: 80, HostIP: newIPNet(t, "fd0c:9167:5b11::2/64").IP},
... ...
@@ -711,6 +734,7 @@ func TestAddPortMappings(t *testing.T) {
711 711
 				{Proto: types.TCP, Port: 12345, HostPort: 12345, HostPortEnd: 12346},
712 712
 				{Proto: types.TCP, Port: 12345, HostPort: 12345},
713 713
 			},
714
+			proxyPath: "/dummy/path/to/proxy",
714 715
 			expPBs: []types.PortBinding{
715 716
 				{Proto: types.TCP, IP: ctrIP4.IP, Port: 12345, HostIP: net.IPv4zero, HostPort: 12345},
716 717
 				{Proto: types.TCP, IP: ctrIP6.IP, Port: 12345, HostIP: net.IPv6zero, HostPort: 12345},
... ...
@@ -751,15 +775,14 @@ func TestAddPortMappings(t *testing.T) {
751 751
 			origStartProxy := startProxy
752 752
 			defer func() { startProxy = origStartProxy }()
753 753
 			proxies := map[proxyCall]bool{} // proxy -> is not stopped
754
-			startProxy = func(proto string,
755
-				hostIP net.IP, hostPort int,
756
-				containerIP net.IP, containerPort int,
754
+			startProxy = func(pb types.PortBinding,
757 755
 				proxyPath string,
756
+				listenSock *os.File,
758 757
 			) (stop func() error, retErr error) {
759
-				if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == hostPort && hostIP.To4() != nil {
758
+				if tc.busyPortIPv4 > 0 && tc.busyPortIPv4 == int(pb.HostPort) && pb.HostIP.To4() != nil {
760 759
 					return nil, errors.New("busy port")
761 760
 				}
762
-				c := newProxyCall(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
761
+				c := newProxyCall(pb.Proto.String(), pb.HostIP, int(pb.HostPort), pb.IP, int(pb.Port), proxyPath)
763 762
 				if _, ok := proxies[c]; ok {
764 763
 					return nil, fmt.Errorf("duplicate proxy: %#v", c)
765 764
 				}
... ...
@@ -781,6 +804,28 @@ func TestAddPortMappings(t *testing.T) {
781 781
 			defer func() { newPortDriverClient = origNewPortDriverClient }()
782 782
 			newPortDriverClient = func() (portDriverClient, error) { return newMockPortDriverClient() }
783 783
 
784
+			if len(tc.hostAddrs) > 0 {
785
+				dummyLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "br-dummy"}}
786
+				err := netlink.LinkAdd(dummyLink)
787
+				assert.NilError(t, err)
788
+				for _, addr := range tc.hostAddrs {
789
+					// Add with NODAD so that the address is available immediately.
790
+					err := netlink.AddrAdd(dummyLink,
791
+						&netlink.Addr{IPNet: newIPNet(t, addr), Flags: syscall.IFA_F_NODAD})
792
+					assert.NilError(t, err)
793
+				}
794
+				err = netlink.LinkSetUp(dummyLink)
795
+				assert.NilError(t, err)
796
+			}
797
+			if tc.busyPortIPv4 != 0 {
798
+				tl, err := net.ListenTCP("tcp4", &net.TCPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
799
+				assert.NilError(t, err)
800
+				defer tl.Close()
801
+				ul, err := net.ListenUDP("udp4", &net.UDPAddr{IP: net.IPv4zero, Port: tc.busyPortIPv4})
802
+				assert.NilError(t, err)
803
+				defer ul.Close()
804
+			}
805
+
784 806
 			n := &bridgeNetwork{
785 807
 				config: &networkConfiguration{
786 808
 					BridgeName: "dummybridge",
... ...
@@ -829,20 +874,21 @@ func TestAddPortMappings(t *testing.T) {
829 829
 				var disableNAT bool
830 830
 				var addrM, addrD, addrH string
831 831
 				var ipv iptables.IPVersion
832
+				hip := expChildIP(expPB.HostIP)
832 833
 				if expPB.IP.To4() == nil {
833 834
 					disableNAT = tc.gwMode6.natDisabled()
834 835
 					ipv = iptables.IPv6
835 836
 					addrM = ctrIP6.IP.String() + "/128"
836 837
 					addrD = "[" + ctrIP6.IP.String() + "]"
837
-					addrH = expPB.HostIP.String() + "/128"
838
+					addrH = hip.String() + "/128"
838 839
 				} else {
839 840
 					disableNAT = tc.gwMode4.natDisabled()
840 841
 					ipv = iptables.IPv4
841 842
 					addrM = ctrIP4.IP.String() + "/32"
842 843
 					addrD = ctrIP4.IP.String()
843
-					addrH = expPB.HostIP.String() + "/32"
844
+					addrH = hip.String() + "/32"
844 845
 				}
845
-				if expPB.HostIP.IsUnspecified() {
846
+				if hip.IsUnspecified() {
846 847
 					addrH = "0/0"
847 848
 				}
848 849
 
... ...
@@ -887,19 +933,21 @@ func TestAddPortMappings(t *testing.T) {
887 887
 			}
888 888
 
889 889
 			// Check a docker-proxy was started and stopped for each expected port binding.
890
-			expProxies := map[proxyCall]bool{}
891
-			for _, expPB := range tc.expPBs {
892
-				hip := expChildIP(expPB.HostIP)
893
-				is4 := hip.To4() != nil
894
-				if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
895
-					continue
890
+			if tc.proxyPath != "" {
891
+				expProxies := map[proxyCall]bool{}
892
+				for _, expPB := range tc.expPBs {
893
+					hip := expChildIP(expPB.HostIP)
894
+					is4 := hip.To4() != nil
895
+					if (is4 && tc.gwMode4.natDisabled()) || (!is4 && tc.gwMode6.natDisabled()) {
896
+						continue
897
+					}
898
+					p := newProxyCall(expPB.Proto.String(),
899
+						hip, int(expPB.HostPort),
900
+						expPB.IP, int(expPB.Port), tc.proxyPath)
901
+					expProxies[p] = tc.expReleaseErr != ""
896 902
 				}
897
-				p := newProxyCall(expPB.Proto.String(),
898
-					hip, int(expPB.HostPort),
899
-					expPB.IP, int(expPB.Port), tc.proxyPath)
900
-				expProxies[p] = tc.expReleaseErr != ""
903
+				assert.Check(t, is.DeepEqual(expProxies, proxies))
901 904
 			}
902
-			assert.Check(t, is.DeepEqual(expProxies, proxies))
903 905
 
904 906
 			// Check the port driver has seen the expected port mappings and no others,
905 907
 			// and that they have all been closed.
... ...
@@ -99,7 +99,8 @@ func (c *PortDriverClient) AddPort(
99 99
 	hostIP netip.Addr,
100 100
 	childIP netip.Addr,
101 101
 	hostPort int,
102
-) (func() error, error) { // proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly
102
+) (func() error, error) {
103
+	// proto is like "tcp", but we need to convert it to "tcp4" or "tcp6" explicitly
103 104
 	// for libnetwork >= 20201216
104 105
 	//
105 106
 	// See https://github.com/moby/libnetwork/pull/2604/files#diff-8fa48beed55dd033bf8e4f8c40b31cf69d0b2cc5d4bb53cde8594670ea6c938aR20
106 107
deleted file mode 100644
... ...
@@ -1,85 +0,0 @@
1
-package portmapper
2
-
3
-import (
4
-	"fmt"
5
-	"io"
6
-	"net"
7
-
8
-	"github.com/ishidawataru/sctp"
9
-)
10
-
11
-// ipVersion refers to IP version - v4 or v6
12
-type ipVersion string
13
-
14
-const (
15
-	// IPv4 is version 4
16
-	ipv4 ipVersion = "4"
17
-	// IPv4 is version 6
18
-	ipv6 ipVersion = "6"
19
-)
20
-
21
-// dummyProxy just listen on some port, it is needed to prevent accidental
22
-// port allocations on bound port, because without userland proxy we using
23
-// iptables rules and not net.Listen
24
-type dummyProxy struct {
25
-	listener  io.Closer
26
-	addr      net.Addr
27
-	ipVersion ipVersion
28
-}
29
-
30
-func newDummyProxy(proto string, hostIP net.IP, hostPort int) (stop func() error, retErr error) {
31
-	// detect version of hostIP to bind only to correct version
32
-	version := ipv4
33
-	if hostIP.To4() == nil {
34
-		version = ipv6
35
-	}
36
-	var addr net.Addr
37
-	switch proto {
38
-	case "tcp":
39
-		addr = &net.TCPAddr{IP: hostIP, Port: hostPort}
40
-	case "udp":
41
-		addr = &net.UDPAddr{IP: hostIP, Port: hostPort}
42
-	case "sctp":
43
-		addr = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: hostIP}}, Port: hostPort}
44
-	default:
45
-		return nil, fmt.Errorf("Unknown addr type: %s", proto)
46
-	}
47
-	p := &dummyProxy{addr: addr, ipVersion: version}
48
-	if err := p.start(); err != nil {
49
-		return nil, err
50
-	}
51
-	return p.stop, nil
52
-}
53
-
54
-func (p *dummyProxy) start() error {
55
-	switch addr := p.addr.(type) {
56
-	case *net.TCPAddr:
57
-		l, err := net.ListenTCP("tcp"+string(p.ipVersion), addr)
58
-		if err != nil {
59
-			return err
60
-		}
61
-		p.listener = l
62
-	case *net.UDPAddr:
63
-		l, err := net.ListenUDP("udp"+string(p.ipVersion), addr)
64
-		if err != nil {
65
-			return err
66
-		}
67
-		p.listener = l
68
-	case *sctp.SCTPAddr:
69
-		l, err := sctp.ListenSCTP("sctp"+string(p.ipVersion), addr)
70
-		if err != nil {
71
-			return err
72
-		}
73
-		p.listener = l
74
-	default:
75
-		return fmt.Errorf("Unknown addr type: %T", p.addr)
76
-	}
77
-	return nil
78
-}
79
-
80
-func (p *dummyProxy) stop() error {
81
-	if p.listener != nil {
82
-		return p.listener.Close()
83
-	}
84
-	return nil
85
-}
... ...
@@ -1,78 +1,61 @@
1 1
 package portmapper
2 2
 
3 3
 import (
4
+	"errors"
4 5
 	"fmt"
5 6
 	"io"
6
-	"net"
7 7
 	"os"
8 8
 	"os/exec"
9 9
 	"runtime"
10 10
 	"strconv"
11 11
 	"syscall"
12 12
 	"time"
13
-)
14 13
 
15
-// StartProxy starts the proxy process at proxyPath, or instantiates a dummy proxy
16
-// to bind the host port if proxyPath is the empty string.
17
-func StartProxy(
18
-	proto string,
19
-	hostIP net.IP, hostPort int,
20
-	containerIP net.IP, containerPort int,
21
-	proxyPath string,
22
-) (stop func() error, retErr error) {
23
-	if proxyPath == "" {
24
-		return newDummyProxy(proto, hostIP, hostPort)
25
-	}
26
-	return newProxyCommand(proto, hostIP, hostPort, containerIP, containerPort, proxyPath)
27
-}
14
+	"github.com/docker/docker/libnetwork/types"
15
+)
28 16
 
29
-func newProxyCommand(
30
-	proto string,
31
-	hostIP net.IP, hostPort int,
32
-	containerIP net.IP, containerPort int,
17
+// StartProxy starts the proxy process at proxyPath.
18
+// If listenSock is not nil, it must be a bound socket that can be passed to
19
+// the proxy process for it to listen on.
20
+func StartProxy(pb types.PortBinding,
33 21
 	proxyPath string,
22
+	listenSock *os.File,
34 23
 ) (stop func() error, retErr error) {
35 24
 	if proxyPath == "" {
36 25
 		return nil, fmt.Errorf("no path provided for userland-proxy binary")
37 26
 	}
27
+	r, w, err := os.Pipe()
28
+	if err != nil {
29
+		return nil, fmt.Errorf("proxy unable to open os.Pipe %s", err)
30
+	}
31
+	defer func() {
32
+		if w != nil {
33
+			w.Close()
34
+		}
35
+		r.Close()
36
+	}()
38 37
 
39
-	p := &proxyCommand{
40
-		cmd: &exec.Cmd{
41
-			Path: proxyPath,
42
-			Args: []string{
43
-				proxyPath,
44
-				"-proto", proto,
45
-				"-host-ip", hostIP.String(),
46
-				"-host-port", strconv.Itoa(hostPort),
47
-				"-container-ip", containerIP.String(),
48
-				"-container-port", strconv.Itoa(containerPort),
49
-			},
50
-			SysProcAttr: &syscall.SysProcAttr{
51
-				Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
52
-			},
38
+	cmd := &exec.Cmd{
39
+		Path: proxyPath,
40
+		Args: []string{
41
+			proxyPath,
42
+			"-proto", pb.Proto.String(),
43
+			"-host-ip", pb.HostIP.String(),
44
+			"-host-port", strconv.FormatUint(uint64(pb.HostPort), 10),
45
+			"-container-ip", pb.IP.String(),
46
+			"-container-port", strconv.FormatUint(uint64(pb.Port), 10),
47
+		},
48
+		ExtraFiles: []*os.File{w},
49
+		SysProcAttr: &syscall.SysProcAttr{
50
+			Pdeathsig: syscall.SIGTERM, // send a sigterm to the proxy if the creating thread in the daemon process dies (https://go.dev/issue/27505)
53 51
 		},
54
-		wait: make(chan error, 1),
55 52
 	}
56
-	if err := p.start(); err != nil {
57
-		return nil, err
53
+	if listenSock != nil {
54
+		cmd.Args = append(cmd.Args, "-use-listen-fd")
55
+		cmd.ExtraFiles = append(cmd.ExtraFiles, listenSock)
58 56
 	}
59
-	return p.stop, nil
60
-}
61 57
 
62
-// proxyCommand wraps an exec.Cmd to run the userland TCP and UDP
63
-// proxies as separate processes.
64
-type proxyCommand struct {
65
-	cmd  *exec.Cmd
66
-	wait chan error
67
-}
68
-
69
-func (p *proxyCommand) start() error {
70
-	r, w, err := os.Pipe()
71
-	if err != nil {
72
-		return fmt.Errorf("proxy unable to open os.Pipe %s", err)
73
-	}
74
-	defer r.Close()
75
-	p.cmd.ExtraFiles = []*os.File{w}
58
+	wait := make(chan error, 1)
76 59
 
77 60
 	// As cmd.SysProcAttr.Pdeathsig is set, the signal will be sent to the
78 61
 	// process when the OS thread on which cmd.Start() was executed dies.
... ...
@@ -88,17 +71,18 @@ func (p *proxyCommand) start() error {
88 88
 	go func() {
89 89
 		runtime.LockOSThread()
90 90
 		defer runtime.UnlockOSThread()
91
-		err := p.cmd.Start()
91
+		err := cmd.Start()
92 92
 		started <- err
93 93
 		if err != nil {
94 94
 			return
95 95
 		}
96
-		p.wait <- p.cmd.Wait()
96
+		wait <- cmd.Wait()
97 97
 	}()
98 98
 	if err := <-started; err != nil {
99
-		return err
99
+		return nil, err
100 100
 	}
101 101
 	w.Close()
102
+	w = nil
102 103
 
103 104
 	errchan := make(chan error, 1)
104 105
 	go func() {
... ...
@@ -108,11 +92,16 @@ func (p *proxyCommand) start() error {
108 108
 		if string(buf) != "0\n" {
109 109
 			errStr, err := io.ReadAll(r)
110 110
 			if err != nil {
111
-				errchan <- fmt.Errorf("Error reading exit status from userland proxy: %v", err)
111
+				errchan <- fmt.Errorf("error reading exit status from userland proxy: %v", err)
112 112
 				return
113 113
 			}
114
-
115
-			errchan <- fmt.Errorf("Error starting userland proxy: %s", errStr)
114
+			// If the user has an old docker-proxy in their PATH, and we passed "-use-listen-fd"
115
+			// on the command line, it exits with no response on the pipe.
116
+			if listenSock != nil && buf[0] == 0 && len(errStr) == 0 {
117
+				errchan <- errors.New("failed to start docker-proxy, check that the current version is in your $PATH")
118
+				return
119
+			}
120
+			errchan <- fmt.Errorf("error starting userland proxy: %s", errStr)
116 121
 			return
117 122
 		}
118 123
 		errchan <- nil
... ...
@@ -120,18 +109,21 @@ func (p *proxyCommand) start() error {
120 120
 
121 121
 	select {
122 122
 	case err := <-errchan:
123
-		return err
123
+		if err != nil {
124
+			return nil, err
125
+		}
124 126
 	case <-time.After(16 * time.Second):
125
-		return fmt.Errorf("Timed out proxy starting the userland proxy")
127
+		return nil, fmt.Errorf("timed out starting the userland proxy")
126 128
 	}
127
-}
128 129
 
129
-func (p *proxyCommand) stop() error {
130
-	if p.cmd.Process != nil {
131
-		if err := p.cmd.Process.Signal(os.Interrupt); err != nil {
130
+	stopFn := func() error {
131
+		if cmd.Process == nil {
132
+			return nil
133
+		}
134
+		if err := cmd.Process.Signal(os.Interrupt); err != nil {
132 135
 			return err
133 136
 		}
134
-		return <-p.wait
137
+		return <-wait
135 138
 	}
136
-	return nil
139
+	return stopFn, nil
137 140
 }
138 141
new file mode 100644
... ...
@@ -0,0 +1,85 @@
0
+package portmapper
1
+
2
+import (
3
+	"fmt"
4
+	"io"
5
+	"net"
6
+
7
+	"github.com/ishidawataru/sctp"
8
+)
9
+
10
+// ipVersion refers to IP version - v4 or v6
11
+type ipVersion string
12
+
13
+const (
14
+	// IPv4 is version 4
15
+	ipv4 ipVersion = "4"
16
+	// IPv4 is version 6
17
+	ipv6 ipVersion = "6"
18
+)
19
+
20
+// dummyProxy just listens on a host port. It is needed to prevent accidental
21
+// allocation of an already-bound port because, without the userland proxy,
22
+// we use iptables rules rather than net.Listen.
23
+type dummyProxy struct {
24
+	listener  io.Closer
25
+	addr      net.Addr
26
+	ipVersion ipVersion
27
+}
28
+
29
+func newDummyProxy(proto string, hostIP net.IP, hostPort int) (stop func() error, retErr error) {
30
+	// detect version of hostIP to bind only to correct version
31
+	version := ipv4
32
+	if hostIP.To4() == nil {
33
+		version = ipv6
34
+	}
35
+	var addr net.Addr
36
+	switch proto {
37
+	case "tcp":
38
+		addr = &net.TCPAddr{IP: hostIP, Port: hostPort}
39
+	case "udp":
40
+		addr = &net.UDPAddr{IP: hostIP, Port: hostPort}
41
+	case "sctp":
42
+		addr = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: hostIP}}, Port: hostPort}
43
+	default:
44
+		return nil, fmt.Errorf("Unknown addr type: %s", proto)
45
+	}
46
+	p := &dummyProxy{addr: addr, ipVersion: version}
47
+	if err := p.start(); err != nil {
48
+		return nil, err
49
+	}
50
+	return p.stop, nil
51
+}
52
+
53
+func (p *dummyProxy) start() error {
54
+	switch addr := p.addr.(type) {
55
+	case *net.TCPAddr:
56
+		l, err := net.ListenTCP("tcp"+string(p.ipVersion), addr)
57
+		if err != nil {
58
+			return err
59
+		}
60
+		p.listener = l
61
+	case *net.UDPAddr:
62
+		l, err := net.ListenUDP("udp"+string(p.ipVersion), addr)
63
+		if err != nil {
64
+			return err
65
+		}
66
+		p.listener = l
67
+	case *sctp.SCTPAddr:
68
+		l, err := sctp.ListenSCTP("sctp"+string(p.ipVersion), addr)
69
+		if err != nil {
70
+			return err
71
+		}
72
+		p.listener = l
73
+	default:
74
+		return fmt.Errorf("Unknown addr type: %T", p.addr)
75
+	}
76
+	return nil
77
+}
78
+
79
+func (p *dummyProxy) stop() error {
80
+	if p.listener != nil {
81
+		return p.listener.Close()
82
+	}
83
+	return nil
84
+}