Browse code

libnet/d/bridge: pass SCTP sock to the proxy

Since commit b3fabedec, the bridge driver maps ports following a 3-step
process: 1. create a socket, and bind it to the host port; 2. create
iptables rules; 3. start the userland proxy (if it's enabled). This
ensures that the port is really free before inserting iptables rules
that could otherwise disrupt host services.

However, this 3-step process wasn't implemented for SCTP, because we had
no way to instantiate an SCTP listener from an fd. Since
github.com/ishidawataru/sctp@4719921f9, we can.

Signed-off-by: Albin Kerouanton <albinker@gmail.com>

Albin Kerouanton authored on 2025/07/02 07:15:23
Showing 3 changed files
... ...
@@ -1,7 +1,6 @@
1 1
 package main
2 2
 
3 3
 import (
4
-	"errors"
5 4
 	"flag"
6 5
 	"fmt"
7 6
 	"net"
... ...
@@ -128,18 +127,18 @@ func newProxy(config ProxyConfig) (p Proxy, err error) {
128 128
 		p, err = NewUDPProxy(listener, container, ipv)
129 129
 	case "sctp":
130 130
 		var listener *sctp.SCTPListener
131
-		if config.ListenSock != nil {
132
-			// There's no way to construct an SCTPListener from a file descriptor at the moment.
133
-			// If a socket has been passed in, it's probably from a newer daemon using a version
134
-			// of the sctp module that does allow it.
135
-			return nil, errors.New("cannot use supplied SCTP socket, check the latest docker-proxy is in your $PATH")
131
+		if config.ListenSock == nil {
132
+			hostAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.HostIP}}, Port: config.HostPort}
133
+			listener, err = sctp.ListenSCTP("sctp"+string(ipv), hostAddr)
134
+			if err != nil {
135
+				return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
136
+			}
137
+		} else {
138
+			if listener, err = sctp.FileListener(config.ListenSock); err != nil {
139
+				return nil, err
140
+			}
136 141
 		}
137
-		hostAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.HostIP}}, Port: config.HostPort}
138 142
 		container := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: config.ContainerIP}}, Port: config.ContainerPort}
139
-		listener, err = sctp.ListenSCTP("sctp"+string(ipv), hostAddr)
140
-		if err != nil {
141
-			return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
142
-		}
143 143
 		p, err = NewSCTPProxy(listener, container)
144 144
 	default:
145 145
 		return nil, fmt.Errorf("unsupported protocol %s", config.Proto)
... ...
@@ -179,7 +178,11 @@ func parseFlags() ProxyConfig {
179 179
 	}
180 180
 
181 181
 	if useListenFd {
182
-		_ = syscall.SetNonblock(int(listenSockFd), true)
182
+		// Unlike the stdlib, passing a non-blocking socket to `sctp.FileListener`
183
+		// will result in a non-blocking Accept(). So, do not set this flag for SCTP.
184
+		if config.Proto != "sctp" {
185
+			_ = syscall.SetNonblock(int(listenSockFd), true)
186
+		}
183 187
 		config.ListenSock = os.NewFile(listenSockFd, "listen-sock")
184 188
 	}
185 189
 
... ...
@@ -13,6 +13,7 @@ import (
13 13
 	"time"
14 14
 
15 15
 	"github.com/ishidawataru/sctp"
16
+	"golang.org/x/sys/unix"
16 17
 	"gotest.tools/v3/assert"
17 18
 )
18 19
 
... ...
@@ -155,6 +156,65 @@ func udpListener(t *testing.T, nw string, addr *net.UDPAddr) (*os.File, *net.UDP
155 155
 	return osFile, l.LocalAddr().(*net.UDPAddr)
156 156
 }
157 157
 
158
+func sctpListener(t *testing.T, nw string, addr *sctp.SCTPAddr) (*os.File, *sctp.SCTPAddr) {
159
+	t.Helper()
160
+
161
+	var domain int
162
+	var sa unix.Sockaddr
163
+	switch nw {
164
+	case "sctp4":
165
+		domain = unix.AF_INET
166
+		sa = &unix.SockaddrInet4{
167
+			Addr: [4]uint8(addr.IPAddrs[0].IP.To4()),
168
+			Port: addr.Port,
169
+		}
170
+	case "sctp6":
171
+		domain = unix.AF_INET6
172
+		sa = &unix.SockaddrInet6{
173
+			Addr: [16]uint8(addr.IPAddrs[0].IP.To16()),
174
+			Port: addr.Port,
175
+		}
176
+	default:
177
+		t.Fatalf("unknown SCTP network type: %s", nw)
178
+	}
179
+
180
+	sockfd, err := unix.Socket(domain, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, unix.IPPROTO_SCTP)
181
+	assert.NilError(t, err)
182
+
183
+	err = unix.Bind(sockfd, sa)
184
+	assert.NilError(t, err)
185
+
186
+	err = unix.Listen(sockfd, -1)
187
+	assert.NilError(t, err)
188
+
189
+	newfd, _, sysErr := unix.Syscall(unix.SYS_FCNTL, uintptr(sockfd), unix.F_DUPFD_CLOEXEC, 0)
190
+	if sysErr != 0 {
191
+		t.Fatal(os.NewSyscallError("fcntl", sysErr))
192
+	}
193
+
194
+	err = unix.Close(sockfd)
195
+	assert.NilError(t, err)
196
+
197
+	sockname, err := unix.Getsockname(int(newfd))
198
+	assert.NilError(t, err)
199
+
200
+	var laddr *sctp.SCTPAddr
201
+	switch sa := sockname.(type) {
202
+	case *unix.SockaddrInet4:
203
+		laddr = &sctp.SCTPAddr{
204
+			IPAddrs: []net.IPAddr{{IP: sa.Addr[:]}},
205
+			Port:    sa.Port,
206
+		}
207
+	case *unix.SockaddrInet6:
208
+		laddr = &sctp.SCTPAddr{
209
+			IPAddrs: []net.IPAddr{{IP: sa.Addr[:]}},
210
+			Port:    sa.Port,
211
+		}
212
+	}
213
+
214
+	return os.NewFile(newfd, ""), laddr
215
+}
216
+
158 217
 func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose bool) {
159 218
 	t.Helper()
160 219
 	defer proxy.Close()
... ...
@@ -414,3 +474,41 @@ func TestSCTP6ProxyNoListener(t *testing.T) {
414 414
 	assert.NilError(t, err)
415 415
 	testProxyAt(t, "sctp", proxy, fmt.Sprintf("[%s]:%d", config.HostIP, config.HostPort), false)
416 416
 }
417
+
418
+func TestSCTP4Proxy(t *testing.T) {
419
+	backend := NewEchoServer(t, "sctp", "127.0.0.1:0", EchoServerOptions{})
420
+	defer backend.Close()
421
+	backend.Run()
422
+	listener, frontendAddr := sctpListener(t, "sctp4", &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP("127.0.0.1")}}, Port: 0})
423
+	backendAddr := backend.LocalAddr().(*sctp.SCTPAddr)
424
+	config := ProxyConfig{
425
+		Proto:         "sctp",
426
+		HostIP:        frontendAddr.IPAddrs[0].IP,
427
+		HostPort:      frontendAddr.Port,
428
+		ContainerIP:   backendAddr.IPAddrs[0].IP,
429
+		ContainerPort: backendAddr.Port,
430
+		ListenSock:    listener,
431
+	}
432
+	proxy, err := newProxy(config)
433
+	assert.NilError(t, err)
434
+	testProxyAt(t, "sctp", proxy, fmt.Sprintf("%s:%d", config.HostIP, config.HostPort), false)
435
+}
436
+
437
+func TestSCTP6Proxy(t *testing.T) {
438
+	backend := NewEchoServer(t, "sctp", "[::1]:0", EchoServerOptions{})
439
+	defer backend.Close()
440
+	backend.Run()
441
+	listener, frontendAddr := sctpListener(t, "sctp6", &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP("::1")}}, Port: 0})
442
+	backendAddr := backend.LocalAddr().(*sctp.SCTPAddr)
443
+	config := ProxyConfig{
444
+		Proto:         "sctp",
445
+		HostIP:        frontendAddr.IPAddrs[0].IP,
446
+		HostPort:      frontendAddr.Port,
447
+		ContainerIP:   backendAddr.IPAddrs[0].IP,
448
+		ContainerPort: backendAddr.Port,
449
+		ListenSock:    listener,
450
+	}
451
+	proxy, err := newProxy(config)
452
+	assert.NilError(t, err)
453
+	testProxyAt(t, "sctp", proxy, fmt.Sprintf("[%s]:%d", config.HostIP, config.HostPort), false)
454
+}
... ...
@@ -530,19 +530,7 @@ func attemptBindHostPorts(
530 530
 			case "udp":
531 531
 				pb, err = bindTCPOrUDP(c, port, syscall.SOCK_DGRAM, syscall.IPPROTO_UDP)
532 532
 			case "sctp":
533
-				if proxyPath == "" {
534
-					pb, err = bindSCTP(c, port)
535
-				} else {
536
-					// TODO(robmry) - it's not currently possible to pass a bound SCTP port
537
-					//  to the userland proxy, because the proxy is not able to convert the
538
-					//  file descriptor into an sctp.SCTPListener (fd is an unexported member
539
-					//  of the struct, and ListenSCTP is the only constructor).
540
-					//  If that changes, remove this.
541
-					//  Until then, it is possible for the proxy to start listening and accept
542
-					//  connections before iptables rules are created that would bypass
543
-					//  the proxy for external connections.
544
-					pb, err = startSCTPProxy(c, port, proxyPath)
545
-				}
533
+				pb, err = bindSCTP(c, port)
546 534
 			default:
547 535
 				return nil, fmt.Errorf("Unknown addr type: %s", proto)
548 536
 			}
... ...
@@ -567,7 +555,7 @@ func attemptBindHostPorts(
567 567
 	// socket. Listen here anyway because SO_REUSEADDR is set, so bind() won't notice
568 568
 	// the problem if a port's bound to both INADDR_ANY and a specific address. (Also
569 569
 	// so the binding shows up in "netstat -at".)
570
-	if err := tcpListenBoundPorts(res, proxyPath); err != nil {
570
+	if err := listenBoundPorts(res, proxyPath); err != nil {
571 571
 		return nil, err
572 572
 	}
573 573
 	return res, nil
... ...
@@ -684,20 +672,6 @@ func bindSCTP(cfg portBindingReq, port int) (_ portBinding, retErr error) {
684 684
 	return pb, nil
685 685
 }
686 686
 
687
-func startSCTPProxy(cfg portBindingReq, port int, proxyPath string) (_ portBinding, retErr error) {
688
-	pb := portBinding{PortBinding: cfg.GetCopy()}
689
-	pb.HostPort = uint16(port)
690
-	pb.HostPortEnd = pb.HostPort
691
-	pb.childHostIP = cfg.childHostIP
692
-
693
-	var err error
694
-	pb.stopProxy, err = startProxy(pb.childPortBinding(), proxyPath, nil)
695
-	if err != nil {
696
-		return portBinding{}, err
697
-	}
698
-	return pb, nil
699
-}
700
-
701 687
 // configPortDriver passes the port binding's details to rootlesskit, and updates the
702 688
 // port binding with callbacks to remove the rootlesskit config (or marks the binding as
703 689
 // unsupported by rootlesskit).
... ...
@@ -731,26 +705,27 @@ func configPortDriver(ctx context.Context, pbs []portBinding, pdc portDriverClie
731 731
 	return nil
732 732
 }
733 733
 
734
-func tcpListenBoundPorts(pbs []portBinding, proxyPath string) error {
735
-	somaxconn := 0
736
-	if proxyPath != "" {
737
-		somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
738
-	}
734
+func listenBoundPorts(pbs []portBinding, proxyPath string) error {
739 735
 	for i := range pbs {
740
-		if pbs[i].boundSocket == nil || pbs[i].rootlesskitUnsupported || pbs[i].Proto != types.TCP {
736
+		if pbs[i].boundSocket == nil || pbs[i].rootlesskitUnsupported || pbs[i].Proto == types.UDP {
741 737
 			continue
742 738
 		}
743 739
 		rc, err := pbs[i].boundSocket.SyscallConn()
744 740
 		if err != nil {
745
-			return fmt.Errorf("raw conn not available on TCP socket: %w", err)
741
+			return fmt.Errorf("raw conn not available on %s socket: %w", pbs[i].Proto, err)
746 742
 		}
747 743
 		if errC := rc.Control(func(fd uintptr) {
744
+			somaxconn := 0
745
+			// SCTP sockets do not support somaxconn=0
746
+			if proxyPath != "" || pbs[i].Proto == types.SCTP {
747
+				somaxconn = -1 // silently capped to "/proc/sys/net/core/somaxconn"
748
+			}
748 749
 			err = syscall.Listen(int(fd), somaxconn)
749 750
 		}); errC != nil {
750
-			return fmt.Errorf("failed to Control TCP socket: %w", err)
751
+			return fmt.Errorf("failed to Control %s socket: %w", pbs[i].Proto, err)
751 752
 		}
752 753
 		if err != nil {
753
-			return fmt.Errorf("failed to listen on TCP socket: %w", err)
754
+			return fmt.Errorf("failed to listen on %s socket: %w", pbs[i].Proto, err)
754 755
 		}
755 756
 	}
756 757
 	return nil