When a UDP server is running on a multihomed server, as is the case with
pretty much _all_ Docker hosts (eg. eth0 + docker0), the kernel has to
choose which source address is used when replying to a UDP client. But
that process is based on heuristics and is fallible.
If the address picked doesn't match the original destination address
used by the client, it'll drop the datagram and return an ICMP Port
Unreachable.
To prevent that, we need to:
- `setsockopt(IP_PKTINFO)` on proxy's sockets.
- Extract the original destination address from an ancillary message
every time a new 'UDP connection' is 'established' (ie. every time we
insert a new entry into the UDP conntrack table).
- And finally, pass a control message containing the desired source
address to the kernel, every time we send a response back to the
client.
Also, update the inline comment on read errors in `(*UDPProxy).Run()`.
This comment was misleadingly referencing ECONNREFUSED - Linux's UDP
implementation never returns this error (see [1]). Instead, state why
`net.ErrClosed` is perfectly fine and doesn't need to be logged
(although, docker-proxy currently logs to nowhere).
[1]: https://github.com/search?q=repo%3Atorvalds%2Flinux+ECONNREFUSED+path%3A%2F%5Enet%5C%2F%28ipv4%7Cipv6%29%5C%2F%28udp%7Ctcp%29%2F&type=code
Signed-off-by: Albin Kerouanton <albinker@gmail.com>
| ... | ... |
@@ -11,6 +11,8 @@ import ( |
| 11 | 11 |
|
| 12 | 12 |
"github.com/docker/docker/dockerversion" |
| 13 | 13 |
"github.com/ishidawataru/sctp" |
| 14 |
+ "golang.org/x/net/ipv4" |
|
| 15 |
+ "golang.org/x/net/ipv6" |
|
| 14 | 16 |
) |
| 15 | 17 |
|
| 16 | 18 |
// The caller is expected to pass-in open file descriptors ... |
| ... | ... |
@@ -59,9 +61,9 @@ func main() {
|
| 59 | 59 |
} |
| 60 | 60 |
|
| 61 | 61 |
func newProxy(config ProxyConfig) (p Proxy, err error) {
|
| 62 |
- ipv := ipv4 |
|
| 62 |
+ ipv := ip4 |
|
| 63 | 63 |
if config.HostIP.To4() == nil {
|
| 64 |
- ipv = ipv6 |
|
| 64 |
+ ipv = ip6 |
|
| 65 | 65 |
} |
| 66 | 66 |
|
| 67 | 67 |
switch config.Proto {
|
| ... | ... |
@@ -96,6 +98,21 @@ func newProxy(config ProxyConfig) (p Proxy, err error) {
|
| 96 | 96 |
if err != nil {
|
| 97 | 97 |
return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err)
|
| 98 | 98 |
} |
| 99 |
+ // We need to setsockopt(IP_PKTINFO) on the listener to get the destination address as an ancillary |
|
| 100 |
+ // message. The daddr will be used as the source address when sending back replies coming from the |
|
| 101 |
+ // container to the client. If we don't do this, the kernel will have to pick a source address for us, and |
|
| 102 |
+ // it might not pick what the client expects. That would result in ICMP Port Unreachable. |
|
| 103 |
+ if ipv == ip4 {
|
|
| 104 |
+ pc := ipv4.NewPacketConn(listener) |
|
| 105 |
+ if err := pc.SetControlMessage(ipv4.FlagDst, true); err != nil {
|
|
| 106 |
+ return nil, fmt.Errorf("failed to setsockopt(IP_PKTINFO): %w", err)
|
|
| 107 |
+ } |
|
| 108 |
+ } else {
|
|
| 109 |
+ pc := ipv6.NewPacketConn(listener) |
|
| 110 |
+ if err := pc.SetControlMessage(ipv6.FlagDst, true); err != nil {
|
|
| 111 |
+ return nil, fmt.Errorf("failed to setsockopt(IPV6_RECVPKTINFO): %w", err)
|
|
| 112 |
+ } |
|
| 113 |
+ } |
|
| 99 | 114 |
} else {
|
| 100 | 115 |
l, err := net.FilePacketConn(config.ListenSock) |
| 101 | 116 |
if err != nil {
|
| ... | ... |
@@ -108,7 +125,7 @@ func newProxy(config ProxyConfig) (p Proxy, err error) {
|
| 108 | 108 |
} |
| 109 | 109 |
} |
| 110 | 110 |
container := &net.UDPAddr{IP: config.ContainerIP, Port: config.ContainerPort}
|
| 111 |
- p, err = NewUDPProxy(listener, container) |
|
| 111 |
+ p, err = NewUDPProxy(listener, container, ipv) |
|
| 112 | 112 |
case "sctp": |
| 113 | 113 |
var listener *sctp.SCTPListener |
| 114 | 114 |
if config.ListenSock != nil {
|
| ... | ... |
@@ -7,9 +7,9 @@ type ipVersion string |
| 7 | 7 |
|
| 8 | 8 |
const ( |
| 9 | 9 |
// IPv4 is version 4 |
| 10 |
- ipv4 ipVersion = "4" |
|
| 10 |
+ ip4 ipVersion = "4" |
|
| 11 | 11 |
// IPv4 is version 6 |
| 12 |
- ipv6 ipVersion = "6" |
|
| 12 |
+ ip6 ipVersion = "6" |
|
| 13 | 13 |
) |
| 14 | 14 |
|
| 15 | 15 |
// Proxy defines the behavior of a proxy. It forwards traffic back and forth |
| ... | ... |
@@ -2,12 +2,15 @@ package main |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 | 4 |
"encoding/binary" |
| 5 |
+ "errors" |
|
| 5 | 6 |
"log" |
| 6 | 7 |
"net" |
| 7 |
- "strings" |
|
| 8 | 8 |
"sync" |
| 9 | 9 |
"syscall" |
| 10 | 10 |
"time" |
| 11 |
+ |
|
| 12 |
+ "golang.org/x/net/ipv4" |
|
| 13 |
+ "golang.org/x/net/ipv6" |
|
| 11 | 14 |
) |
| 12 | 15 |
|
| 13 | 16 |
const ( |
| ... | ... |
@@ -51,19 +54,21 @@ type UDPProxy struct {
|
| 51 | 51 |
backendAddr *net.UDPAddr |
| 52 | 52 |
connTrackTable connTrackMap |
| 53 | 53 |
connTrackLock sync.Mutex |
| 54 |
+ ipVer ipVersion |
|
| 54 | 55 |
} |
| 55 | 56 |
|
| 56 | 57 |
// NewUDPProxy creates a new UDPProxy. |
| 57 |
-func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr) (*UDPProxy, error) {
|
|
| 58 |
+func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr, ipVer ipVersion) (*UDPProxy, error) {
|
|
| 58 | 59 |
return &UDPProxy{
|
| 59 | 60 |
listener: listener, |
| 60 | 61 |
frontendAddr: listener.LocalAddr().(*net.UDPAddr), |
| 61 | 62 |
backendAddr: backendAddr, |
| 62 | 63 |
connTrackTable: make(connTrackMap), |
| 64 |
+ ipVer: ipVer, |
|
| 63 | 65 |
}, nil |
| 64 | 66 |
} |
| 65 | 67 |
|
| 66 |
-func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) {
|
|
| 68 |
+func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, serverAddr net.IP, clientAddr *net.UDPAddr, clientKey *connTrackKey) {
|
|
| 67 | 69 |
defer func() {
|
| 68 | 70 |
proxy.connTrackLock.Lock() |
| 69 | 71 |
delete(proxy.connTrackTable, *clientKey) |
| ... | ... |
@@ -71,6 +76,15 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr |
| 71 | 71 |
proxyConn.Close() |
| 72 | 72 |
}() |
| 73 | 73 |
|
| 74 |
+ var oob []byte |
|
| 75 |
+ if proxy.ipVer == ip4 {
|
|
| 76 |
+ cm := &ipv4.ControlMessage{Src: serverAddr}
|
|
| 77 |
+ oob = cm.Marshal() |
|
| 78 |
+ } else {
|
|
| 79 |
+ cm := &ipv6.ControlMessage{Src: serverAddr}
|
|
| 80 |
+ oob = cm.Marshal() |
|
| 81 |
+ } |
|
| 82 |
+ |
|
| 74 | 83 |
readBuf := make([]byte, UDPBufSize) |
| 75 | 84 |
for {
|
| 76 | 85 |
proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout)) |
| ... | ... |
@@ -88,7 +102,7 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr |
| 88 | 88 |
return |
| 89 | 89 |
} |
| 90 | 90 |
for i := 0; i != read; {
|
| 91 |
- written, err := proxy.listener.WriteToUDP(readBuf[i:read], clientAddr) |
|
| 91 |
+ written, _, err := proxy.listener.WriteMsgUDP(readBuf[i:read], oob, clientAddr) |
|
| 92 | 92 |
if err != nil {
|
| 93 | 93 |
return |
| 94 | 94 |
} |
| ... | ... |
@@ -100,13 +114,19 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr |
| 100 | 100 |
// Run starts forwarding the traffic using UDP. |
| 101 | 101 |
func (proxy *UDPProxy) Run() {
|
| 102 | 102 |
readBuf := make([]byte, UDPBufSize) |
| 103 |
+ var oob []byte |
|
| 104 |
+ if proxy.ipVer == ip4 {
|
|
| 105 |
+ oob = ipv4.NewControlMessage(ipv4.FlagDst) |
|
| 106 |
+ } else {
|
|
| 107 |
+ oob = ipv6.NewControlMessage(ipv6.FlagDst) |
|
| 108 |
+ } |
|
| 109 |
+ |
|
| 103 | 110 |
for {
|
| 104 |
- read, from, err := proxy.listener.ReadFromUDP(readBuf) |
|
| 111 |
+ read, _, _, from, err := proxy.listener.ReadMsgUDP(readBuf, oob) |
|
| 105 | 112 |
if err != nil {
|
| 106 |
- // NOTE: Apparently ReadFrom doesn't return |
|
| 107 |
- // ECONNREFUSED like Read do (see comment in |
|
| 108 |
- // UDPProxy.replyLoop) |
|
| 109 |
- if !isClosedError(err) {
|
|
| 113 |
+ // The frontend listener socket might be closed by the signal |
|
| 114 |
+ // handler. In that case, don't log anything - it's not an error. |
|
| 115 |
+ if !errors.Is(err, net.ErrClosed) {
|
|
| 110 | 116 |
log.Printf("Stopping proxy on udp/%v for udp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
|
| 111 | 117 |
} |
| 112 | 118 |
break |
| ... | ... |
@@ -123,7 +143,15 @@ func (proxy *UDPProxy) Run() {
|
| 123 | 123 |
continue |
| 124 | 124 |
} |
| 125 | 125 |
proxy.connTrackTable[*fromKey] = proxyConn |
| 126 |
- go proxy.replyLoop(proxyConn, from, fromKey) |
|
| 126 |
+ |
|
| 127 |
+ daddr, err := readDestFromCmsg(oob, proxy.ipVer) |
|
| 128 |
+ if err != nil {
|
|
| 129 |
+ log.Printf("Failed to parse control message: %v", err)
|
|
| 130 |
+ proxy.connTrackLock.Unlock() |
|
| 131 |
+ continue |
|
| 132 |
+ } |
|
| 133 |
+ |
|
| 134 |
+ go proxy.replyLoop(proxyConn, daddr, from, fromKey) |
|
| 127 | 135 |
} |
| 128 | 136 |
proxy.connTrackLock.Unlock() |
| 129 | 137 |
for i := 0; i != read; {
|
| ... | ... |
@@ -137,6 +165,35 @@ func (proxy *UDPProxy) Run() {
|
| 137 | 137 |
} |
| 138 | 138 |
} |
| 139 | 139 |
|
| 140 |
+func readDestFromCmsg(oob []byte, ipVer ipVersion) (_ net.IP, err error) {
|
|
| 141 |
+ defer func() {
|
|
| 142 |
+ // In case of partial upgrade / downgrade, docker-proxy could read |
|
| 143 |
+ // control messages from a socket which doesn't have the sockopt |
|
| 144 |
+ // IP_PKTINFO enabled. In that case, the control message will be all-0 |
|
| 145 |
+ // and Go's ControlMessage.Parse() will report an 'invalid header |
|
| 146 |
+ // length' error. In that case, ignore the error and return an empty |
|
| 147 |
+ // daddr - the kernel will pick a source address for us anyway (but |
|
| 148 |
+ // maybe it'll be the wrong one). |
|
| 149 |
+ if err != nil && err.Error() == "invalid header length" {
|
|
| 150 |
+ err = nil |
|
| 151 |
+ } |
|
| 152 |
+ }() |
|
| 153 |
+ |
|
| 154 |
+ if ipVer == ip4 {
|
|
| 155 |
+ cm := &ipv4.ControlMessage{}
|
|
| 156 |
+ if err := cm.Parse(oob); err != nil {
|
|
| 157 |
+ return nil, err |
|
| 158 |
+ } |
|
| 159 |
+ return cm.Dst, nil |
|
| 160 |
+ } |
|
| 161 |
+ |
|
| 162 |
+ cm := &ipv6.ControlMessage{}
|
|
| 163 |
+ if err := cm.Parse(oob); err != nil {
|
|
| 164 |
+ return nil, err |
|
| 165 |
+ } |
|
| 166 |
+ return cm.Dst, nil |
|
| 167 |
+} |
|
| 168 |
+ |
|
| 140 | 169 |
// Close stops forwarding the traffic. |
| 141 | 170 |
func (proxy *UDPProxy) Close() {
|
| 142 | 171 |
proxy.listener.Close() |
| ... | ... |
@@ -146,13 +203,3 @@ func (proxy *UDPProxy) Close() {
|
| 146 | 146 |
conn.Close() |
| 147 | 147 |
} |
| 148 | 148 |
} |
| 149 |
- |
|
| 150 |
-func isClosedError(err error) bool {
|
|
| 151 |
- /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. |
|
| 152 |
- * See: |
|
| 153 |
- * http://golang.org/src/pkg/net/net.go |
|
| 154 |
- * https://code.google.com/p/go/issues/detail?id=4337 |
|
| 155 |
- * https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ |
|
| 156 |
- */ |
|
| 157 |
- return strings.HasSuffix(err.Error(), "use of closed network connection") |
|
| 158 |
-} |
| ... | ... |
@@ -128,9 +128,9 @@ func TestDisableNAT(t *testing.T) {
|
| 128 | 128 |
} |
| 129 | 129 |
} |
| 130 | 130 |
|
| 131 |
-// Check that a container on one network can reach a service in a container on |
|
| 132 |
-// another network, via a mapped port on the host. |
|
| 133 |
-func TestPortMappedHairpin(t *testing.T) {
|
|
| 131 |
+// Check that a container on one network can reach a TCP service in a container |
|
| 132 |
+// on another network, via a mapped port on the host. |
|
| 133 |
+func TestPortMappedHairpinTCP(t *testing.T) {
|
|
| 134 | 134 |
skip.If(t, testEnv.IsRootless) |
| 135 | 135 |
|
| 136 | 136 |
ctx := setupTest(t) |
| ... | ... |
@@ -174,6 +174,56 @@ func TestPortMappedHairpin(t *testing.T) {
|
| 174 | 174 |
assert.Check(t, is.Contains(res.Stderr.String(), "404 Not Found")) |
| 175 | 175 |
} |
| 176 | 176 |
|
| 177 |
+// Check that a container on one network can reach a UDP service in a container |
|
| 178 |
+// on another network, via a mapped port on the host. |
|
| 179 |
+// Regression test for https://github.com/moby/libnetwork/issues/1729. |
|
| 180 |
+func TestPortMappedHairpinUDP(t *testing.T) {
|
|
| 181 |
+ skip.If(t, testEnv.IsRootless) |
|
| 182 |
+ |
|
| 183 |
+ ctx := setupTest(t) |
|
| 184 |
+ d := daemon.New(t) |
|
| 185 |
+ d.StartWithBusybox(ctx, t) |
|
| 186 |
+ defer d.Stop(t) |
|
| 187 |
+ c := d.NewClientT(t) |
|
| 188 |
+ defer c.Close() |
|
| 189 |
+ |
|
| 190 |
+ // Find an address on the test host. |
|
| 191 |
+ conn, err := net.Dial("tcp4", "hub.docker.com:80")
|
|
| 192 |
+ assert.NilError(t, err) |
|
| 193 |
+ hostAddr := conn.LocalAddr().(*net.TCPAddr).IP.String() |
|
| 194 |
+ conn.Close() |
|
| 195 |
+ |
|
| 196 |
+ const serverNetName = "servernet" |
|
| 197 |
+ network.CreateNoError(ctx, t, c, serverNetName) |
|
| 198 |
+ defer network.RemoveNoError(ctx, t, c, serverNetName) |
|
| 199 |
+ const clientNetName = "clientnet" |
|
| 200 |
+ network.CreateNoError(ctx, t, c, clientNetName) |
|
| 201 |
+ defer network.RemoveNoError(ctx, t, c, clientNetName) |
|
| 202 |
+ |
|
| 203 |
+ serverId := container.Run(ctx, t, c, |
|
| 204 |
+ container.WithNetworkMode(serverNetName), |
|
| 205 |
+ container.WithExposedPorts("54/udp"),
|
|
| 206 |
+ container.WithPortMap(nat.PortMap{"54/udp": {{HostIP: "0.0.0.0"}}}),
|
|
| 207 |
+ container.WithCmd("/bin/sh", "-c", "echo 'foobar.internal 192.168.155.23' | dnsd -c - -p 54"),
|
|
| 208 |
+ ) |
|
| 209 |
+ defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true})
|
|
| 210 |
+ |
|
| 211 |
+ inspect := container.Inspect(ctx, t, c, serverId) |
|
| 212 |
+ hostPort := inspect.NetworkSettings.Ports["54/udp"][0].HostPort |
|
| 213 |
+ |
|
| 214 |
+ // nslookup gets an answer quickly from the dns server, but then tries to |
|
| 215 |
+ // query another DNS server (for some unknown reasons) and times out. Hence, |
|
| 216 |
+ // we need >5s to execute this test. |
|
| 217 |
+ clientCtx, cancel := context.WithTimeout(ctx, 10*time.Second) |
|
| 218 |
+ defer cancel() |
|
| 219 |
+ res := container.RunAttach(clientCtx, t, c, |
|
| 220 |
+ container.WithNetworkMode(clientNetName), |
|
| 221 |
+ container.WithCmd("nslookup", "foobar.internal", net.JoinHostPort(hostAddr, hostPort)),
|
|
| 222 |
+ container.WithAutoRemove, |
|
| 223 |
+ ) |
|
| 224 |
+ assert.Check(t, is.Contains(res.Stdout.String(), "192.168.155.23")) |
|
| 225 |
+} |
|
| 226 |
+ |
|
| 177 | 227 |
// Check that a container on an IPv4-only network can have a port mapping |
| 178 | 228 |
// from a specific IPv6 host address (using docker-proxy). |
| 179 | 229 |
// Regression test for https://github.com/moby/moby/issues/48067 (which |
| ... | ... |
@@ -605,6 +605,22 @@ func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retE |
| 605 | 605 |
if domain == syscall.AF_INET6 {
|
| 606 | 606 |
syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) |
| 607 | 607 |
} |
| 608 |
+ if typ == syscall.SOCK_DGRAM {
|
|
| 609 |
+ // Enable IP_PKTINFO for UDP sockets to get the destination address. |
|
| 610 |
+ // The destination address will be used as the source address when |
|
| 611 |
+ // sending back replies coming from the container. |
|
| 612 |
+ lvl := syscall.IPPROTO_IP |
|
| 613 |
+ opt := syscall.IP_PKTINFO |
|
| 614 |
+ optName := "IP_PKTINFO" |
|
| 615 |
+ if domain == syscall.AF_INET6 {
|
|
| 616 |
+ lvl = syscall.IPPROTO_IPV6 |
|
| 617 |
+ opt = syscall.IPV6_RECVPKTINFO |
|
| 618 |
+ optName = "IPV6_RECVPKTINFO" |
|
| 619 |
+ } |
|
| 620 |
+ if err := syscall.SetsockoptInt(sd, lvl, opt, 1); err != nil {
|
|
| 621 |
+ return portBinding{}, fmt.Errorf("failed to setsockopt(%s) for %s: %w", optName, cfg, err)
|
|
| 622 |
+ } |
|
| 623 |
+ } |
|
| 608 | 624 |
if err := syscall.Bind(sd, sa); err != nil {
|
| 609 | 625 |
if cfg.HostPort == cfg.HostPortEnd {
|
| 610 | 626 |
return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err)
|