Browse code

Merge pull request #12165 from icecrime/optional_userland_proxy

Optional userland proxy

Jessie Frazelle authored on 2015/05/08 06:01:16
Showing 20 changed files
... ...
@@ -79,6 +79,7 @@ func (config *Config) InstallFlags() {
79 79
 	config.Ulimits = make(map[string]*ulimit.Ulimit)
80 80
 	opts.UlimitMapVar(config.Ulimits, []string{"-default-ulimit"}, "Set default ulimits for containers")
81 81
 	flag.StringVar(&config.LogConfig.Type, []string{"-log-driver"}, "json-file", "Default driver for container logs")
82
+	flag.BoolVar(&config.Bridge.EnableUserlandProxy, []string{"-userland-proxy"}, true, "Use userland proxy for loopback traffic")
82 83
 }
83 84
 
84 85
 func getDefaultNetworkMtu() int {
... ...
@@ -307,6 +307,7 @@ func populateCommand(c *Container, env []string) error {
307 307
 				GlobalIPv6Address:    network.GlobalIPv6Address,
308 308
 				GlobalIPv6PrefixLen:  network.GlobalIPv6PrefixLen,
309 309
 				IPv6Gateway:          network.IPv6Gateway,
310
+				HairpinMode:          network.HairpinMode,
310 311
 			}
311 312
 		}
312 313
 	case "container":
... ...
@@ -96,6 +96,7 @@ type NetworkInterface struct {
96 96
 	LinkLocalIPv6Address string `json:"link_local_ipv6"`
97 97
 	GlobalIPv6PrefixLen  int    `json:"global_ipv6_prefix_len"`
98 98
 	IPv6Gateway          string `json:"ipv6_gateway"`
99
+	HairpinMode          bool   `json:"hairpin_mode"`
99 100
 }
100 101
 
101 102
 // TODO Windows: Factor out ulimit.Rlimit
... ...
@@ -112,6 +112,7 @@ func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command)
112 112
 			Gateway:           c.Network.Interface.Gateway,
113 113
 			Type:              "veth",
114 114
 			Bridge:            c.Network.Interface.Bridge,
115
+			HairpinMode:       c.Network.Interface.HairpinMode,
115 116
 		}
116 117
 		if c.Network.Interface.GlobalIPv6Address != "" {
117 118
 			vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen)
... ...
@@ -15,4 +15,5 @@ type Settings struct {
15 15
 	Bridge                 string
16 16
 	PortMapping            map[string]map[string]string // Deprecated
17 17
 	Ports                  nat.PortMap
18
+	HairpinMode            bool
18 19
 }
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"net"
9 9
 	"os"
10 10
 	"os/exec"
11
+	"path/filepath"
11 12
 	"strconv"
12 13
 	"strings"
13 14
 	"sync"
... ...
@@ -83,6 +84,7 @@ var (
83 83
 	gatewayIPv6       net.IP
84 84
 	portMapper        *portmapper.PortMapper
85 85
 	once              sync.Once
86
+	hairpinMode       bool
86 87
 
87 88
 	defaultBindingIP  = net.ParseIP("0.0.0.0")
88 89
 	currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
... ...
@@ -100,6 +102,7 @@ type Config struct {
100 100
 	EnableIptables              bool
101 101
 	EnableIpForward             bool
102 102
 	EnableIpMasq                bool
103
+	EnableUserlandProxy         bool
103 104
 	DefaultIp                   net.IP
104 105
 	Iface                       string
105 106
 	IP                          string
... ...
@@ -131,6 +134,8 @@ func InitDriver(config *Config) error {
131 131
 		defaultBindingIP = config.DefaultIp
132 132
 	}
133 133
 
134
+	hairpinMode = !config.EnableUserlandProxy
135
+
134 136
 	bridgeIface = config.Iface
135 137
 	usingDefaultBridge := false
136 138
 	if bridgeIface == "" {
... ...
@@ -243,39 +248,46 @@ func InitDriver(config *Config) error {
243 243
 	if config.EnableIpForward {
244 244
 		// Enable IPv4 forwarding
245 245
 		if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
246
-			logrus.Warnf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
246
+			logrus.Warnf("Unable to enable IPv4 forwarding: %v", err)
247 247
 		}
248 248
 
249 249
 		if config.FixedCIDRv6 != "" {
250 250
 			// Enable IPv6 forwarding
251 251
 			if err := ioutil.WriteFile("/proc/sys/net/ipv6/conf/default/forwarding", []byte{'1', '\n'}, 0644); err != nil {
252
-				logrus.Warnf("WARNING: unable to enable IPv6 default forwarding: %s\n", err)
252
+				logrus.Warnf("Unable to enable IPv6 default forwarding: %v", err)
253 253
 			}
254 254
 			if err := ioutil.WriteFile("/proc/sys/net/ipv6/conf/all/forwarding", []byte{'1', '\n'}, 0644); err != nil {
255
-				logrus.Warnf("WARNING: unable to enable IPv6 all forwarding: %s\n", err)
255
+				logrus.Warnf("Unable to enable IPv6 all forwarding: %v", err)
256 256
 			}
257 257
 		}
258 258
 	}
259 259
 
260
+	if hairpinMode {
261
+		// Enable routing of loopback addresses
262
+		sysPath := filepath.Join("/proc/sys/net/ipv4/conf", bridgeIface, "route_localnet")
263
+		if err := ioutil.WriteFile(sysPath, []byte{'1', '\n'}, 0644); err != nil {
264
+			logrus.Warnf("Unable to enable local routing for hairpin mode: %v", err)
265
+		}
266
+	}
267
+
260 268
 	// We can always try removing the iptables
261 269
 	if err := iptables.RemoveExistingChain("DOCKER", iptables.Nat); err != nil {
262 270
 		return err
263 271
 	}
264 272
 
265 273
 	if config.EnableIptables {
266
-		_, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Nat)
274
+		_, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Nat, hairpinMode)
267 275
 		if err != nil {
268 276
 			return err
269 277
 		}
270 278
 		// call this on Firewalld reload
271
-		iptables.OnReloaded(func() { iptables.NewChain("DOCKER", bridgeIface, iptables.Nat) })
272
-
273
-		chain, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter)
279
+		iptables.OnReloaded(func() { iptables.NewChain("DOCKER", bridgeIface, iptables.Nat, hairpinMode) })
280
+		chain, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter, hairpinMode)
274 281
 		if err != nil {
275 282
 			return err
276 283
 		}
277 284
 		// call this on Firewalld reload
278
-		iptables.OnReloaded(func() { iptables.NewChain("DOCKER", bridgeIface, iptables.Filter) })
285
+		iptables.OnReloaded(func() { iptables.NewChain("DOCKER", bridgeIface, iptables.Filter, hairpinMode) })
279 286
 
280 287
 		portMapper.SetIptablesChain(chain)
281 288
 	}
... ...
@@ -374,6 +386,18 @@ func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
374 374
 		}
375 375
 	}
376 376
 
377
+	// In hairpin mode, masquerade traffic from localhost
378
+	if hairpinMode {
379
+		masqueradeArgs := []string{"-t", "nat", "-m", "addrtype", "--src-type", "LOCAL", "-o", bridgeIface, "-j", "MASQUERADE"}
380
+		if !iptables.Exists(iptables.Filter, "POSTROUTING", masqueradeArgs...) {
381
+			if output, err := iptables.Raw(append([]string{"-I", "POSTROUTING"}, masqueradeArgs...)...); err != nil {
382
+				return fmt.Errorf("Unable to masquerade local traffic: %s", err)
383
+			} else if len(output) != 0 {
384
+				return fmt.Errorf("Error iptables masquerade local traffic: %s", output)
385
+			}
386
+		}
387
+	}
388
+
377 389
 	// Accept all non-intercontainer outgoing packets
378 390
 	outgoingArgs := []string{"-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
379 391
 	if !iptables.Exists(iptables.Filter, "FORWARD", outgoingArgs...) {
... ...
@@ -637,6 +661,7 @@ func Allocate(id, requestedMac, requestedIP, requestedIPv6 string) (*network.Set
637 637
 		Bridge:               bridgeIface,
638 638
 		IPPrefixLen:          maskSize,
639 639
 		LinkLocalIPv6Address: localIPv6.String(),
640
+		HairpinMode:          hairpinMode,
640 641
 	}
641 642
 
642 643
 	if globalIPv6Network != nil {
... ...
@@ -722,7 +747,7 @@ func AllocatePort(id string, port nat.Port, binding nat.PortBinding) (nat.PortBi
722 722
 		return nat.PortBinding{}, err
723 723
 	}
724 724
 	for i := 0; i < MaxAllocatedPortAttempts; i++ {
725
-		if host, err = portMapper.Map(container, ip, hostPort); err == nil {
725
+		if host, err = portMapper.Map(container, ip, hostPort, !hairpinMode); err == nil {
726 726
 			break
727 727
 		}
728 728
 		// There is no point in immediately retrying to map an explicitly
... ...
@@ -177,7 +177,7 @@ func TestLinkContainers(t *testing.T) {
177 177
 	}
178 178
 
179 179
 	bridgeIface = "lo"
180
-	if _, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter); err != nil {
180
+	if _, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter, false); err != nil {
181 181
 		t.Fatal(err)
182 182
 	}
183 183
 
... ...
@@ -51,7 +51,7 @@ func (pm *PortMapper) SetIptablesChain(c *iptables.Chain) {
51 51
 	pm.chain = c
52 52
 }
53 53
 
54
-func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host net.Addr, err error) {
54
+func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int, useProxy bool) (host net.Addr, err error) {
55 55
 	pm.lock.Lock()
56 56
 	defer pm.lock.Unlock()
57 57
 
... ...
@@ -59,7 +59,6 @@ func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host
59 59
 		m                 *mapping
60 60
 		proto             string
61 61
 		allocatedHostPort int
62
-		proxy             UserlandProxy
63 62
 	)
64 63
 
65 64
 	switch container.(type) {
... ...
@@ -75,7 +74,9 @@ func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host
75 75
 			container: container,
76 76
 		}
77 77
 
78
-		proxy = NewProxy(proto, hostIP, allocatedHostPort, container.(*net.TCPAddr).IP, container.(*net.TCPAddr).Port)
78
+		if useProxy {
79
+			m.userlandProxy = NewProxy(proto, hostIP, allocatedHostPort, container.(*net.TCPAddr).IP, container.(*net.TCPAddr).Port)
80
+		}
79 81
 	case *net.UDPAddr:
80 82
 		proto = "udp"
81 83
 		if allocatedHostPort, err = pm.Allocator.RequestPort(hostIP, proto, hostPort); err != nil {
... ...
@@ -88,7 +89,9 @@ func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host
88 88
 			container: container,
89 89
 		}
90 90
 
91
-		proxy = NewProxy(proto, hostIP, allocatedHostPort, container.(*net.UDPAddr).IP, container.(*net.UDPAddr).Port)
91
+		if useProxy {
92
+			m.userlandProxy = NewProxy(proto, hostIP, allocatedHostPort, container.(*net.UDPAddr).IP, container.(*net.UDPAddr).Port)
93
+		}
92 94
 	default:
93 95
 		return nil, ErrUnknownBackendAddressType
94 96
 	}
... ...
@@ -112,7 +115,9 @@ func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host
112 112
 
113 113
 	cleanup := func() error {
114 114
 		// need to undo the iptables rules before we return
115
-		proxy.Stop()
115
+		if m.userlandProxy != nil {
116
+			m.userlandProxy.Stop()
117
+		}
116 118
 		pm.forward(iptables.Delete, m.proto, hostIP, allocatedHostPort, containerIP.String(), containerPort)
117 119
 		if err := pm.Allocator.ReleasePort(hostIP, m.proto, allocatedHostPort); err != nil {
118 120
 			return err
... ...
@@ -121,13 +126,15 @@ func (pm *PortMapper) Map(container net.Addr, hostIP net.IP, hostPort int) (host
121 121
 		return nil
122 122
 	}
123 123
 
124
-	if err := proxy.Start(); err != nil {
125
-		if err := cleanup(); err != nil {
126
-			return nil, fmt.Errorf("Error during port allocation cleanup: %v", err)
124
+	if m.userlandProxy != nil {
125
+		if err := m.userlandProxy.Start(); err != nil {
126
+			if err := cleanup(); err != nil {
127
+				return nil, fmt.Errorf("Error during port allocation cleanup: %v", err)
128
+			}
129
+			return nil, err
127 130
 		}
128
-		return nil, err
129 131
 	}
130
-	m.userlandProxy = proxy
132
+
131 133
 	pm.currentMappings[key] = m
132 134
 	return m.host, nil
133 135
 }
... ...
@@ -154,7 +161,9 @@ func (pm *PortMapper) Unmap(host net.Addr) error {
154 154
 		return ErrPortNotMapped
155 155
 	}
156 156
 
157
-	data.userlandProxy.Stop()
157
+	if data.userlandProxy != nil {
158
+		data.userlandProxy.Stop()
159
+	}
158 160
 
159 161
 	delete(pm.currentMappings, key)
160 162
 
... ...
@@ -44,22 +44,22 @@ func TestMapPorts(t *testing.T) {
44 44
 		return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String())
45 45
 	}
46 46
 
47
-	if host, err := pm.Map(srcAddr1, dstIp1, 80); err != nil {
47
+	if host, err := pm.Map(srcAddr1, dstIp1, 80, true); err != nil {
48 48
 		t.Fatalf("Failed to allocate port: %s", err)
49 49
 	} else if !addrEqual(dstAddr1, host) {
50 50
 		t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s",
51 51
 			dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network())
52 52
 	}
53 53
 
54
-	if _, err := pm.Map(srcAddr1, dstIp1, 80); err == nil {
54
+	if _, err := pm.Map(srcAddr1, dstIp1, 80, true); err == nil {
55 55
 		t.Fatalf("Port is in use - mapping should have failed")
56 56
 	}
57 57
 
58
-	if _, err := pm.Map(srcAddr2, dstIp1, 80); err == nil {
58
+	if _, err := pm.Map(srcAddr2, dstIp1, 80, true); err == nil {
59 59
 		t.Fatalf("Port is in use - mapping should have failed")
60 60
 	}
61 61
 
62
-	if _, err := pm.Map(srcAddr2, dstIp2, 80); err != nil {
62
+	if _, err := pm.Map(srcAddr2, dstIp2, 80, true); err != nil {
63 63
 		t.Fatalf("Failed to allocate port: %s", err)
64 64
 	}
65 65
 
... ...
@@ -127,14 +127,14 @@ func TestMapAllPortsSingleInterface(t *testing.T) {
127 127
 	for i := 0; i < 10; i++ {
128 128
 		start, end := pm.Allocator.Begin, pm.Allocator.End
129 129
 		for i := start; i < end; i++ {
130
-			if host, err = pm.Map(srcAddr1, dstIp1, 0); err != nil {
130
+			if host, err = pm.Map(srcAddr1, dstIp1, 0, true); err != nil {
131 131
 				t.Fatal(err)
132 132
 			}
133 133
 
134 134
 			hosts = append(hosts, host)
135 135
 		}
136 136
 
137
-		if _, err := pm.Map(srcAddr1, dstIp1, start); err == nil {
137
+		if _, err := pm.Map(srcAddr1, dstIp1, start, true); err == nil {
138 138
 			t.Fatalf("Port %d should be bound but is not", start)
139 139
 		}
140 140
 
... ...
@@ -53,6 +53,9 @@ To see the man page for a command run **man docker <command>**.
53 53
 **-e**, **--exec-driver**=""
54 54
   Force Docker to use specific exec driver. Default is `native`.
55 55
 
56
+**--exec-opt**=[]
57
+  Set exec driver options. See EXEC DRIVER OPTIONS.
58
+
56 59
 **--fixed-cidr**=""
57 60
   IPv4 subnet for fixed IPs (e.g., 10.20.0.0/16); this subnet must be nested in the bridge subnet (which is defined by \-b or \-\-bip)
58 61
 
... ...
@@ -111,6 +114,9 @@ unix://[/path/to/socket] to use.
111 111
 **-s**, **--storage-driver**=""
112 112
   Force the Docker runtime to use a specific storage driver.
113 113
 
114
+**--selinux-enabled**=*true*|*false*
115
+  Enable selinux support. Default is false. SELinux does not presently support the BTRFS storage driver.
116
+
114 117
 **--storage-opt**=[]
115 118
   Set storage driver options. See STORAGE DRIVER OPTIONS.
116 119
 
... ...
@@ -121,15 +127,12 @@ unix://[/path/to/socket] to use.
121 121
   Use TLS and verify the remote (daemon: verify client, client: verify daemon).
122 122
   Default is false.
123 123
 
124
+**--userland-proxy**=*true*|*false*
125
+    Rely on a userland proxy implementation for inter-container and outside-to-container loopback communications. Default is true.
126
+
124 127
 **-v**, **--version**=*true*|*false*
125 128
   Print version information and quit. Default is false.
126 129
 
127
-**--exec-opt**=[]
128
-  Set exec driver options. See EXEC DRIVER OPTIONS.
129
-
130
-**--selinux-enabled**=*true*|*false*
131
-  Enable selinux support. Default is false. SELinux does not presently support the BTRFS storage driver.
132
-
133 130
 # COMMANDS
134 131
 **attach**
135 132
   Attach to a running container
... ...
@@ -93,6 +93,9 @@ server when it starts up, and cannot be changed once it is running:
93 93
  *  `--mtu=BYTES` — see
94 94
     [Customizing docker0](#docker0)
95 95
 
96
+ *  `--userland-proxy=true|false` — see
97
+    [Binding container ports](#binding-ports)
98
+
96 99
 There are two networking options that can be supplied either at startup
97 100
 or when `docker run` is invoked.  When provided at startup, set the
98 101
 default value that `docker run` will later use if the options are not
... ...
@@ -399,7 +402,7 @@ machine that the Docker server creates when it starts:
399 399
     ...
400 400
     Chain POSTROUTING (policy ACCEPT)
401 401
     target     prot opt source               destination
402
-    MASQUERADE  all  --  172.17.0.0/16       !172.17.0.0/16
402
+    MASQUERADE  all  --  172.17.0.0/16       0.0.0.0/0
403 403
     ...
404 404
 
405 405
 But if you want containers to accept incoming connections, you will need
... ...
@@ -452,6 +455,21 @@ address, you can edit your system-wide Docker server settings and add the
452 452
 option `--ip=IP_ADDRESS`.  Remember to restart your Docker server after
453 453
 editing this setting.
454 454
 
455
+> **Note**:
456
+> With hairpin NAT enabled (`--userland-proxy=false`), container port exposure
457
+> is achieved purely through iptables rules, and no attempt to bind the exposed
458
+> port is ever made. This means that nothing prevents shadowing a previously
459
+> listening service outside of Docker through exposing the same port for a
460
+> container. In such a conflicting situation, Docker-created iptables rules will
461
+> take precedence and route to the container.
462
+
463
+The `--userland-proxy` parameter, true by default, provides a userland
464
+implementation for inter-container and outside-to-container communication. When
465
+disabled, Docker uses both an additional `MASQUERADE` iptables rule and the
466
+`net.ipv4.route_localnet` kernel parameter which allow the host machine to
467
+connect to a local container exposed port through the commonly used loopback
468
+address: this alternative is preferred for performance reasons.
469
+
455 470
 Again, this topic is covered without all of these low-level networking
456 471
 details in the [Docker User Guide](/userguide/dockerlinks/) document if you
457 472
 would like to use that as your port redirection reference instead.
... ...
@@ -149,6 +149,7 @@ expect an integer, and they can only be specified once.
149 149
       --default-gateway-v6=""                Container default gateway IPv6 address
150 150
       --dns=[]                               DNS server to use
151 151
       --dns-search=[]                        DNS search domains to use
152
+      --default-ulimit=[]                    Set default ulimit settings for containers
152 153
       -e, --exec-driver="native"             Exec driver to use
153 154
       --fixed-cidr=""                        IPv4 subnet for fixed IPs
154 155
       --fixed-cidr-v6=""                     IPv6 subnet for fixed IPs
... ...
@@ -177,8 +178,8 @@ expect an integer, and they can only be specified once.
177 177
       --tlscert="~/.docker/cert.pem"         Path to TLS certificate file
178 178
       --tlskey="~/.docker/key.pem"           Path to TLS key file
179 179
       --tlsverify=false                      Use TLS and verify the remote
180
+      --userland-proxy=true                  Use userland proxy for loopback traffic
180 181
       -v, --version=false                    Print version information and quit
181
-      --default-ulimit=[]                    Set default ulimit settings for containers.
182 182
 
183 183
 Options with [] may be specified multiple times.
184 184
 
... ...
@@ -207,6 +207,7 @@ test_env() {
207 207
 		DEST="$DEST" \
208 208
 		DOCKER_EXECDRIVER="$DOCKER_EXECDRIVER" \
209 209
 		DOCKER_GRAPHDRIVER="$DOCKER_GRAPHDRIVER" \
210
+		DOCKER_USERLANDPROXY="$DOCKER_USERLANDPROXY" \
210 211
 		DOCKER_HOST="$DOCKER_HOST" \
211 212
 		GOPATH="$GOPATH" \
212 213
 		HOME="$DEST/fake-HOME" \
... ...
@@ -14,6 +14,7 @@ exec 41>&1 42>&2
14 14
 
15 15
 export DOCKER_GRAPHDRIVER=${DOCKER_GRAPHDRIVER:-vfs}
16 16
 export DOCKER_EXECDRIVER=${DOCKER_EXECDRIVER:-native}
17
+export DOCKER_USERLANDPROXY=${DOCKER_USERLANDPROXY:-true}
17 18
 
18 19
 if [ -z "$DOCKER_TEST_HOST" ]; then
19 20
 	export DOCKER_HOST="unix://$(cd "$DEST" && pwd)/docker.sock" # "pwd" tricks to make sure $DEST is an absolute path, not a relative one
... ...
@@ -23,6 +24,7 @@ if [ -z "$DOCKER_TEST_HOST" ]; then
23 23
 		--storage-driver "$DOCKER_GRAPHDRIVER" \
24 24
 		--exec-driver "$DOCKER_EXECDRIVER" \
25 25
 		--pidfile "$DEST/docker.pid" \
26
+		--userland-proxy="$DOCKER_USERLANDPROXY" \
26 27
 			&> "$DEST/docker.log"
27 28
 	) &
28 29
 	trap "source '${MAKEDIR}/.integration-daemon-stop'" EXIT # make sure that if the script exits unexpectedly, we stop this daemon we just started
... ...
@@ -4,14 +4,26 @@ import (
4 4
 	"fmt"
5 5
 	"net"
6 6
 	"os/exec"
7
+	"strconv"
7 8
 	"strings"
8 9
 
9 10
 	"github.com/go-check/check"
10 11
 )
11 12
 
12
-func (s *DockerSuite) TestNetworkNat(c *check.C) {
13
-	testRequires(c, SameHostDaemon, NativeExecDriver)
13
+func startServerContainer(c *check.C, proto string, port int) string {
14
+	cmd := []string{"-d", "-p", fmt.Sprintf("%d:%d", port, port), "busybox", "nc", "-lp", strconv.Itoa(port)}
15
+	if proto == "udp" {
16
+		cmd = append(cmd, "-u")
17
+	}
14 18
 
19
+	name := "server"
20
+	if err := waitForContainer(name, cmd...); err != nil {
21
+		c.Fatalf("Failed to launch server container: %v", err)
22
+	}
23
+	return name
24
+}
25
+
26
+func getExternalAddress(c *check.C) net.IP {
15 27
 	iface, err := net.InterfaceByName("eth0")
16 28
 	if err != nil {
17 29
 		c.Skip(fmt.Sprintf("Test not running with `make test`. Interface eth0 not found: %v", err))
... ...
@@ -27,35 +39,65 @@ func (s *DockerSuite) TestNetworkNat(c *check.C) {
27 27
 		c.Fatalf("Error retrieving the up for eth0: %s", err)
28 28
 	}
29 29
 
30
-	runCmd := exec.Command(dockerBinary, "run", "-dt", "-p", "8080:8080", "busybox", "nc", "-lp", "8080")
30
+	return ifaceIP
31
+}
32
+
33
+func getContainerLogs(c *check.C, containerID string) string {
34
+	runCmd := exec.Command(dockerBinary, "logs", containerID)
31 35
 	out, _, err := runCommandWithOutput(runCmd)
32 36
 	if err != nil {
33 37
 		c.Fatal(out, err)
34 38
 	}
39
+	return strings.Trim(out, "\r\n")
40
+}
35 41
 
36
-	cleanedContainerID := strings.TrimSpace(out)
37
-
38
-	runCmd = exec.Command(dockerBinary, "run", "busybox", "sh", "-c", fmt.Sprintf("echo hello world | nc -w 30 %s 8080", ifaceIP))
39
-	out, _, err = runCommandWithOutput(runCmd)
42
+func getContainerStatus(c *check.C, containerID string) string {
43
+	runCmd := exec.Command(dockerBinary, "inspect", "-f", "{{.State.Running}}", containerID)
44
+	out, _, err := runCommandWithOutput(runCmd)
40 45
 	if err != nil {
41 46
 		c.Fatal(out, err)
42 47
 	}
48
+	return strings.Trim(out, "\r\n")
49
+}
43 50
 
44
-	runCmd = exec.Command(dockerBinary, "logs", cleanedContainerID)
45
-	out, _, err = runCommandWithOutput(runCmd)
46
-	if err != nil {
47
-		c.Fatalf("failed to retrieve logs for container: %s, %v", out, err)
48
-	}
51
+func (s *DockerSuite) TestNetworkNat(c *check.C) {
52
+	testRequires(c, SameHostDaemon, NativeExecDriver)
53
+	defer deleteAllContainers()
49 54
 
50
-	out = strings.Trim(out, "\r\n")
55
+	srv := startServerContainer(c, "tcp", 8080)
51 56
 
52
-	if expected := "hello world"; out != expected {
53
-		c.Fatalf("Unexpected output. Expected: %q, received: %q for iface %s", expected, out, ifaceIP)
57
+	// Spawn a new container which connects to the server through the
58
+	// interface address.
59
+	endpoint := getExternalAddress(c)
60
+	runCmd := exec.Command(dockerBinary, "run", "busybox", "sh", "-c", fmt.Sprintf("echo hello world | nc -w 30 %s 8080", endpoint))
61
+	if out, _, err := runCommandWithOutput(runCmd); err != nil {
62
+		c.Fatalf("Failed to connect to server: %v (output: %q)", err, string(out))
54 63
 	}
55 64
 
56
-	killCmd := exec.Command(dockerBinary, "kill", cleanedContainerID)
57
-	if out, _, err = runCommandWithOutput(killCmd); err != nil {
58
-		c.Fatalf("failed to kill container: %s, %v", out, err)
65
+	result := getContainerLogs(c, srv)
66
+	if expected := "hello world"; result != expected {
67
+		c.Fatalf("Unexpected output. Expected: %q, received: %q", expected, result)
68
+	}
69
+}
70
+
71
+func (s *DockerSuite) TestNetworkLocalhostTCPNat(c *check.C) {
72
+	testRequires(c, SameHostDaemon, NativeExecDriver)
73
+	defer deleteAllContainers()
74
+
75
+	srv := startServerContainer(c, "tcp", 8081)
76
+
77
+	// Attempt to connect from the host to the listening container.
78
+	conn, err := net.Dial("tcp", "localhost:8081")
79
+	if err != nil {
80
+		c.Fatalf("Failed to connect to container (%v)", err)
81
+	}
82
+	if _, err := conn.Write([]byte("hello world\n")); err != nil {
83
+		c.Fatal(err)
59 84
 	}
85
+	conn.Close()
60 86
 
87
+	result := getContainerLogs(c, srv)
88
+	if expected := "hello world"; result != expected {
89
+		c.Fatalf("Unexpected output. Expected: %q, received: %q", expected, result)
90
+	}
61 91
 }
... ...
@@ -2197,49 +2197,19 @@ func (s *DockerSuite) TestRunPortInUse(c *check.C) {
2197 2197
 	testRequires(c, SameHostDaemon)
2198 2198
 
2199 2199
 	port := "1234"
2200
-	l, err := net.Listen("tcp", ":"+port)
2201
-	if err != nil {
2202
-		c.Fatal(err)
2203
-	}
2204
-	defer l.Close()
2205
-	cmd := exec.Command(dockerBinary, "run", "-d", "-p", port+":80", "busybox", "top")
2206
-	out, _, err := runCommandWithOutput(cmd)
2207
-	if err == nil {
2208
-		c.Fatalf("Binding on used port must fail")
2209
-	}
2210
-	if !strings.Contains(out, "address already in use") {
2211
-		c.Fatalf("Out must be about \"address already in use\", got %s", out)
2212
-	}
2213
-}
2214
-
2215
-// https://github.com/docker/docker/issues/8428
2216
-func (s *DockerSuite) TestRunPortProxy(c *check.C) {
2217
-	testRequires(c, SameHostDaemon)
2218
-
2219
-	port := "12345"
2220 2200
 	cmd := exec.Command(dockerBinary, "run", "-d", "-p", port+":80", "busybox", "top")
2221
-
2222 2201
 	out, _, err := runCommandWithOutput(cmd)
2223 2202
 	if err != nil {
2224
-		c.Fatalf("Failed to run and bind port %s, output: %s, error: %s", port, out, err)
2225
-	}
2226
-
2227
-	// connett for 10 times here. This will trigger 10 EPIPES in the child
2228
-	// process and kill it when it writes to a closed stdout/stderr
2229
-	for i := 0; i < 10; i++ {
2230
-		net.Dial("tcp", fmt.Sprintf("0.0.0.0:%s", port))
2203
+		c.Fatalf("Fail to run listening container")
2231 2204
 	}
2232 2205
 
2233
-	listPs := exec.Command("sh", "-c", "ps ax | grep docker")
2234
-	out, _, err = runCommandWithOutput(listPs)
2235
-	if err != nil {
2236
-		c.Errorf("list docker process failed with output %s, error %s", out, err)
2237
-	}
2238
-	if strings.Contains(out, "docker <defunct>") {
2239
-		c.Errorf("Unexpected defunct docker process")
2206
+	cmd = exec.Command(dockerBinary, "run", "-d", "-p", port+":80", "busybox", "top")
2207
+	out, _, err = runCommandWithOutput(cmd)
2208
+	if err == nil {
2209
+		c.Fatalf("Binding on used port must fail")
2240 2210
 	}
2241
-	if !strings.Contains(out, "docker-proxy -proto tcp -host-ip 0.0.0.0 -host-port 12345") {
2242
-		c.Errorf("Failed to find docker-proxy process, got %s", out)
2211
+	if !strings.Contains(out, "port is already allocated") {
2212
+		c.Fatalf("Out must be about \"port is already allocated\", got %s", out)
2243 2213
 	}
2244 2214
 }
2245 2215
 
... ...
@@ -37,6 +37,16 @@ type Daemon struct {
37 37
 	storageDriver  string
38 38
 	execDriver     string
39 39
 	wait           chan error
40
+	userlandProxy  bool
41
+}
42
+
43
// enableUserlandProxy reports whether the userland proxy should be enabled,
// as requested through the DOCKER_USERLANDPROXY environment variable.
//
// It returns the parsed boolean when the variable holds a value accepted by
// strconv.ParseBool ("1", "t", "true", "0", "f", "false", ...). It returns
// true, the default, when the variable is unset, empty, or not a valid
// boolean.
func enableUserlandProxy() bool {
	env := os.Getenv("DOCKER_USERLANDPROXY")
	if env == "" {
		return true
	}
	// Honor the value only when it parsed successfully. The previous check
	// was inverted (err != nil), which ignored every valid setting and
	// returned false for unparsable input.
	if val, err := strconv.ParseBool(env); err == nil {
		return val
	}
	return true
}
41 51
 
42 52
 // NewDaemon returns a Daemon instance to be used for testing.
... ...
@@ -58,11 +68,19 @@ func NewDaemon(c *check.C) *Daemon {
58 58
 		c.Fatalf("Could not create %s/graph directory", daemonFolder)
59 59
 	}
60 60
 
61
+	userlandProxy := true
62
+	if env := os.Getenv("DOCKER_USERLANDPROXY"); env != "" {
63
+		if val, err := strconv.ParseBool(env); err != nil {
64
+			userlandProxy = val
65
+		}
66
+	}
67
+
61 68
 	return &Daemon{
62 69
 		c:             c,
63 70
 		folder:        daemonFolder,
64 71
 		storageDriver: os.Getenv("DOCKER_GRAPHDRIVER"),
65 72
 		execDriver:    os.Getenv("DOCKER_EXECDRIVER"),
73
+		userlandProxy: userlandProxy,
66 74
 	}
67 75
 }
68 76
 
... ...
@@ -79,6 +97,7 @@ func (d *Daemon) Start(arg ...string) error {
79 79
 		"--daemon",
80 80
 		"--graph", fmt.Sprintf("%s/graph", d.folder),
81 81
 		"--pidfile", fmt.Sprintf("%s/docker.pid", d.folder),
82
+		fmt.Sprintf("--userland-proxy=%t", d.userlandProxy),
82 83
 	}
83 84
 
84 85
 	// If we don't explicitly set the log-level or debug flag(-D) then
... ...
@@ -14,7 +14,7 @@ func TestReloaded(t *testing.T) {
14 14
 	var err error
15 15
 	var fwdChain *Chain
16 16
 
17
-	fwdChain, err = NewChain("FWD", "lo", Filter)
17
+	fwdChain, err = NewChain("FWD", "lo", Filter, false)
18 18
 	if err != nil {
19 19
 		t.Fatal(err)
20 20
 	}
... ...
@@ -58,7 +58,7 @@ func initCheck() error {
58 58
 	return nil
59 59
 }
60 60
 
61
-func NewChain(name, bridge string, table Table) (*Chain, error) {
61
+func NewChain(name, bridge string, table Table, hairpinMode bool) (*Chain, error) {
62 62
 	c := &Chain{
63 63
 		Name:   name,
64 64
 		Bridge: bridge,
... ...
@@ -90,8 +90,10 @@ func NewChain(name, bridge string, table Table) (*Chain, error) {
90 90
 		}
91 91
 		output := []string{
92 92
 			"-m", "addrtype",
93
-			"--dst-type", "LOCAL",
94
-			"!", "--dst", "127.0.0.0/8"}
93
+			"--dst-type", "LOCAL"}
94
+		if !hairpinMode {
95
+			output = append(output, "!", "--dst", "127.0.0.0/8")
96
+		}
95 97
 		if !Exists(Nat, "OUTPUT", output...) {
96 98
 			if err := c.Output(Append, output...); err != nil {
97 99
 				return nil, fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err)
... ...
@@ -137,7 +139,6 @@ func (c *Chain) Forward(action Action, ip net.IP, port int, proto, destAddr stri
137 137
 		"-p", proto,
138 138
 		"-d", daddr,
139 139
 		"--dport", strconv.Itoa(port),
140
-		"!", "-i", c.Bridge,
141 140
 		"-j", "DNAT",
142 141
 		"--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort))); err != nil {
143 142
 		return err
... ...
@@ -16,12 +16,12 @@ var filterChain *Chain
16 16
 func TestNewChain(t *testing.T) {
17 17
 	var err error
18 18
 
19
-	natChain, err = NewChain(chainName, "lo", Nat)
19
+	natChain, err = NewChain(chainName, "lo", Nat, false)
20 20
 	if err != nil {
21 21
 		t.Fatal(err)
22 22
 	}
23 23
 
24
-	filterChain, err = NewChain(chainName, "lo", Filter)
24
+	filterChain, err = NewChain(chainName, "lo", Filter, false)
25 25
 	if err != nil {
26 26
 		t.Fatal(err)
27 27
 	}
... ...
@@ -40,7 +40,6 @@ func TestForward(t *testing.T) {
40 40
 	}
41 41
 
42 42
 	dnatRule := []string{
43
-		"!", "-i", filterChain.Bridge,
44 43
 		"-d", ip.String(),
45 44
 		"-p", proto,
46 45
 		"--dport", strconv.Itoa(port),