Browse code

Vendoring libnetwork v0.7.0-dev.5

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2016/03/09 11:47:02
Showing 47 changed files
... ...
@@ -29,14 +29,14 @@ clone git github.com/RackSec/srslog 6eb773f331e46fbba8eecb8e794e635e75fc04de
29 29
 clone git github.com/imdario/mergo 0.2.1
30 30
 
31 31
 #get libnetwork packages
32
-clone git github.com/docker/libnetwork v0.7.0-dev.3
32
+clone git github.com/docker/libnetwork v0.7.0-dev.5
33 33
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
34 34
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
35 35
 clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
36 36
 clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
37 37
 clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
38 38
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
39
-clone git github.com/vishvananda/netlink bfd70f556483c008636b920dda142fdaa0d59ef9
39
+clone git github.com/vishvananda/netlink 631962935bff4f3d20ff32a72e8944f6d2836a26
40 40
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
41 41
 clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
42 42
 clone git github.com/deckarep/golang-set ef32fa3046d9f249d399f98ebaf9be944430fd1d
... ...
@@ -1432,7 +1432,7 @@ func (s *DockerSuite) TestDockerNetworkInternalMode(c *check.C) {
1432 1432
 	c.Assert(waitRun("second"), check.IsNil)
1433 1433
 	out, _, err := dockerCmdWithError("exec", "first", "ping", "-W", "4", "-c", "1", "www.google.com")
1434 1434
 	c.Assert(err, check.NotNil)
1435
-	c.Assert(out, checker.Contains, "100% packet loss")
1435
+	c.Assert(out, checker.Contains, "ping: bad address")
1436 1436
 	_, _, err = dockerCmdWithError("exec", "second", "ping", "-c", "1", "first")
1437 1437
 	c.Assert(err, check.IsNil)
1438 1438
 }
... ...
@@ -1,5 +1,25 @@
1 1
 # Changelog
2 2
 
3
+## 0.7.0-dev.5 (2016-03-08)
4
+- Fixes https://github.com/docker/docker/issues/20847
5
+- Fixes https://github.com/docker/docker/issues/20997
6
+- Fixes issues unveiled by docker integ test over 0.7.0-dev.4
7
+
8
+## 0.7.0-dev.4 (2016-03-07)
9
+- Changed ownership of exposed ports and port-mapping options from Endpoint to Sandbox
10
+- Implement DNS RR in the Docker embedded DNS server
11
+- Fixes https://github.com/docker/libnetwork/issues/984 (multi container overlay veth leak)
12
+- Libnetwork to program container's interface MAC address
13
+- Fixed bug in iptables.Exists() logic
14
+- Fixes https://github.com/docker/docker/issues/20694
15
+- Source external DNS queries from container namespace
16
+- Added inbuilt nil IPAM driver
17
+- Windows drivers integration fixes
18
+- Extract hostname from (hostname.domainname). Related to https://github.com/docker/docker/issues/14282
19
+- Fixed race in sandbox statistics read
20
+- Fixes https://github.com/docker/libnetwork/issues/892 (docker start fails when ipv6.disable=1)
21
+- Fixed error message on bridge network creation conflict
22
+
3 23
 ## 0.7.0-dev.3 (2016-02-17)
4 24
 - Fixes https://github.com/docker/docker/issues/20350
5 25
 - Fixes https://github.com/docker/docker/issues/20145
... ...
@@ -90,7 +110,7 @@
90 90
 - DEPRECATE service discovery from default bridge network
91 91
 - Introduced new network UX
92 92
 - Support for multiple networks in bridge driver
93
-- Local persistance with boltdb
93
+- Local persistence with boltdb
94 94
 
95 95
 ## 0.4.0 (2015-07-24)
96 96
 
... ...
@@ -17,6 +17,7 @@
17 17
 			"mrjana",
18 18
 			"mavenugo",
19 19
                         "sanimej",
20
+                        "chenchun",
20 21
 		]
21 22
 
22 23
 [people]
... ...
@@ -37,6 +38,11 @@
37 37
 	Email = "lk4d4@docker.com"
38 38
 	GitHub = "LK4D4"
39 39
 
40
+	[people.chenchun]
41
+	Name = "Chun Chen"
42
+	Email = "ramichen@tencent.com"
43
+	GitHub = "chenchun"
44
+
40 45
 	[people.icecrime]
41 46
 	Name = "Arnaud Porterie"
42 47
 	Email = "arnaud@docker.com"
... ...
@@ -163,7 +163,7 @@ func (s *sequence) toByteArray() ([]byte, error) {
163 163
 func (s *sequence) fromByteArray(data []byte) error {
164 164
 	l := len(data)
165 165
 	if l%12 != 0 {
166
-		return fmt.Errorf("cannot deserialize byte sequence of lenght %d (%v)", l, data)
166
+		return fmt.Errorf("cannot deserialize byte sequence of length %d (%v)", l, data)
167 167
 	}
168 168
 
169 169
 	p := s
... ...
@@ -170,7 +170,7 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
170 170
 
171 171
 	if c.cfg != nil && c.cfg.Cluster.Watcher != nil {
172 172
 		if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil {
173
-			// Failing to initalize discovery is a bad situation to be in.
173
+			// Failing to initialize discovery is a bad situation to be in.
174 174
 			// But it cannot fail creating the Controller
175 175
 			log.Errorf("Failed to Initialize Discovery : %v", err)
176 176
 		}
... ...
@@ -31,7 +31,7 @@ type DataStore interface {
31 31
 	DeleteObjectAtomic(kvObject KVObject) error
32 32
 	// DeleteTree deletes a record
33 33
 	DeleteTree(kvObject KVObject) error
34
-	// Watchable returns whether the store is watchable are not
34
+	// Watchable returns whether the store is watchable or not
35 35
 	Watchable() bool
36 36
 	// Watch for changes on a KVObject
37 37
 	Watch(kvObject KVObject, stopCh <-chan struct{}) (<-chan KVObject, error)
... ...
@@ -3,7 +3,6 @@ package libnetwork
3 3
 import (
4 4
 	"fmt"
5 5
 
6
-	"github.com/docker/libnetwork/netlabel"
7 6
 	"github.com/docker/libnetwork/types"
8 7
 )
9 8
 
... ...
@@ -28,15 +27,15 @@ var procGwNetwork = make(chan (bool), 1)
28 28
    - it's deleted when an endpoint with GW joins the container
29 29
 */
30 30
 
31
-func (sb *sandbox) setupDefaultGW(srcEp *endpoint) error {
32
-	var createOptions []EndpointOption
33
-	c := srcEp.getNetwork().getController()
31
+func (sb *sandbox) setupDefaultGW() error {
34 32
 
35 33
 	// check if the container already has a GW endpoint
36 34
 	if ep := sb.getEndpointInGWNetwork(); ep != nil {
37 35
 		return nil
38 36
 	}
39 37
 
38
+	c := sb.controller
39
+
40 40
 	// Look for default gw network. In case of error (includes not found),
41 41
 	// retry and create it if needed in a serialized execution.
42 42
 	n, err := c.NetworkByName(libnGWNetwork)
... ...
@@ -46,19 +45,7 @@ func (sb *sandbox) setupDefaultGW(srcEp *endpoint) error {
46 46
 		}
47 47
 	}
48 48
 
49
-	if opt, ok := srcEp.generic[netlabel.PortMap]; ok {
50
-		if pb, ok := opt.([]types.PortBinding); ok {
51
-			createOptions = append(createOptions, CreateOptionPortMapping(pb))
52
-		}
53
-	}
54
-
55
-	if opt, ok := srcEp.generic[netlabel.ExposedPorts]; ok {
56
-		if exp, ok := opt.([]types.TransportPort); ok {
57
-			createOptions = append(createOptions, CreateOptionExposedPorts(exp))
58
-		}
59
-	}
60
-
61
-	createOptions = append(createOptions, CreateOptionAnonymous())
49
+	createOptions := []EndpointOption{CreateOptionAnonymous()}
62 50
 
63 51
 	eplen := gwEPlen
64 52
 	if len(sb.containerID) < gwEPlen {
... ...
@@ -74,9 +61,13 @@ func (sb *sandbox) setupDefaultGW(srcEp *endpoint) error {
74 74
 	if err := epLocal.sbJoin(sb); err != nil {
75 75
 		return fmt.Errorf("container %s: endpoint join on GW Network failed: %v", sb.containerID, err)
76 76
 	}
77
+
77 78
 	return nil
78 79
 }
79 80
 
81
+// If present, removes the endpoint connecting the sandbox to the default gw network.
82
+// Unless it is the endpoint designated to provide the external connectivity.
83
+// If the sandbox is being deleted, removes the endpoint unconditionally.
80 84
 func (sb *sandbox) clearDefaultGW() error {
81 85
 	var ep *endpoint
82 86
 
... ...
@@ -84,6 +75,10 @@ func (sb *sandbox) clearDefaultGW() error {
84 84
 		return nil
85 85
 	}
86 86
 
87
+	if ep == sb.getGatewayEndpoint() && !sb.inDelete {
88
+		return nil
89
+	}
90
+
87 91
 	if err := ep.sbLeave(sb, false); err != nil {
88 92
 		return fmt.Errorf("container %s: endpoint leaving GW Network failed: %v", sb.containerID, err)
89 93
 	}
... ...
@@ -98,7 +93,7 @@ func (sb *sandbox) needDefaultGW() bool {
98 98
 
99 99
 	for _, ep := range sb.getConnectedEndpoints() {
100 100
 		if ep.endpointInGWNetwork() {
101
-			continue
101
+			return false
102 102
 		}
103 103
 		if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
104 104
 			continue
... ...
@@ -165,3 +160,16 @@ func (c *controller) defaultGwNetwork() (Network, error) {
165 165
 	}
166 166
 	return n, err
167 167
 }
168
+
169
+// Returns the endpoint which is providing external connectivity to the sandbox
170
+func (sb *sandbox) getGatewayEndpoint() *endpoint {
171
+	for _, ep := range sb.getConnectedEndpoints() {
172
+		if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
173
+			continue
174
+		}
175
+		if len(ep.Gateway()) != 0 {
176
+			return ep
177
+		}
178
+	}
179
+	return nil
180
+}
... ...
@@ -16,7 +16,7 @@ type DiscoveryType int
16 16
 const (
17 17
 	// NodeDiscovery represents Node join/leave events provided by discovery
18 18
 	NodeDiscovery = iota + 1
19
-	// DatastoreConfig represents a add/remove datastore event
19
+	// DatastoreConfig represents an add/remove datastore event
20 20
 	DatastoreConfig
21 21
 )
22 22
 
... ...
@@ -42,6 +42,14 @@ type Driver interface {
42 42
 	// Leave method is invoked when a Sandbox detaches from an endpoint.
43 43
 	Leave(nid, eid string) error
44 44
 
45
+	// ProgramExternalConnectivity invokes the driver method which does the necessary
46
+	// programming to allow the external connectivity dictated by the passed options
47
+	ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error
48
+
49
+	// RevokeExternalConnectivity asks the driver to remove any external connectivity
50
+	// programming that was done so far
51
+	RevokeExternalConnectivity(nid, eid string) error
52
+
45 53
 	// Type returns the type of this driver, the network type this driver manages
46 54
 	Type() string
47 55
 }
... ...
@@ -88,8 +96,8 @@ type JoinInfo interface {
88 88
 	// SetGatewayIPv6 sets the default IPv6 gateway when a container joins the endpoint.
89 89
 	SetGatewayIPv6(net.IP) error
90 90
 
91
-	// AddStaticRoute adds a routes to the sandbox.
92
-	// It may be used in addtion to or instead of a default gateway (as above).
91
+	// AddStaticRoute adds a route to the sandbox.
92
+	// It may be used in addition to or instead of a default gateway (as above).
93 93
 	AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
94 94
 
95 95
 	// DisableGatewayService tells libnetwork not to provide Default GW for the container
... ...
@@ -64,7 +64,7 @@ func (i *IPAMData) UnmarshalJSON(data []byte) error {
64 64
 	return nil
65 65
 }
66 66
 
67
-// Validate checks wheter the IPAMData structure contains congruent data
67
+// Validate checks whether the IPAMData structure contains congruent data
68 68
 func (i *IPAMData) Validate() error {
69 69
 	var isV6 bool
70 70
 	if i.Pool == nil {
... ...
@@ -93,7 +93,7 @@ func (i *IPAMData) Validate() error {
93 93
 	return nil
94 94
 }
95 95
 
96
-// IsV6 returns wheter this is an IPv6 IPAMData structure
96
+// IsV6 returns whether this is an IPv6 IPAMData structure
97 97
 func (i *IPAMData) IsV6() bool {
98 98
 	return nil == i.Pool.IP.To4()
99 99
 }
... ...
@@ -9,6 +9,7 @@ import (
9 9
 	"github.com/docker/libnetwork/netlabel"
10 10
 
11 11
 	builtinIpam "github.com/docker/libnetwork/ipams/builtin"
12
+	nullIpam "github.com/docker/libnetwork/ipams/null"
12 13
 	remoteIpam "github.com/docker/libnetwork/ipams/remote"
13 14
 )
14 15
 
... ...
@@ -73,6 +74,7 @@ func initIpams(ic ipamapi.Callback, lDs, gDs interface{}) error {
73 73
 	for _, fn := range [](func(ipamapi.Callback, interface{}, interface{}) error){
74 74
 		builtinIpam.Init,
75 75
 		remoteIpam.Init,
76
+		nullIpam.Init,
76 77
 	} {
77 78
 		if err := fn(ic, lDs, gDs); err != nil {
78 79
 			return err
... ...
@@ -74,9 +74,7 @@ type networkConfiguration struct {
74 74
 
75 75
 // endpointConfiguration represents the user specified configuration for the sandbox endpoint
76 76
 type endpointConfiguration struct {
77
-	MacAddress   net.HardwareAddr
78
-	PortBindings []types.PortBinding
79
-	ExposedPorts []types.TransportPort
77
+	MacAddress net.HardwareAddr
80 78
 }
81 79
 
82 80
 // containerConfiguration represents the user specified configuration for a container
... ...
@@ -85,6 +83,12 @@ type containerConfiguration struct {
85 85
 	ChildEndpoints  []string
86 86
 }
87 87
 
88
+// connectivityConfiguration represents the user specified configuration regarding the external connectivity
89
+type connectivityConfiguration struct {
90
+	PortBindings []types.PortBinding
91
+	ExposedPorts []types.TransportPort
92
+}
93
+
88 94
 type bridgeEndpoint struct {
89 95
 	id              string
90 96
 	srcName         string
... ...
@@ -93,6 +97,7 @@ type bridgeEndpoint struct {
93 93
 	macAddress      net.HardwareAddr
94 94
 	config          *endpointConfiguration // User specified parameters
95 95
 	containerConfig *containerConfiguration
96
+	extConnConfig   *connectivityConfiguration
96 97
 	portMapping     []types.PortBinding // Operation port bindings
97 98
 }
98 99
 
... ...
@@ -183,7 +188,7 @@ func (c *networkConfiguration) Conflicts(o *networkConfiguration) error {
183 183
 		return fmt.Errorf("same configuration")
184 184
 	}
185 185
 
186
-	// Also empty, becasue only one network with empty name is allowed
186
+	// Also empty, because only one network with empty name is allowed
187 187
 	if c.BridgeName == o.BridgeName {
188 188
 		return fmt.Errorf("networks have same bridge name")
189 189
 	}
... ...
@@ -450,7 +455,7 @@ func parseNetworkGenericOptions(data interface{}) (*networkConfiguration, error)
450 450
 
451 451
 func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []driverapi.IPAMData) error {
452 452
 	if len(ipamV4Data) > 1 || len(ipamV6Data) > 1 {
453
-		return types.ForbiddenErrorf("bridge driver doesnt support multiple subnets")
453
+		return types.ForbiddenErrorf("bridge driver doesn't support multiple subnets")
454 454
 	}
455 455
 
456 456
 	if len(ipamV4Data) == 0 {
... ...
@@ -543,6 +548,9 @@ func (d *driver) getNetworks() []*bridgeNetwork {
543 543
 
544 544
 // Create a new network using bridge plugin
545 545
 func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
546
+	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
547
+		return types.BadRequestErrorf("ipv4 pool is empty")
548
+	}
546 549
 	// Sanity checks
547 550
 	d.Lock()
548 551
 	if _, ok := d.networks[id]; ok {
... ...
@@ -581,7 +589,7 @@ func (d *driver) createNetwork(config *networkConfiguration) error {
581 581
 		nw.Unlock()
582 582
 		if err := nwConfig.Conflicts(config); err != nil {
583 583
 			return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s",
584
-				nwConfig.BridgeName, config.ID, nw.id, nw.config.BridgeName, err.Error())
584
+				config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error())
585 585
 		}
586 586
 	}
587 587
 
... ...
@@ -948,28 +956,19 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
948 948
 		}
949 949
 	}
950 950
 
951
-	// Create the sandbox side pipe interface
951
+	// Store the sandbox side pipe interface parameters
952 952
 	endpoint.srcName = containerIfName
953 953
 	endpoint.macAddress = ifInfo.MacAddress()
954 954
 	endpoint.addr = ifInfo.Address()
955 955
 	endpoint.addrv6 = ifInfo.AddressIPv6()
956 956
 
957
-	// Down the interface before configuring mac address.
958
-	if err = netlink.LinkSetDown(sbox); err != nil {
959
-		return fmt.Errorf("could not set link down for container interface %s: %v", containerIfName, err)
960
-	}
961
-
962
-	// Set the sbox's MAC. If specified, use the one configured by user, otherwise generate one based on IP.
957
+	// Set the sbox's MAC if not provided. If specified, use the one configured by user, otherwise generate one based on IP.
963 958
 	if endpoint.macAddress == nil {
964 959
 		endpoint.macAddress = electMacAddress(epConfig, endpoint.addr.IP)
965
-		if err := ifInfo.SetMacAddress(endpoint.macAddress); err != nil {
960
+		if err = ifInfo.SetMacAddress(endpoint.macAddress); err != nil {
966 961
 			return err
967 962
 		}
968 963
 	}
969
-	err = netlink.LinkSetHardwareAddr(sbox, endpoint.macAddress)
970
-	if err != nil {
971
-		return fmt.Errorf("could not set mac address for container interface %s: %v", containerIfName, err)
972
-	}
973 964
 
974 965
 	// Up the host interface after finishing all netlink configuration
975 966
 	if err = netlink.LinkSetUp(host); err != nil {
... ...
@@ -996,17 +995,11 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
996 996
 		}
997 997
 
998 998
 		endpoint.addrv6 = &net.IPNet{IP: ip6, Mask: network.Mask}
999
-		if err := ifInfo.SetIPAddress(endpoint.addrv6); err != nil {
999
+		if err = ifInfo.SetIPAddress(endpoint.addrv6); err != nil {
1000 1000
 			return err
1001 1001
 		}
1002 1002
 	}
1003 1003
 
1004
-	// Program any required port mapping and store them in the endpoint
1005
-	endpoint.portMapping, err = n.allocatePorts(epConfig, endpoint, config.DefaultBindingIP, d.config.EnableUserlandProxy)
1006
-	if err != nil {
1007
-		return err
1008
-	}
1009
-
1010 1004
 	return nil
1011 1005
 }
1012 1006
 
... ...
@@ -1061,9 +1054,6 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
1061 1061
 		}
1062 1062
 	}()
1063 1063
 
1064
-	// Remove port mappings. Do not stop endpoint delete on unmap failure
1065
-	n.releasePorts(ep)
1066
-
1067 1064
 	// Try removal of link. Discard error: it is a best effort.
1068 1065
 	// Also make sure defer does not see this error either.
1069 1066
 	if link, err := netlink.LinkByName(ep.srcName); err == nil {
... ...
@@ -1104,10 +1094,10 @@ func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, erro
1104 1104
 
1105 1105
 	m := make(map[string]interface{})
1106 1106
 
1107
-	if ep.config.ExposedPorts != nil {
1107
+	if ep.extConnConfig != nil && ep.extConnConfig.ExposedPorts != nil {
1108 1108
 		// Return a copy of the config data
1109
-		epc := make([]types.TransportPort, 0, len(ep.config.ExposedPorts))
1110
-		for _, tp := range ep.config.ExposedPorts {
1109
+		epc := make([]types.TransportPort, 0, len(ep.extConnConfig.ExposedPorts))
1110
+		for _, tp := range ep.extConnConfig.ExposedPorts {
1111 1111
 			epc = append(epc, tp.GetCopy())
1112 1112
 		}
1113 1113
 		m[netlabel.ExposedPorts] = epc
... ...
@@ -1147,6 +1137,11 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
1147 1147
 		return EndpointNotFoundError(eid)
1148 1148
 	}
1149 1149
 
1150
+	endpoint.containerConfig, err = parseContainerOptions(options)
1151
+	if err != nil {
1152
+		return err
1153
+	}
1154
+
1150 1155
 	iNames := jinfo.InterfaceName()
1151 1156
 	err = iNames.SetNames(endpoint.srcName, containerVethPrefix)
1152 1157
 	if err != nil {
... ...
@@ -1163,10 +1158,6 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
1163 1163
 		return err
1164 1164
 	}
1165 1165
 
1166
-	if !network.config.EnableICC {
1167
-		return d.link(network, endpoint, options, true)
1168
-	}
1169
-
1170 1166
 	return nil
1171 1167
 }
1172 1168
 
... ...
@@ -1189,32 +1180,87 @@ func (d *driver) Leave(nid, eid string) error {
1189 1189
 	}
1190 1190
 
1191 1191
 	if !network.config.EnableICC {
1192
-		return d.link(network, endpoint, nil, false)
1192
+		if err = d.link(network, endpoint, false); err != nil {
1193
+			return err
1194
+		}
1193 1195
 	}
1194 1196
 
1195 1197
 	return nil
1196 1198
 }
1197 1199
 
1198
-func (d *driver) link(network *bridgeNetwork, endpoint *bridgeEndpoint, options map[string]interface{}, enable bool) error {
1199
-	var (
1200
-		cc  *containerConfiguration
1201
-		err error
1202
-	)
1200
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
1201
+	defer osl.InitOSContext()()
1203 1202
 
1204
-	if enable {
1205
-		cc, err = parseContainerOptions(options)
1206
-		if err != nil {
1207
-			return err
1208
-		}
1209
-	} else {
1210
-		cc = endpoint.containerConfig
1203
+	network, err := d.getNetwork(nid)
1204
+	if err != nil {
1205
+		return err
1211 1206
 	}
1212 1207
 
1208
+	endpoint, err := network.getEndpoint(eid)
1209
+	if err != nil {
1210
+		return err
1211
+	}
1212
+
1213
+	if endpoint == nil {
1214
+		return EndpointNotFoundError(eid)
1215
+	}
1216
+
1217
+	endpoint.extConnConfig, err = parseConnectivityOptions(options)
1218
+	if err != nil {
1219
+		return err
1220
+	}
1221
+
1222
+	// Program any required port mapping and store them in the endpoint
1223
+	endpoint.portMapping, err = network.allocatePorts(endpoint, network.config.DefaultBindingIP, d.config.EnableUserlandProxy)
1224
+	if err != nil {
1225
+		return err
1226
+	}
1227
+
1228
+	if !network.config.EnableICC {
1229
+		return d.link(network, endpoint, true)
1230
+	}
1231
+
1232
+	return nil
1233
+}
1234
+
1235
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
1236
+	defer osl.InitOSContext()()
1237
+
1238
+	network, err := d.getNetwork(nid)
1239
+	if err != nil {
1240
+		return err
1241
+	}
1242
+
1243
+	endpoint, err := network.getEndpoint(eid)
1244
+	if err != nil {
1245
+		return err
1246
+	}
1247
+
1248
+	if endpoint == nil {
1249
+		return EndpointNotFoundError(eid)
1250
+	}
1251
+
1252
+	err = network.releasePorts(endpoint)
1253
+	if err != nil {
1254
+		logrus.Warn(err)
1255
+	}
1256
+
1257
+	return nil
1258
+}
1259
+
1260
+func (d *driver) link(network *bridgeNetwork, endpoint *bridgeEndpoint, enable bool) error {
1261
+	var err error
1262
+
1263
+	cc := endpoint.containerConfig
1213 1264
 	if cc == nil {
1214 1265
 		return nil
1215 1266
 	}
1267
+	ec := endpoint.extConnConfig
1268
+	if ec == nil {
1269
+		return nil
1270
+	}
1216 1271
 
1217
-	if endpoint.config != nil && endpoint.config.ExposedPorts != nil {
1272
+	if ec.ExposedPorts != nil {
1218 1273
 		for _, p := range cc.ParentEndpoints {
1219 1274
 			var parentEndpoint *bridgeEndpoint
1220 1275
 			parentEndpoint, err = network.getEndpoint(p)
... ...
@@ -1228,7 +1274,7 @@ func (d *driver) link(network *bridgeNetwork, endpoint *bridgeEndpoint, options
1228 1228
 
1229 1229
 			l := newLink(parentEndpoint.addr.IP.String(),
1230 1230
 				endpoint.addr.IP.String(),
1231
-				endpoint.config.ExposedPorts, network.config.BridgeName)
1231
+				ec.ExposedPorts, network.config.BridgeName)
1232 1232
 			if enable {
1233 1233
 				err = l.Enable()
1234 1234
 				if err != nil {
... ...
@@ -1255,13 +1301,13 @@ func (d *driver) link(network *bridgeNetwork, endpoint *bridgeEndpoint, options
1255 1255
 			err = InvalidEndpointIDError(c)
1256 1256
 			return err
1257 1257
 		}
1258
-		if childEndpoint.config == nil || childEndpoint.config.ExposedPorts == nil {
1258
+		if childEndpoint.extConnConfig == nil || childEndpoint.extConnConfig.ExposedPorts == nil {
1259 1259
 			continue
1260 1260
 		}
1261 1261
 
1262 1262
 		l := newLink(endpoint.addr.IP.String(),
1263 1263
 			childEndpoint.addr.IP.String(),
1264
-			childEndpoint.config.ExposedPorts, network.config.BridgeName)
1264
+			childEndpoint.extConnConfig.ExposedPorts, network.config.BridgeName)
1265 1265
 		if enable {
1266 1266
 			err = l.Enable()
1267 1267
 			if err != nil {
... ...
@@ -1277,10 +1323,6 @@ func (d *driver) link(network *bridgeNetwork, endpoint *bridgeEndpoint, options
1277 1277
 		}
1278 1278
 	}
1279 1279
 
1280
-	if enable {
1281
-		endpoint.containerConfig = cc
1282
-	}
1283
-
1284 1280
 	return nil
1285 1281
 }
1286 1282
 
... ...
@@ -1313,22 +1355,6 @@ func parseEndpointOptions(epOptions map[string]interface{}) (*endpointConfigurat
1313 1313
 		}
1314 1314
 	}
1315 1315
 
1316
-	if opt, ok := epOptions[netlabel.PortMap]; ok {
1317
-		if bs, ok := opt.([]types.PortBinding); ok {
1318
-			ec.PortBindings = bs
1319
-		} else {
1320
-			return nil, &ErrInvalidEndpointConfig{}
1321
-		}
1322
-	}
1323
-
1324
-	if opt, ok := epOptions[netlabel.ExposedPorts]; ok {
1325
-		if ports, ok := opt.([]types.TransportPort); ok {
1326
-			ec.ExposedPorts = ports
1327
-		} else {
1328
-			return nil, &ErrInvalidEndpointConfig{}
1329
-		}
1330
-	}
1331
-
1332 1316
 	return ec, nil
1333 1317
 }
1334 1318
 
... ...
@@ -1354,6 +1380,32 @@ func parseContainerOptions(cOptions map[string]interface{}) (*containerConfigura
1354 1354
 	}
1355 1355
 }
1356 1356
 
1357
+func parseConnectivityOptions(cOptions map[string]interface{}) (*connectivityConfiguration, error) {
1358
+	if cOptions == nil {
1359
+		return nil, nil
1360
+	}
1361
+
1362
+	cc := &connectivityConfiguration{}
1363
+
1364
+	if opt, ok := cOptions[netlabel.PortMap]; ok {
1365
+		if pb, ok := opt.([]types.PortBinding); ok {
1366
+			cc.PortBindings = pb
1367
+		} else {
1368
+			return nil, types.BadRequestErrorf("Invalid port mapping data in connectivity configuration: %v", opt)
1369
+		}
1370
+	}
1371
+
1372
+	if opt, ok := cOptions[netlabel.ExposedPorts]; ok {
1373
+		if ports, ok := opt.([]types.TransportPort); ok {
1374
+			cc.ExposedPorts = ports
1375
+		} else {
1376
+			return nil, types.BadRequestErrorf("Invalid exposed ports data in connectivity configuration: %v", opt)
1377
+		}
1378
+	}
1379
+
1380
+	return cc, nil
1381
+}
1382
+
1357 1383
 func electMacAddress(epConfig *endpointConfiguration, ip net.IP) net.HardwareAddr {
1358 1384
 	if epConfig != nil && epConfig.MacAddress != nil {
1359 1385
 		return epConfig.MacAddress
... ...
@@ -14,8 +14,8 @@ var (
14 14
 	defaultBindingIP = net.IPv4(0, 0, 0, 0)
15 15
 )
16 16
 
17
-func (n *bridgeNetwork) allocatePorts(epConfig *endpointConfiguration, ep *bridgeEndpoint, reqDefBindIP net.IP, ulPxyEnabled bool) ([]types.PortBinding, error) {
18
-	if epConfig == nil || epConfig.PortBindings == nil {
17
+func (n *bridgeNetwork) allocatePorts(ep *bridgeEndpoint, reqDefBindIP net.IP, ulPxyEnabled bool) ([]types.PortBinding, error) {
18
+	if ep.extConnConfig == nil || ep.extConnConfig.PortBindings == nil {
19 19
 		return nil, nil
20 20
 	}
21 21
 
... ...
@@ -24,7 +24,7 @@ func (n *bridgeNetwork) allocatePorts(epConfig *endpointConfiguration, ep *bridg
24 24
 		defHostIP = reqDefBindIP
25 25
 	}
26 26
 
27
-	return n.allocatePortsInternal(epConfig.PortBindings, ep.addr.IP, defHostIP, ulPxyEnabled)
27
+	return n.allocatePortsInternal(ep.extConnConfig.PortBindings, ep.addr.IP, defHostIP, ulPxyEnabled)
28 28
 }
29 29
 
30 30
 func (n *bridgeNetwork) allocatePortsInternal(bindings []types.PortBinding, containerIP, defHostIP net.IP, ulPxyEnabled bool) ([]types.PortBinding, error) {
... ...
@@ -63,6 +63,14 @@ func (d *driver) Leave(nid, eid string) error {
63 63
 	return nil
64 64
 }
65 65
 
66
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
67
+	return nil
68
+}
69
+
70
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
71
+	return nil
72
+}
73
+
66 74
 func (d *driver) Type() string {
67 75
 	return networkType
68 76
 }
... ...
@@ -63,6 +63,14 @@ func (d *driver) Leave(nid, eid string) error {
63 63
 	return nil
64 64
 }
65 65
 
66
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
67
+	return nil
68
+}
69
+
70
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
71
+	return nil
72
+}
73
+
66 74
 func (d *driver) Type() string {
67 75
 	return networkType
68 76
 }
... ...
@@ -78,7 +78,7 @@ func setFilters(cname, brName string, remove bool) error {
78 78
 		opt = "-D"
79 79
 	}
80 80
 
81
-	// Everytime we set filters for a new subnet make sure to move the global overlay hook to the top of the both the OUTPUT and forward chains
81
+	// Every time we set filters for a new subnet make sure to move the global overlay hook to the top of both the OUTPUT and FORWARD chains
82 82
 	if !remove {
83 83
 		for _, chain := range []string{"OUTPUT", "FORWARD"} {
84 84
 			exists := iptables.Exists(iptables.Filter, chain, "-j", globalChain)
... ...
@@ -54,7 +54,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
54 54
 		return err
55 55
 	}
56 56
 
57
-	ep.ifName = overlayIfName
57
+	ep.ifName = containerIfName
58 58
 
59 59
 	// Set the container interface and its peer MTU to 1450 to allow
60 60
 	// for 50 bytes vxlan encap (inner eth header(14) + outer IP(20) +
... ...
@@ -63,6 +63,9 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat
63 63
 	if id == "" {
64 64
 		return fmt.Errorf("invalid network id")
65 65
 	}
66
+	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
67
+		return types.BadRequestErrorf("ipv4 pool is empty")
68
+	}
66 69
 
67 70
 	// Since we perform lazy configuration make sure we try
68 71
 	// configuring the driver when we enter CreateNetwork
... ...
@@ -111,6 +114,14 @@ func (d *driver) DeleteNetwork(nid string) error {
111 111
 	return n.releaseVxlanID()
112 112
 }
113 113
 
114
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
115
+	return nil
116
+}
117
+
118
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
119
+	return nil
120
+}
121
+
114 122
 func (n *network) incEndpointCount() {
115 123
 	n.Lock()
116 124
 	defer n.Unlock()
... ...
@@ -153,6 +153,29 @@ type LeaveResponse struct {
153 153
 	Response
154 154
 }
155 155
 
156
+// ProgramExternalConnectivityRequest describes the API for programming the external connectivity for the given endpoint.
157
+type ProgramExternalConnectivityRequest struct {
158
+	NetworkID  string
159
+	EndpointID string
160
+	Options    map[string]interface{}
161
+}
162
+
163
+// ProgramExternalConnectivityResponse is the answer to ProgramExternalConnectivityRequest.
164
+type ProgramExternalConnectivityResponse struct {
165
+	Response
166
+}
167
+
168
+// RevokeExternalConnectivityRequest describes the API for revoking the external connectivity for the given endpoint.
169
+type RevokeExternalConnectivityRequest struct {
170
+	NetworkID  string
171
+	EndpointID string
172
+}
173
+
174
+// RevokeExternalConnectivityResponse is the answer to RevokeExternalConnectivityRequest.
175
+type RevokeExternalConnectivityResponse struct {
176
+	Response
177
+}
178
+
156 179
 // DiscoveryNotification represents a discovery notification
157 180
 type DiscoveryNotification struct {
158 181
 	DiscoveryType discoverapi.DiscoveryType
... ...
@@ -3,6 +3,7 @@ package remote
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
+	"strings"
6 7
 
7 8
 	log "github.com/Sirupsen/logrus"
8 9
 	"github.com/docker/docker/pkg/plugins"
... ...
@@ -13,6 +14,10 @@ import (
13 13
 	"github.com/docker/libnetwork/types"
14 14
 )
15 15
 
16
+const (
17
+	missingMethod = "404 page not found"
18
+)
19
+
16 20
 type driver struct {
17 21
 	endpoint    *plugins.Client
18 22
 	networkType string
... ...
@@ -247,6 +252,35 @@ func (d *driver) Leave(nid, eid string) error {
247 247
 	return d.call("Leave", leave, &api.LeaveResponse{})
248 248
 }
249 249
 
250
+// ProgramExternalConnectivity is invoked to program the rules to allow external connectivity for the endpoint.
251
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
252
+	data := &api.ProgramExternalConnectivityRequest{
253
+		NetworkID:  nid,
254
+		EndpointID: eid,
255
+		Options:    options,
256
+	}
257
+	err := d.call("ProgramExternalConnectivity", data, &api.ProgramExternalConnectivityResponse{})
258
+	if err != nil && strings.Contains(err.Error(), missingMethod) {
259
+		// It is not mandatory yet to support this method
260
+		return nil
261
+	}
262
+	return err
263
+}
264
+
265
+// RevokeExternalConnectivity method is invoked to remove any external connectivity programming related to the endpoint.
266
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
267
+	data := &api.RevokeExternalConnectivityRequest{
268
+		NetworkID:  nid,
269
+		EndpointID: eid,
270
+	}
271
+	err := d.call("RevokeExternalConnectivity", data, &api.RevokeExternalConnectivityResponse{})
272
+	if err != nil && strings.Contains(err.Error(), missingMethod) {
273
+		// It is not mandatory yet to support this method
274
+		return nil
275
+	}
276
+	return err
277
+}
278
+
250 279
 func (d *driver) Type() string {
251 280
 	return d.networkType
252 281
 }
... ...
@@ -36,11 +36,20 @@ type networkConfiguration struct {
36 36
 	RDID  string
37 37
 }
38 38
 
39
+// endpointConfiguration represents the user specified configuration for the sandbox endpoint
40
+type endpointConfiguration struct {
41
+	MacAddress   net.HardwareAddr
42
+	PortBindings []types.PortBinding
43
+	ExposedPorts []types.TransportPort
44
+}
45
+
39 46
 type hnsEndpoint struct {
40
-	id         string
41
-	profileID  string
42
-	macAddress net.HardwareAddr
43
-	addr       *net.IPNet
47
+	id          string
48
+	profileID   string
49
+	macAddress  net.HardwareAddr
50
+	config      *endpointConfiguration // User specified parameters
51
+	portMapping []types.PortBinding    // Operation port bindings
52
+	addr        *net.IPNet
44 53
 }
45 54
 
46 55
 type hnsNetwork struct {
... ...
@@ -58,7 +67,7 @@ type driver struct {
58 58
 }
59 59
 
60 60
 func isValidNetworkType(networkType string) bool {
61
-	if "L2Bridge" == networkType || "L2Tunnel" == networkType || "NAT" == networkType || "Transparent" == networkType {
61
+	if "l2bridge" == networkType || "l2tunnel" == networkType || "nat" == networkType || "transparent" == networkType {
62 62
 		return true
63 63
 	}
64 64
 
... ...
@@ -126,7 +135,7 @@ func (d *driver) parseNetworkOptions(id string, genericOptions map[string]string
126 126
 
127 127
 func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []driverapi.IPAMData) error {
128 128
 	if len(ipamV6Data) > 0 {
129
-		return types.ForbiddenErrorf("windowsshim driver doesnt support v6 subnets")
129
+		return types.ForbiddenErrorf("windowsshim driver doesn't support v6 subnets")
130 130
 	}
131 131
 
132 132
 	if len(ipamV4Data) == 0 {
... ...
@@ -177,8 +186,11 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat
177 177
 
178 178
 		for _, ipData := range ipV4Data {
179 179
 			subnet := hcsshim.Subnet{
180
-				AddressPrefix:  ipData.Pool.String(),
181
-				GatewayAddress: ipData.Gateway.IP.String(),
180
+				AddressPrefix: ipData.Pool.String(),
181
+			}
182
+
183
+			if ipData.Gateway != nil {
184
+				subnet.GatewayAddress = ipData.Gateway.IP.String()
182 185
 			}
183 186
 
184 187
 			subnets = append(subnets, subnet)
... ...
@@ -276,6 +288,64 @@ func convertPortBindings(portBindings []types.PortBinding) ([]json.RawMessage, e
276 276
 	return pbs, nil
277 277
 }
278 278
 
279
+func parsePortBindingPolicies(policies []json.RawMessage) ([]types.PortBinding, error) {
280
+	var bindings []types.PortBinding
281
+	hcsPolicy := &hcsshim.NatPolicy{}
282
+
283
+	for _, elem := range policies {
284
+
285
+		if err := json.Unmarshal([]byte(elem), &hcsPolicy); err != nil || hcsPolicy.Type != "NAT" {
286
+			continue
287
+		}
288
+
289
+		binding := types.PortBinding{
290
+			HostPort:    hcsPolicy.ExternalPort,
291
+			HostPortEnd: hcsPolicy.ExternalPort,
292
+			Port:        hcsPolicy.InternalPort,
293
+			Proto:       types.ParseProtocol(hcsPolicy.Protocol),
294
+			HostIP:      net.IPv4(0, 0, 0, 0),
295
+		}
296
+
297
+		bindings = append(bindings, binding)
298
+	}
299
+
300
+	return bindings, nil
301
+}
302
+
303
+func parseEndpointOptions(epOptions map[string]interface{}) (*endpointConfiguration, error) {
304
+	if epOptions == nil {
305
+		return nil, nil
306
+	}
307
+
308
+	ec := &endpointConfiguration{}
309
+
310
+	if opt, ok := epOptions[netlabel.MacAddress]; ok {
311
+		if mac, ok := opt.(net.HardwareAddr); ok {
312
+			ec.MacAddress = mac
313
+		} else {
314
+			return nil, fmt.Errorf("Invalid endpoint configuration")
315
+		}
316
+	}
317
+
318
+	if opt, ok := epOptions[netlabel.PortMap]; ok {
319
+		if bs, ok := opt.([]types.PortBinding); ok {
320
+			ec.PortBindings = bs
321
+		} else {
322
+			return nil, fmt.Errorf("Invalid endpoint configuration")
323
+		}
324
+	}
325
+
326
+	if opt, ok := epOptions[netlabel.ExposedPorts]; ok {
327
+		if ports, ok := opt.([]types.TransportPort); ok {
328
+			ec.ExposedPorts = ports
329
+		} else {
330
+			return nil, fmt.Errorf("Invalid endpoint configuration")
331
+		}
332
+	}
333
+
334
+	return ec, nil
335
+}
336
+
279 337
 func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, epOptions map[string]interface{}) error {
280 338
 	n, err := d.getNetwork(nid)
281 339
 	if err != nil {
... ...
@@ -292,16 +362,16 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
292 292
 		VirtualNetwork: n.config.HnsID,
293 293
 	}
294 294
 
295
-	// Convert the port mapping for the network
296
-	if opt, ok := epOptions[netlabel.PortMap]; ok {
297
-		if bs, ok := opt.([]types.PortBinding); ok {
298
-			endpointStruct.Policies, err = convertPortBindings(bs)
299
-			if err != nil {
300
-				return err
301
-			}
302
-		} else {
303
-			return fmt.Errorf("Invalid endpoint configuration for endpoint id%s", eid)
304
-		}
295
+	ec, err := parseEndpointOptions(epOptions)
296
+
297
+	if err != nil {
298
+		return err
299
+	}
300
+
301
+	endpointStruct.Policies, err = convertPortBindings(ec.PortBindings)
302
+
303
+	if err != nil {
304
+		return err
305 305
 	}
306 306
 
307 307
 	configurationb, err := json.Marshal(endpointStruct)
... ...
@@ -325,7 +395,16 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
325 325
 		addr:       &net.IPNet{IP: hnsresponse.IPAddress, Mask: hnsresponse.IPAddress.DefaultMask()},
326 326
 		macAddress: mac,
327 327
 	}
328
+
328 329
 	endpoint.profileID = hnsresponse.Id
330
+	endpoint.config = ec
331
+	endpoint.portMapping, err = parsePortBindingPolicies(hnsresponse.Policies)
332
+
333
+	if err != nil {
334
+		hcsshim.HNSEndpointRequest("DELETE", hnsresponse.Id, "")
335
+		return err
336
+	}
337
+
329 338
 	n.Lock()
330 339
 	n.endpoints[eid] = endpoint
331 340
 	n.Unlock()
... ...
@@ -365,13 +444,34 @@ func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, erro
365 365
 		return nil, err
366 366
 	}
367 367
 
368
-	endpoint, err := network.getEndpoint(eid)
368
+	ep, err := network.getEndpoint(eid)
369 369
 	if err != nil {
370 370
 		return nil, err
371 371
 	}
372 372
 
373 373
 	data := make(map[string]interface{}, 1)
374
-	data["hnsid"] = endpoint.profileID
374
+	data["hnsid"] = ep.profileID
375
+	if ep.config.ExposedPorts != nil {
376
+		// Return a copy of the config data
377
+		epc := make([]types.TransportPort, 0, len(ep.config.ExposedPorts))
378
+		for _, tp := range ep.config.ExposedPorts {
379
+			epc = append(epc, tp.GetCopy())
380
+		}
381
+		data[netlabel.ExposedPorts] = epc
382
+	}
383
+
384
+	if ep.portMapping != nil {
385
+		// Return a copy of the operational data
386
+		pmc := make([]types.PortBinding, 0, len(ep.portMapping))
387
+		for _, pm := range ep.portMapping {
388
+			pmc = append(pmc, pm.GetCopy())
389
+		}
390
+		data[netlabel.PortMap] = pmc
391
+	}
392
+
393
+	if len(ep.macAddress) != 0 {
394
+		data[netlabel.MacAddress] = ep.macAddress
395
+	}
375 396
 	return data, nil
376 397
 }
377 398
 
... ...
@@ -412,6 +512,14 @@ func (d *driver) Leave(nid, eid string) error {
412 412
 	return nil
413 413
 }
414 414
 
415
+func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error {
416
+	return nil
417
+}
418
+
419
+func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
420
+	return nil
421
+}
422
+
415 423
 func (d *driver) Type() string {
416 424
 	return d.name
417 425
 }
... ...
@@ -8,9 +8,9 @@ import (
8 8
 func getInitializers() []initializer {
9 9
 	return []initializer{
10 10
 		{null.Init, "null"},
11
-		{windows.GetInit("Transparent"), "Transparent"},
12
-		{windows.GetInit("L2Bridge"), "L2Bridge"},
13
-		{windows.GetInit("L2Tunnel"), "L2Tunnel"},
14
-		{windows.GetInit("NAT"), "NAT"},
11
+		{windows.GetInit("transparent"), "transparent"},
12
+		{windows.GetInit("l2bridge"), "l2bridge"},
13
+		{windows.GetInit("l2tunnel"), "l2tunnel"},
14
+		{windows.GetInit("nat"), "nat"},
15 15
 	}
16 16
 }
... ...
@@ -359,22 +359,16 @@ func (ep *endpoint) Join(sbox Sandbox, options ...EndpointOption) error {
359 359
 	sb.joinLeaveStart()
360 360
 	defer sb.joinLeaveEnd()
361 361
 
362
-	return ep.sbJoin(sbox, options...)
362
+	return ep.sbJoin(sb, options...)
363 363
 }
364 364
 
365
-func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
366
-	var err error
367
-	sb, ok := sbox.(*sandbox)
368
-	if !ok {
369
-		return types.BadRequestErrorf("not a valid Sandbox interface")
370
-	}
371
-
372
-	network, err := ep.getNetworkFromStore()
365
+func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error {
366
+	n, err := ep.getNetworkFromStore()
373 367
 	if err != nil {
374 368
 		return fmt.Errorf("failed to get network from store during join: %v", err)
375 369
 	}
376 370
 
377
-	ep, err = network.getEndpointFromStore(ep.ID())
371
+	ep, err = n.getEndpointFromStore(ep.ID())
378 372
 	if err != nil {
379 373
 		return fmt.Errorf("failed to get endpoint from store during join: %v", err)
380 374
 	}
... ...
@@ -384,11 +378,8 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
384 384
 		ep.Unlock()
385 385
 		return types.ForbiddenErrorf("another container is attached to the same network endpoint")
386 386
 	}
387
-	ep.Unlock()
388
-
389
-	ep.Lock()
390
-	ep.network = network
391
-	ep.sandboxID = sbox.ID()
387
+	ep.network = n
388
+	ep.sandboxID = sb.ID()
392 389
 	ep.joinInfo = &endpointJoinInfo{}
393 390
 	epid := ep.id
394 391
 	ep.Unlock()
... ...
@@ -400,32 +391,29 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
400 400
 		}
401 401
 	}()
402 402
 
403
-	network.Lock()
404
-	nid := network.id
405
-	network.Unlock()
403
+	nid := n.ID()
406 404
 
407 405
 	ep.processOptions(options...)
408 406
 
409
-	driver, err := network.driver(true)
407
+	d, err := n.driver(true)
410 408
 	if err != nil {
411 409
 		return fmt.Errorf("failed to join endpoint: %v", err)
412 410
 	}
413 411
 
414
-	err = driver.Join(nid, epid, sbox.Key(), ep, sbox.Labels())
412
+	err = d.Join(nid, epid, sb.Key(), ep, sb.Labels())
415 413
 	if err != nil {
416 414
 		return err
417 415
 	}
418 416
 	defer func() {
419 417
 		if err != nil {
420
-			// Do not alter global err variable, it's needed by the previous defer
421
-			if err := driver.Leave(nid, epid); err != nil {
418
+			if err := d.Leave(nid, epid); err != nil {
422 419
 				log.Warnf("driver leave failed while rolling back join: %v", err)
423 420
 			}
424 421
 		}
425 422
 	}()
426 423
 
427 424
 	// Watch for service records
428
-	network.getController().watchSvcRecord(ep)
425
+	n.getController().watchSvcRecord(ep)
429 426
 
430 427
 	address := ""
431 428
 	if ip := ep.getFirstInterfaceAddress(); ip != nil {
... ...
@@ -434,27 +422,23 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
434 434
 	if err = sb.updateHostsFile(address); err != nil {
435 435
 		return err
436 436
 	}
437
-	if err = sb.updateDNS(network.enableIPv6); err != nil {
437
+	if err = sb.updateDNS(n.enableIPv6); err != nil {
438 438
 		return err
439 439
 	}
440 440
 
441
-	if err = network.getController().updateToStore(ep); err != nil {
441
+	if err = n.getController().updateToStore(ep); err != nil {
442 442
 		return err
443 443
 	}
444 444
 
445
+	// Current endpoint providing external connectivity for the sandbox
446
+	extEp := sb.getGatewayEndpoint()
447
+
445 448
 	sb.Lock()
446 449
 	heap.Push(&sb.endpoints, ep)
447 450
 	sb.Unlock()
448 451
 	defer func() {
449 452
 		if err != nil {
450
-			for i, e := range sb.getConnectedEndpoints() {
451
-				if e == ep {
452
-					sb.Lock()
453
-					heap.Remove(&sb.endpoints, i)
454
-					sb.Unlock()
455
-					return
456
-				}
457
-			}
453
+			sb.removeEndpoint(ep)
458 454
 		}
459 455
 	}()
460 456
 
... ...
@@ -463,9 +447,39 @@ func (ep *endpoint) sbJoin(sbox Sandbox, options ...EndpointOption) error {
463 463
 	}
464 464
 
465 465
 	if sb.needDefaultGW() {
466
-		return sb.setupDefaultGW(ep)
466
+		return sb.setupDefaultGW()
467 467
 	}
468
-	return nil
468
+
469
+	moveExtConn := sb.getGatewayEndpoint() != extEp
470
+
471
+	if moveExtConn {
472
+		if extEp != nil {
473
+			log.Debugf("Revoking external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
474
+			if err = d.RevokeExternalConnectivity(extEp.network.ID(), extEp.ID()); err != nil {
475
+				return types.InternalErrorf(
476
+					"driver failed revoking external connectivity on endpoint %s (%s): %v",
477
+					extEp.Name(), extEp.ID(), err)
478
+			}
479
+			defer func() {
480
+				if err != nil {
481
+					if e := d.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); e != nil {
482
+						log.Warnf("Failed to roll-back external connectivity on endpoint %s (%s): %v",
483
+							extEp.Name(), extEp.ID(), e)
484
+					}
485
+				}
486
+			}()
487
+		}
488
+		if !n.internal {
489
+			log.Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
490
+			if err = d.ProgramExternalConnectivity(n.ID(), ep.ID(), sb.Labels()); err != nil {
491
+				return types.InternalErrorf(
492
+					"driver failed programming external connectivity on endpoint %s (%s): %v",
493
+					ep.Name(), ep.ID(), err)
494
+			}
495
+		}
496
+	}
497
+
498
+	return sb.clearDefaultGW()
469 499
 }
470 500
 
471 501
 func (ep *endpoint) rename(name string) error {
... ...
@@ -533,15 +547,10 @@ func (ep *endpoint) Leave(sbox Sandbox, options ...EndpointOption) error {
533 533
 	sb.joinLeaveStart()
534 534
 	defer sb.joinLeaveEnd()
535 535
 
536
-	return ep.sbLeave(sbox, false, options...)
536
+	return ep.sbLeave(sb, false, options...)
537 537
 }
538 538
 
539
-func (ep *endpoint) sbLeave(sbox Sandbox, force bool, options ...EndpointOption) error {
540
-	sb, ok := sbox.(*sandbox)
541
-	if !ok {
542
-		return types.BadRequestErrorf("not a valid Sandbox interface")
543
-	}
544
-
539
+func (ep *endpoint) sbLeave(sb *sandbox, force bool, options ...EndpointOption) error {
545 540
 	n, err := ep.getNetworkFromStore()
546 541
 	if err != nil {
547 542
 		return fmt.Errorf("failed to get network from store during leave: %v", err)
... ...
@@ -559,8 +568,8 @@ func (ep *endpoint) sbLeave(sbox Sandbox, force bool, options ...EndpointOption)
559 559
 	if sid == "" {
560 560
 		return types.ForbiddenErrorf("cannot leave endpoint with no attached sandbox")
561 561
 	}
562
-	if sid != sbox.ID() {
563
-		return types.ForbiddenErrorf("unexpected sandbox ID in leave request. Expected %s. Got %s", ep.sandboxID, sbox.ID())
562
+	if sid != sb.ID() {
563
+		return types.ForbiddenErrorf("unexpected sandbox ID in leave request. Expected %s. Got %s", ep.sandboxID, sb.ID())
564 564
 	}
565 565
 
566 566
 	ep.processOptions(options...)
... ...
@@ -575,7 +584,19 @@ func (ep *endpoint) sbLeave(sbox Sandbox, force bool, options ...EndpointOption)
575 575
 	ep.network = n
576 576
 	ep.Unlock()
577 577
 
578
+	// Current endpoint providing external connectivity to the sandbox
579
+	extEp := sb.getGatewayEndpoint()
580
+	moveExtConn := extEp != nil && (extEp.ID() == ep.ID())
581
+
578 582
 	if d != nil {
583
+		if moveExtConn {
584
+			log.Debugf("Revoking external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
585
+			if err := d.RevokeExternalConnectivity(n.id, ep.id); err != nil {
586
+				log.Warnf("driver failed revoking external connectivity on endpoint %s (%s): %v",
587
+					ep.Name(), ep.ID(), err)
588
+			}
589
+		}
590
+
579 591
 		if err := d.Leave(n.id, ep.id); err != nil {
580 592
 			if _, ok := err.(types.MaskableError); !ok {
581 593
 				log.Warnf("driver error disconnecting container %s : %v", ep.name, err)
... ...
@@ -597,7 +618,24 @@ func (ep *endpoint) sbLeave(sbox Sandbox, force bool, options ...EndpointOption)
597 597
 	}
598 598
 
599 599
 	sb.deleteHostsEntries(n.getSvcRecords(ep))
600
-	return nil
600
+	if !sb.inDelete && sb.needDefaultGW() {
601
+		if sb.getEPwithoutGateway() == nil {
602
+			return fmt.Errorf("endpoint without GW expected, but not found")
603
+		}
604
+		return sb.setupDefaultGW()
605
+	}
606
+
607
+	// New endpoint providing external connectivity for the sandbox
608
+	extEp = sb.getGatewayEndpoint()
609
+	if moveExtConn && extEp != nil {
610
+		log.Debugf("Programming external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
611
+		if err := d.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); err != nil {
612
+			log.Warnf("driver failed programming external connectivity on endpoint %s: (%s) %v",
613
+				extEp.Name(), extEp.ID(), err)
614
+		}
615
+	}
616
+
617
+	return sb.clearDefaultGW()
601 618
 }
602 619
 
603 620
 func (n *network) validateForceDelete(locator string) error {
... ...
@@ -643,7 +681,7 @@ func (ep *endpoint) Delete(force bool) error {
643 643
 	}
644 644
 
645 645
 	if sb != nil {
646
-		if e := ep.sbLeave(sb, force); e != nil {
646
+		if e := ep.sbLeave(sb.(*sandbox), force); e != nil {
647 647
 			log.Warnf("failed to leave sandbox for endpoint %s : %v", name, e)
648 648
 		}
649 649
 	}
... ...
@@ -929,9 +967,13 @@ func (ep *endpoint) releaseAddress() {
929 929
 		log.Warnf("Failed to retrieve ipam driver to release interface address on delete of endpoint %s (%s): %v", ep.Name(), ep.ID(), err)
930 930
 		return
931 931
 	}
932
-	if err := ipam.ReleaseAddress(ep.iface.v4PoolID, ep.iface.addr.IP); err != nil {
933
-		log.Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addr.IP, ep.Name(), ep.ID(), err)
932
+
933
+	if ep.iface.addr != nil {
934
+		if err := ipam.ReleaseAddress(ep.iface.v4PoolID, ep.iface.addr.IP); err != nil {
935
+			log.Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addr.IP, ep.Name(), ep.ID(), err)
936
+		}
934 937
 	}
938
+
935 939
 	if ep.iface.addrv6 != nil && ep.iface.addrv6.IP.IsGlobalUnicast() {
936 940
 		if err := ipam.ReleaseAddress(ep.iface.v6PoolID, ep.iface.addrv6.IP); err != nil {
937 941
 			log.Warnf("Failed to release ip address %s on delete of endpoint %s (%s): %v", ep.iface.addrv6.IP, ep.Name(), ep.ID(), err)
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"io/ioutil"
9 9
 	"os"
10 10
 	"regexp"
11
+	"strings"
11 12
 	"sync"
12 13
 )
13 14
 
... ...
@@ -78,10 +79,17 @@ func Build(path, IP, hostname, domainname string, extraContent []Record) error {
78 78
 		//set main record
79 79
 		var mainRec Record
80 80
 		mainRec.IP = IP
81
+		// User might have provided a FQDN in hostname or split it across hostname
82
+		// and domainname.  We want the FQDN and the bare hostname.
83
+		fqdn := hostname
81 84
 		if domainname != "" {
82
-			mainRec.Hosts = fmt.Sprintf("%s.%s %s", hostname, domainname, hostname)
85
+			fqdn = fmt.Sprintf("%s.%s", fqdn, domainname)
86
+		}
87
+		parts := strings.SplitN(fqdn, ".", 2)
88
+		if len(parts) == 2 {
89
+			mainRec.Hosts = fmt.Sprintf("%s %s", fqdn, parts[0])
83 90
 		} else {
84
-			mainRec.Hosts = hostname
91
+			mainRec.Hosts = fqdn
85 92
 		}
86 93
 		if _, err := mainRec.WriteTo(content); err != nil {
87 94
 			return err
... ...
@@ -151,6 +159,10 @@ func Delete(path string, recs []Record) error {
151 151
 loop:
152 152
 	for s.Scan() {
153 153
 		b := s.Bytes()
154
+		if len(b) == 0 {
155
+			continue
156
+		}
157
+
154 158
 		if b[0] == '#' {
155 159
 			buf.Write(b)
156 160
 			buf.Write(eol)
... ...
@@ -8,7 +8,7 @@ import (
8 8
 	"github.com/docker/libnetwork/datastore"
9 9
 )
10 10
 
11
-// Idm manages the reservation/release of numerical ids from a contiguos set
11
+// Idm manages the reservation/release of numerical ids from a contiguous set
12 12
 type Idm struct {
13 13
 	start  uint64
14 14
 	end    uint64
... ...
@@ -40,7 +40,7 @@ type addrSpace struct {
40 40
 }
41 41
 
42 42
 // AddressRange specifies first and last ip ordinal which
43
-// identify a range in a a pool of addresses
43
+// identifies a range in a pool of addresses
44 44
 type AddressRange struct {
45 45
 	Sub        *net.IPNet
46 46
 	Start, End uint64
... ...
@@ -85,7 +85,7 @@ func (s *SubnetKey) String() string {
85 85
 	return k
86 86
 }
87 87
 
88
-// FromString populate the SubnetKey object reading it from string
88
+// FromString populates the SubnetKey object reading it from string
89 89
 func (s *SubnetKey) FromString(str string) error {
90 90
 	if str == "" || !strings.Contains(str, "/") {
91 91
 		return types.BadRequestErrorf("invalid string form for subnetkey: %s", str)
... ...
@@ -62,7 +62,7 @@ func getAddressVersion(ip net.IP) ipVersion {
62 62
 }
63 63
 
64 64
 // Adds the ordinal IP to the current array
65
-// 192.168.0.0 + 53 => 192.168.53
65
+// 192.168.0.0 + 53 => 192.168.0.53
66 66
 func addIntToIP(array []byte, ordinal uint64) {
67 67
 	for i := len(array) - 1; i >= 0; i-- {
68 68
 		array[i] |= (byte)(ordinal & 0xff)
... ...
@@ -15,6 +15,8 @@ import (
15 15
 const (
16 16
 	// DefaultIPAM is the name of the built-in default ipam driver
17 17
 	DefaultIPAM = "default"
18
+	// NullIPAM is the name of the built-in null ipam driver
19
+	NullIPAM = "null"
18 20
 	// PluginEndpointType represents the Endpoint Type used by Plugin system
19 21
 	PluginEndpointType = "IpamDriver"
20 22
 	// RequestAddressType represents the Address Type used when requesting an address
... ...
@@ -33,7 +35,7 @@ type Callback interface {
33 33
  * IPAM Errors
34 34
  **************/
35 35
 
36
-// Weel-known errors returned by IPAM
36
+// Well-known errors returned by IPAM
37 37
 var (
38 38
 	ErrIpamInternalError   = types.InternalErrorf("IPAM Internal Error")
39 39
 	ErrInvalidAddressSpace = types.BadRequestErrorf("Invalid Address Space")
40 40
new file mode 100644
... ...
@@ -0,0 +1,71 @@
0
+// Package null implements the null ipam driver. Null ipam driver satisfies ipamapi contract,
1
+// but does not effectively reserve/allocate any address pool or address
2
+package null
3
+
4
+import (
5
+	"fmt"
6
+	"net"
7
+
8
+	"github.com/docker/libnetwork/discoverapi"
9
+	"github.com/docker/libnetwork/ipamapi"
10
+	"github.com/docker/libnetwork/types"
11
+)
12
+
13
+var (
14
+	defaultAS      = "null"
15
+	defaultPool, _ = types.ParseCIDR("0.0.0.0/0")
16
+	defaultPoolID  = fmt.Sprintf("%s/%s", defaultAS, defaultPool.String())
17
+)
18
+
19
+type allocator struct{}
20
+
21
+func (a *allocator) GetDefaultAddressSpaces() (string, string, error) {
22
+	return defaultAS, defaultAS, nil
23
+}
24
+
25
+func (a *allocator) RequestPool(addressSpace, pool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error) {
26
+	if addressSpace != defaultAS {
27
+		return "", nil, nil, types.BadRequestErrorf("unknown address space: %s", addressSpace)
28
+	}
29
+	if pool != "" {
30
+		return "", nil, nil, types.BadRequestErrorf("null ipam driver does not handle specific address pool requests")
31
+	}
32
+	if subPool != "" {
33
+		return "", nil, nil, types.BadRequestErrorf("null ipam driver does not handle specific address subpool requests")
34
+	}
35
+	if v6 {
36
+		return "", nil, nil, types.BadRequestErrorf("null ipam driver does not handle IPv6 address pool pool requests")
37
+	}
38
+	return defaultPoolID, defaultPool, nil, nil
39
+}
40
+
41
+func (a *allocator) ReleasePool(poolID string) error {
42
+	return nil
43
+}
44
+
45
+func (a *allocator) RequestAddress(poolID string, ip net.IP, opts map[string]string) (*net.IPNet, map[string]string, error) {
46
+	if poolID != defaultPoolID {
47
+		return nil, nil, types.BadRequestErrorf("unknown pool id: %s", poolID)
48
+	}
49
+	return nil, nil, nil
50
+}
51
+
52
+func (a *allocator) ReleaseAddress(poolID string, ip net.IP) error {
53
+	if poolID != defaultPoolID {
54
+		return types.BadRequestErrorf("unknown pool id: %s", poolID)
55
+	}
56
+	return nil
57
+}
58
+
59
+func (a *allocator) DiscoverNew(dType discoverapi.DiscoveryType, data interface{}) error {
60
+	return nil
61
+}
62
+
63
+func (a *allocator) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{}) error {
64
+	return nil
65
+}
66
+
67
+// Init registers a remote ipam when its plugin is activated
68
+func Init(ic ipamapi.Callback, l, g interface{}) error {
69
+	return ic.RegisterIpamDriver(ipamapi.NullIPAM, &allocator{})
70
+}
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	log "github.com/Sirupsen/logrus"
7 7
 	"github.com/docker/libnetwork/discoverapi"
8 8
 	"github.com/docker/libnetwork/ipamapi"
9
+	"github.com/docker/libnetwork/netlabel"
9 10
 	"github.com/docker/libnetwork/types"
10 11
 )
11 12
 
... ...
@@ -33,7 +34,7 @@ func (a *allocator) GetDefaultAddressSpaces() (string, string, error) {
33 33
 }
34 34
 
35 35
 // RequestPool returns an address pool along with its unique id. This is a null ipam driver. It allocates the
36
-// subnet user asked and does not validate anything. Doesnt support subpool allocation
36
+// subnet user asked and does not validate anything. Doesn't support subpool allocation
37 37
 func (a *allocator) RequestPool(addressSpace, pool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error) {
38 38
 	log.Debugf("RequestPool(%s, %s, %s, %v, %t)", addressSpace, pool, subPool, options, v6)
39 39
 	if subPool != "" || v6 {
... ...
@@ -64,14 +65,19 @@ func (a *allocator) ReleasePool(poolID string) error {
64 64
 // RequestAddress returns an address from the specified pool ID.
65 65
 // Always allocate the 0.0.0.0/32 ip if no preferred address was specified
66 66
 func (a *allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[string]string) (*net.IPNet, map[string]string, error) {
67
-	log.Debugf("RequestAddress(%s, %v, %v) %s", poolID, prefAddress, opts, opts["RequestAddressType"])
67
+	log.Debugf("RequestAddress(%s, %v, %v)", poolID, prefAddress, opts)
68 68
 	_, ipNet, err := net.ParseCIDR(poolID)
69 69
 
70 70
 	if err != nil {
71 71
 		return nil, nil, err
72 72
 	}
73
-	if prefAddress == nil {
73
+
74
+	// TODO Windows: Remove this once the bug in docker daemon is fixed
75
+	// that causes it to throw an exception on nil gateway
76
+	if opts[ipamapi.RequestAddressType] == netlabel.Gateway {
74 77
 		return ipNet, nil, nil
78
+	} else if prefAddress == nil {
79
+		return nil, nil, nil
75 80
 	}
76 81
 	return &net.IPNet{IP: prefAddress, Mask: ipNet.Mask}, nil, nil
77 82
 }
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"fmt"
6 6
 	"net"
7 7
 	"os/exec"
8
+	"regexp"
8 9
 	"strconv"
9 10
 	"strings"
10 11
 	"sync"
... ...
@@ -36,6 +37,7 @@ const (
36 36
 var (
37 37
 	iptablesPath  string
38 38
 	supportsXlock = false
39
+	supportsCOpt  = false
39 40
 	// used to lock iptables commands if xtables lock is not supported
40 41
 	bestEffortLock sync.Mutex
41 42
 	// ErrIptablesNotFound is returned when the rule is not found.
... ...
@@ -60,7 +62,6 @@ func (e ChainError) Error() string {
60 60
 }
61 61
 
62 62
 func initCheck() error {
63
-
64 63
 	if iptablesPath == "" {
65 64
 		path, err := exec.LookPath("iptables")
66 65
 		if err != nil {
... ...
@@ -68,6 +69,12 @@ func initCheck() error {
68 68
 		}
69 69
 		iptablesPath = path
70 70
 		supportsXlock = exec.Command(iptablesPath, "--wait", "-L", "-n").Run() == nil
71
+		mj, mn, mc, err := GetVersion()
72
+		if err != nil {
73
+			logrus.Warnf("Failed to read iptables version: %v", err)
74
+			return nil
75
+		}
76
+		supportsCOpt = supportsCOption(mj, mn, mc)
71 77
 	}
72 78
 	return nil
73 79
 }
... ...
@@ -299,20 +306,21 @@ func Exists(table Table, chain string, rule ...string) bool {
299 299
 		table = Filter
300 300
 	}
301 301
 
302
-	// iptables -C, --check option was added in v.1.4.11
303
-	// http://ftp.netfilter.org/pub/iptables/changes-iptables-1.4.11.txt
302
+	initCheck()
304 303
 
305
-	// try -C
306
-	// if exit status is 0 then return true, the rule exists
307
-	if _, err := Raw(append([]string{
308
-		"-t", string(table), "-C", chain}, rule...)...); err == nil {
309
-		return true
304
+	if supportsCOpt {
305
+		// if exit status is 0 then return true, the rule exists
306
+		_, err := Raw(append([]string{"-t", string(table), "-C", chain}, rule...)...)
307
+		return err == nil
310 308
 	}
311 309
 
312
-	// parse "iptables -S" for the rule (this checks rules in a specific chain
313
-	// in a specific table)
314
-	ruleString := strings.Join(rule, " ")
315
-	ruleString = chain + " " + ruleString
310
+	// parse "iptables -S" for the rule (it checks rules in a specific chain
311
+	// in a specific table and it is very unreliable)
312
+	return existsRaw(table, chain, rule...)
313
+}
314
+
315
+func existsRaw(table Table, chain string, rule ...string) bool {
316
+	ruleString := fmt.Sprintf("%s %s\n", chain, strings.Join(rule, " "))
316 317
 	existingRules, _ := exec.Command(iptablesPath, "-t", string(table), "-S", chain).Output()
317 318
 
318 319
 	return strings.Contains(string(existingRules), ruleString)
... ...
@@ -380,3 +388,25 @@ func ExistChain(chain string, table Table) bool {
380 380
 	}
381 381
 	return false
382 382
 }
383
+
384
+// GetVersion reads the iptables version numbers
385
+func GetVersion() (major, minor, micro int, err error) {
386
+	out, err := Raw("--version")
387
+	if err == nil {
388
+		major, minor, micro = parseVersionNumbers(string(out))
389
+	}
390
+	return
391
+}
392
+
393
// iptablesVersionRE matches a version token of the form
// "v<major>[.<minor>[.<micro>]]". Requiring a digit right after the "v" and
// literal dots keeps words such as "version" from being mistaken for it.
var iptablesVersionRE = regexp.MustCompile(`v\d+(?:\.\d+){0,2}`)

// parseVersionNumbers extracts the first version token found in input and
// returns its numeric components. Components that are absent, and all three
// when no version token is found, are returned as zero.
func parseVersionNumbers(input string) (major, minor, micro int) {
	line := iptablesVersionRE.FindString(input)
	// The scan error is ignored on purpose: it only signals that fewer than
	// three components were present, and those stay at zero.
	_, _ = fmt.Sscanf(line, "v%d.%d.%d", &major, &minor, &micro)
	return major, minor, micro
}
399
+
400
// supportsCOption reports whether iptables version mj.mn.mc is at least
// 1.4.11, the release that introduced the -C (--check) option.
// http://ftp.netfilter.org/pub/iptables/changes-iptables-1.4.11.txt
func supportsCOption(mj, mn, mc int) bool {
	switch {
	case mj != 1:
		// Any later major version qualifies; any earlier one does not.
		return mj > 1
	case mn != 4:
		return mn > 4
	default:
		return mc >= 11
	}
}
... ...
@@ -600,7 +600,7 @@ func (n *network) driver(load bool) (driverapi.Driver, error) {
600 600
 			return nil, err
601 601
 		}
602 602
 	} else if !ok {
603
-		// dont fail if driver loading is not required
603
+		// don't fail if driver loading is not required
604 604
 		return nil, nil
605 605
 	}
606 606
 
... ...
@@ -851,14 +851,25 @@ func (n *network) updateSvcRecord(ep *endpoint, localEps []*endpoint, isAdd bool
851 851
 	if iface := ep.Iface(); iface.Address() != nil {
852 852
 		myAliases := ep.MyAliases()
853 853
 		if isAdd {
854
-			if !ep.isAnonymous() {
854
+			// If anonymous endpoint has an alias use the first alias
855
+			// for ip->name mapping. Not having the reverse mapping
856
+			// breaks some apps
857
+			if ep.isAnonymous() {
858
+				if len(myAliases) > 0 {
859
+					n.addSvcRecords(myAliases[0], iface.Address().IP, true)
860
+				}
861
+			} else {
855 862
 				n.addSvcRecords(epName, iface.Address().IP, true)
856 863
 			}
857 864
 			for _, alias := range myAliases {
858 865
 				n.addSvcRecords(alias, iface.Address().IP, false)
859 866
 			}
860 867
 		} else {
861
-			if !ep.isAnonymous() {
868
+			if ep.isAnonymous() {
869
+				if len(myAliases) > 0 {
870
+					n.deleteSvcRecords(myAliases[0], iface.Address().IP, true)
871
+				}
872
+			} else {
862 873
 				n.deleteSvcRecords(epName, iface.Address().IP, true)
863 874
 			}
864 875
 			for _, alias := range myAliases {
... ...
@@ -21,6 +21,7 @@ type nwIface struct {
21 21
 	dstName     string
22 22
 	master      string
23 23
 	dstMaster   string
24
+	mac         net.HardwareAddr
24 25
 	address     *net.IPNet
25 26
 	addressIPv6 *net.IPNet
26 27
 	routes      []*net.IPNet
... ...
@@ -64,6 +65,13 @@ func (i *nwIface) Master() string {
64 64
 	return i.master
65 65
 }
66 66
 
67
+func (i *nwIface) MacAddress() net.HardwareAddr {
68
+	i.Lock()
69
+	defer i.Unlock()
70
+
71
+	return types.GetMacCopy(i.mac)
72
+}
73
+
67 74
 func (i *nwIface) Address() *net.IPNet {
68 75
 	i.Lock()
69 76
 	defer i.Unlock()
... ...
@@ -304,6 +312,7 @@ func configureInterface(iface netlink.Link, i *nwIface) error {
304 304
 		ErrMessage string
305 305
 	}{
306 306
 		{setInterfaceName, fmt.Sprintf("error renaming interface %q to %q", ifaceName, i.DstName())},
307
+		{setInterfaceMAC, fmt.Sprintf("error setting interface %q MAC to %q", ifaceName, i.MacAddress())},
307 308
 		{setInterfaceIP, fmt.Sprintf("error setting interface %q IP to %q", ifaceName, i.Address())},
308 309
 		{setInterfaceIPv6, fmt.Sprintf("error setting interface %q IPv6 to %q", ifaceName, i.AddressIPv6())},
309 310
 		{setInterfaceMaster, fmt.Sprintf("error setting interface %q master to %q", ifaceName, i.DstMaster())},
... ...
@@ -326,6 +335,13 @@ func setInterfaceMaster(iface netlink.Link, i *nwIface) error {
326 326
 		LinkAttrs: netlink.LinkAttrs{Name: i.DstMaster()}})
327 327
 }
328 328
 
329
+func setInterfaceMAC(iface netlink.Link, i *nwIface) error { // applies i's MAC address, if one is set, to iface
330
+	if mac := i.MacAddress(); mac != nil { // one locked snapshot serves both the nil check and the netlink call
331
+		return netlink.LinkSetHardwareAddr(iface, mac)
332
+	}
333
+	return nil // no MAC configured: leave the link's hardware address untouched
334
+}
335
+
329 336
 func setInterfaceIP(iface netlink.Link, i *nwIface) error {
330 337
 	if i.Address() == nil {
331 338
 		return nil
... ...
@@ -42,6 +42,12 @@ func (n *networkNamespace) Master(name string) IfaceOption {
42 42
 	}
43 43
 }
44 44
 
45
+func (n *networkNamespace) MacAddress(mac net.HardwareAddr) IfaceOption {
46
+	return func(i *nwIface) {
47
+		i.mac = mac
48
+	}
49
+}
50
+
45 51
 func (n *networkNamespace) Address(addr *net.IPNet) IfaceOption {
46 52
 	return func(i *nwIface) {
47 53
 		i.address = addr
... ...
@@ -76,6 +76,9 @@ type IfaceOptionSetter interface {
76 76
 	// Bridge returns an option setter to set if the interface is a bridge.
77 77
 	Bridge(bool) IfaceOption
78 78
 
79
+	// MacAddress returns an option setter to set the MAC address.
80
+	MacAddress(net.HardwareAddr) IfaceOption
81
+
79 82
 	// Address returns an option setter to set IPv4 address.
80 83
 	Address(*net.IPNet) IfaceOption
81 84
 
... ...
@@ -2,8 +2,11 @@ package libnetwork
2 2
 
3 3
 import (
4 4
 	"fmt"
5
+	"math/rand"
5 6
 	"net"
6 7
 	"strings"
8
+	"sync"
9
+	"time"
7 10
 
8 11
 	log "github.com/Sirupsen/logrus"
9 12
 	"github.com/docker/libnetwork/iptables"
... ...
@@ -31,23 +34,35 @@ type Resolver interface {
31 31
 }
32 32
 
33 33
 const (
34
-	resolverIP    = "127.0.0.11"
35
-	dnsPort       = "53"
36
-	ptrIPv4domain = ".in-addr.arpa."
37
-	ptrIPv6domain = ".ip6.arpa."
38
-	respTTL       = 600
39
-	maxExtDNS     = 3 //max number of external servers to try
34
+	resolverIP      = "127.0.0.11"
35
+	dnsPort         = "53"
36
+	ptrIPv4domain   = ".in-addr.arpa."
37
+	ptrIPv6domain   = ".ip6.arpa."
38
+	respTTL         = 600
39
+	maxExtDNS       = 3 //max number of external servers to try
40
+	extIOTimeout    = 3 * time.Second
41
+	defaultRespSize = 512
40 42
 )
41 43
 
44
+type extDNSEntry struct {
45
+	ipStr   string
46
+	extConn net.Conn
47
+	extOnce sync.Once
48
+}
49
+
42 50
 // resolver implements the Resolver interface
43 51
 type resolver struct {
44
-	sb        *sandbox
45
-	extDNS    []string
46
-	server    *dns.Server
47
-	conn      *net.UDPConn
48
-	tcpServer *dns.Server
49
-	tcpListen *net.TCPListener
50
-	err       error
52
+	sb         *sandbox
53
+	extDNSList [maxExtDNS]extDNSEntry
54
+	server     *dns.Server
55
+	conn       *net.UDPConn
56
+	tcpServer  *dns.Server
57
+	tcpListen  *net.TCPListener
58
+	err        error
59
+}
60
+
61
+func init() {
62
+	rand.Seed(time.Now().Unix())
51 63
 }
52 64
 
53 65
 // NewResolver creates a new instance of the Resolver
... ...
@@ -136,7 +151,13 @@ func (r *resolver) Stop() {
136 136
 }
137 137
 
138 138
 func (r *resolver) SetExtServers(dns []string) {
139
-	r.extDNS = dns
139
+	l := len(dns)
140
+	if l > maxExtDNS { // extDNSList is a fixed-size array; servers past maxExtDNS are ignored
141
+		l = maxExtDNS
142
+	}
143
+	for i := 0; i < l; i++ {
144
+		r.extDNSList[i].ipStr = dns[i] // only ipStr is written; slots at index >= l keep their previous contents
145
+	}
140 146
 }
141 147
 
142 148
 func (r *resolver) NameServer() string {
... ...
@@ -151,22 +172,36 @@ func setCommonFlags(msg *dns.Msg) {
151 151
 	msg.RecursionAvailable = true
152 152
 }
153 153
 
154
+func shuffleAddr(addr []net.IP) []net.IP { // randomly permutes addr in place and returns it
155
+	for i := len(addr) - 1; i > 0; i-- { // walk from the last slot down to index 1
156
+		r := rand.Intn(i + 1) // pick a position in [0, i] to swap with slot i
157
+		addr[i], addr[r] = addr[r], addr[i]
158
+	}
159
+	return addr // same backing array as the argument, not a copy
160
+}
161
+
154 162
 func (r *resolver) handleIPv4Query(name string, query *dns.Msg) (*dns.Msg, error) {
155 163
 	addr := r.sb.ResolveName(name)
156 164
 	if addr == nil {
157 165
 		return nil, nil
158 166
 	}
159 167
 
160
-	log.Debugf("Lookup for %s: IP %s", name, addr.String())
168
+	log.Debugf("Lookup for %s: IP %v", name, addr)
161 169
 
162 170
 	resp := new(dns.Msg)
163 171
 	resp.SetReply(query)
164 172
 	setCommonFlags(resp)
165 173
 
166
-	rr := new(dns.A)
167
-	rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL}
168
-	rr.A = addr
169
-	resp.Answer = append(resp.Answer, rr)
174
+	if len(addr) > 1 { // nothing to reorder for a single address
175
+		addr = shuffleAddr(append([]net.IP(nil), addr...)) // shuffle a private copy: the resolved slice may be the one stored in the service map and shared by concurrent queries
176
+	}
177
+
178
+	for _, ip := range addr { // one A record per resolved address, all with the same name and TTL
179
+		rr := new(dns.A)
180
+		rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL}
181
+		rr.A = ip
182
+		resp.Answer = append(resp.Answer, rr)
183
+	}
170 184
 	return resp, nil
171 185
 }
172 186
 
... ...
@@ -200,10 +235,23 @@ func (r *resolver) handlePTRQuery(ptr string, query *dns.Msg) (*dns.Msg, error)
200 200
 	return resp, nil
201 201
 }
202 202
 
203
+func truncateResp(resp *dns.Msg, maxSize int, isTCP bool) { // shrinks resp by dropping Answer RRs until it fits in maxSize
204
+	if !isTCP {
205
+		resp.Truncated = true // the truncated flag is set for non-TCP replies only
206
+	}
207
+
208
+	// trim the Answer RRs one by one till the whole message fits
209
+	// within the reply size, or until there is no Answer RR left to drop
210
+	for len(resp.Answer) > 0 && resp.Len() > maxSize {
211
+		resp.Answer = resp.Answer[:len(resp.Answer)-1]
212
+	}
213
+}
214
+
203 215
 func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
204 216
 	var (
205
-		resp *dns.Msg
206
-		err  error
217
+		extConn net.Conn
218
+		resp    *dns.Msg
219
+		err     error
207 220
 	)
208 221
 
209 222
 	if query == nil || len(query.Question) == 0 {
... ...
@@ -221,28 +269,82 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
221 221
 		return
222 222
 	}
223 223
 
224
-	if resp == nil {
225
-		if len(r.extDNS) == 0 {
226
-			return
224
+	proto := w.LocalAddr().Network()
225
+	maxSize := 0
226
+	if proto == "tcp" {
227
+		maxSize = dns.MaxMsgSize - 1
228
+	} else if proto == "udp" {
229
+		optRR := query.IsEdns0()
230
+		if optRR != nil {
231
+			maxSize = int(optRR.UDPSize())
232
+		}
233
+		if maxSize < defaultRespSize {
234
+			maxSize = defaultRespSize
227 235
 		}
236
+	}
228 237
 
229
-		num := maxExtDNS
230
-		if len(r.extDNS) < maxExtDNS {
231
-			num = len(r.extDNS)
238
+	if resp != nil {
239
+		if resp.Len() > maxSize {
240
+			truncateResp(resp, maxSize, proto == "tcp")
232 241
 		}
233
-		for i := 0; i < num; i++ {
234
-			log.Debugf("Querying ext dns %s:%s for %s[%d]", w.LocalAddr().Network(), r.extDNS[i], name, query.Question[0].Qtype)
242
+	} else {
243
+		for i := 0; i < maxExtDNS; i++ {
244
+			extDNS := &r.extDNSList[i]
245
+			if extDNS.ipStr == "" {
246
+				break
247
+			}
248
+			log.Debugf("Querying ext dns %s:%s for %s[%d]", proto, extDNS.ipStr, name, query.Question[0].Qtype)
235 249
 
236
-			c := &dns.Client{Net: w.LocalAddr().Network()}
237
-			addr := fmt.Sprintf("%s:%d", r.extDNS[i], 53)
250
+			extConnect := func() {
251
+				addr := fmt.Sprintf("%s:%d", extDNS.ipStr, 53)
252
+				extConn, err = net.DialTimeout(proto, addr, extIOTimeout)
253
+			}
238 254
 
239
-			resp, _, err = c.Exchange(query, addr)
240
-			if err == nil {
241
-				resp.Compress = true
242
-				break
255
+			// For udp clients connection is persisted to reuse for further queries.
256
+			// Accessing extDNS.extConn can be a race here between goroutines. Hence the
257
+			// connection setup is done in a Once block and extConn is fetched again afterwards.
258
+			extConn = extDNS.extConn
259
+			if extConn == nil || proto == "tcp" {
260
+				if proto == "udp" {
261
+					extDNS.extOnce.Do(func() {
262
+						r.sb.execFunc(extConnect)
263
+						extDNS.extConn = extConn
264
+					})
265
+					extConn = extDNS.extConn
266
+				} else {
267
+					r.sb.execFunc(extConnect)
268
+				}
269
+				if err != nil {
270
+					log.Debugf("Connect failed, %s", err)
271
+					continue
272
+				}
273
+			}
274
+
275
+			// Timeout has to be set for every IO operation.
276
+			extConn.SetDeadline(time.Now().Add(extIOTimeout))
277
+			co := &dns.Conn{Conn: extConn}
278
+
279
+			defer func() {
280
+				if proto == "tcp" {
281
+					co.Close()
282
+				}
283
+			}()
284
+			err = co.WriteMsg(query)
285
+			if err != nil {
286
+				log.Debugf("Send to DNS server failed, %s", err)
287
+				continue
243 288
 			}
244
-			log.Errorf("external resolution failed, %s", err)
289
+
290
+			resp, err = co.ReadMsg()
291
+			if err != nil {
292
+				log.Debugf("Read from DNS server failed, %s", err)
293
+				continue
294
+			}
295
+
296
+			resp.Compress = true
297
+			break
245 298
 		}
299
+
246 300
 		if resp == nil {
247 301
 			return
248 302
 		}
... ...
@@ -10,6 +10,7 @@ import (
10 10
 
11 11
 	log "github.com/Sirupsen/logrus"
12 12
 	"github.com/docker/libnetwork/etchosts"
13
+	"github.com/docker/libnetwork/netlabel"
13 14
 	"github.com/docker/libnetwork/osl"
14 15
 	"github.com/docker/libnetwork/types"
15 16
 )
... ...
@@ -37,7 +38,7 @@ type Sandbox interface {
37 37
 	Delete() error
38 38
 	// ResolveName searches for the service name in the networks to which the sandbox
39 39
 	// is connected to.
40
-	ResolveName(name string) net.IP
40
+	ResolveName(name string) []net.IP
41 41
 	// ResolveIP returns the service name for the passed in IP. IP is in reverse dotted
42 42
 	// notation; the format used for DNS PTR records
43 43
 	ResolveIP(name string) string
... ...
@@ -118,6 +119,7 @@ type containerConfig struct {
118 118
 	useDefaultSandBox bool
119 119
 	useExternalKey    bool
120 120
 	prio              int // higher the value, more the priority
121
+	exposedPorts      []types.TransportPort
121 122
 }
122 123
 
123 124
 func (sb *sandbox) ID() string {
... ...
@@ -136,18 +138,27 @@ func (sb *sandbox) Key() string {
136 136
 }
137 137
 
138 138
 func (sb *sandbox) Labels() map[string]interface{} {
139
-	return sb.config.generic
139
+	sb.Lock()
140
+	defer sb.Unlock() // keep the lock held while sb.config.generic is read and copied below
141
+	opts := make(map[string]interface{}, len(sb.config.generic))
142
+	for k, v := range sb.config.generic {
143
+		opts[k] = v
144
+	}
145
+	return opts // shallow copy: the caller owns the map, the values are shared
140 146
 }
141 147
 
142 148
 func (sb *sandbox) Statistics() (map[string]*types.InterfaceStatistics, error) {
143 149
 	m := make(map[string]*types.InterfaceStatistics)
144 150
 
145
-	if sb.osSbox == nil {
151
+	sb.Lock()
152
+	osb := sb.osSbox
153
+	sb.Unlock()
154
+	if osb == nil {
146 155
 		return m, nil
147 156
 	}
148 157
 
149 158
 	var err error
150
-	for _, i := range sb.osSbox.Info().Interfaces() {
159
+	for _, i := range osb.Info().Interfaces() {
151 160
 		if m[i.DstName()], err = i.Statistics(); err != nil {
152 161
 			return m, err
153 162
 		}
... ...
@@ -326,6 +337,18 @@ func (sb *sandbox) getConnectedEndpoints() []*endpoint {
326 326
 	return eps
327 327
 }
328 328
 
329
+func (sb *sandbox) removeEndpoint(ep *endpoint) { // drops ep from sb.endpoints if it is present; otherwise does nothing
330
+	sb.Lock()
331
+	defer sb.Unlock()
332
+
333
+	for i, e := range sb.endpoints {
334
+		if e == ep { // match by pointer identity
335
+			heap.Remove(&sb.endpoints, i) // NOTE(review): presumably container/heap, which keeps the heap ordering valid; confirm import
336
+			return // only the first match is removed; stop iterating the modified collection
337
+		}
338
+	}
339
+}
340
+
329 341
 func (sb *sandbox) getEndpoint(id string) *endpoint {
330 342
 	sb.Lock()
331 343
 	defer sb.Unlock()
... ...
@@ -391,8 +414,12 @@ func (sb *sandbox) ResolveIP(ip string) string {
391 391
 	return svc
392 392
 }
393 393
 
394
-func (sb *sandbox) ResolveName(name string) net.IP {
395
-	var ip net.IP
394
+func (sb *sandbox) execFunc(f func()) {
395
+	sb.osSbox.InvokeFunc(f)
396
+}
397
+
398
+func (sb *sandbox) ResolveName(name string) []net.IP {
399
+	var ip []net.IP
396 400
 
397 401
 	// Embedded server owns the docker network domain. Resolution should work
398 402
 	// for both container_name and container_name.network_name
... ...
@@ -440,7 +467,7 @@ func (sb *sandbox) ResolveName(name string) net.IP {
440 440
 	return nil
441 441
 }
442 442
 
443
-func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoint, alias bool) net.IP {
443
+func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoint, alias bool) []net.IP {
444 444
 	for _, ep := range epList {
445 445
 		name := req
446 446
 		n := ep.getNetwork()
... ...
@@ -463,7 +490,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin
463 463
 			}
464 464
 		} else {
465 465
 			// If it is a regular lookup and if the requested name is an alias
466
-			// dont perform a svc lookup for this endpoint.
466
+			// don't perform a svc lookup for this endpoint.
467 467
 			ep.Lock()
468 468
 			if _, ok := ep.aliases[req]; ok {
469 469
 				ep.Unlock()
... ...
@@ -481,7 +508,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin
481 481
 		ip, ok := sr.svcMap[name]
482 482
 		n.Unlock()
483 483
 		if ok {
484
-			return ip[0]
484
+			return ip
485 485
 		}
486 486
 	}
487 487
 	return nil
... ...
@@ -606,6 +633,9 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
606 606
 		if i.addrv6 != nil && i.addrv6.IP.To16() != nil {
607 607
 			ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().AddressIPv6(i.addrv6))
608 608
 		}
609
+		if i.mac != nil {
610
+			ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().MacAddress(i.mac))
611
+		}
609 612
 
610 613
 		if err := sb.osSbox.AddInterface(i.srcName, i.dstPrefix, ifaceOptions...); err != nil {
611 614
 			return fmt.Errorf("failed to add interface %s to sandbox: %v", i.srcName, err)
... ...
@@ -621,14 +651,9 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
621 621
 		}
622 622
 	}
623 623
 
624
-	for _, gwep := range sb.getConnectedEndpoints() {
625
-		if len(gwep.Gateway()) > 0 {
626
-			if gwep != ep {
627
-				break
628
-			}
629
-			if err := sb.updateGateway(gwep); err != nil {
630
-				return err
631
-			}
624
+	if ep == sb.getGatewayEndpoint() {
625
+		if err := sb.updateGateway(ep); err != nil {
626
+			return err
632 627
 		}
633 628
 	}
634 629
 
... ...
@@ -647,7 +672,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
647 647
 	ep := sb.getEndpoint(origEp.id)
648 648
 	if ep == nil {
649 649
 		return fmt.Errorf("could not find the sandbox endpoint data for endpoint %s",
650
-			ep.name)
650
+			origEp.id)
651 651
 	}
652 652
 
653 653
 	sb.Lock()
... ...
@@ -739,6 +764,13 @@ func (sb *sandbox) joinLeaveEnd() {
739 739
 	}
740 740
 }
741 741
 
742
+func (sb *sandbox) hasPortConfigs() bool { // reports whether the sandbox labels carry exposed ports or port mappings
743
+	opts := sb.Labels()
744
+	_, hasExpPorts := opts[netlabel.ExposedPorts] // only key presence is checked, not the stored value
745
+	_, hasPortMaps := opts[netlabel.PortMap]
746
+	return hasExpPorts || hasPortMaps
747
+}
748
+
742 749
 // OptionHostname function returns an option setter for hostname option to
743 750
 // be passed to NewSandbox method.
744 751
 func OptionHostname(name string) SandboxOption {
... ...
@@ -848,7 +880,42 @@ func OptionUseExternalKey() SandboxOption {
848 848
 // net container creation method. Container Labels are a good example.
849 849
 func OptionGeneric(generic map[string]interface{}) SandboxOption {
850 850
 	return func(sb *sandbox) {
851
-		sb.config.generic = generic
851
+		if sb.config.generic == nil {
852
+			sb.config.generic = make(map[string]interface{}, len(generic))
853
+		}
854
+		for k, v := range generic {
855
+			sb.config.generic[k] = v
856
+		}
857
+	}
858
+}
859
+
860
+// OptionExposedPorts function returns an option setter for the container exposed
861
+// ports option to be passed to container Create method.
862
+func OptionExposedPorts(exposedPorts []types.TransportPort) SandboxOption {
863
+	return func(sb *sandbox) {
864
+		if sb.config.generic == nil { // writing to a nil map would panic, so create it on first use
865
+			sb.config.generic = make(map[string]interface{})
866
+		}
867
+		// Defensive copy, so later changes to the caller's slice do not affect the sandbox
868
+		eps := make([]types.TransportPort, len(exposedPorts))
869
+		copy(eps, exposedPorts)
870
+		// Keep the copy in the sandbox config and also in generic, because the driver needs it
871
+		sb.config.exposedPorts = eps
872
+		sb.config.generic[netlabel.ExposedPorts] = eps
873
+	}
874
+}
875
+
876
+// OptionPortMapping function returns an option setter for the mapping
877
+// ports option to be passed to container Create method.
878
+func OptionPortMapping(portBindings []types.PortBinding) SandboxOption {
879
+	return func(sb *sandbox) {
880
+		if sb.config.generic == nil {
881
+			sb.config.generic = make(map[string]interface{})
882
+		}
883
+		// Store a copy of the bindings as generic data to pass to the driver
884
+		pbs := make([]types.PortBinding, len(portBindings))
885
+		copy(pbs, portBindings)
886
+		sb.config.generic[netlabel.PortMap] = pbs
852 887
 	}
853 888
 }
854 889
 
... ...
@@ -130,7 +130,7 @@ func (c *controller) acceptClientConnections(sock string, l net.Listener) {
130 130
 		conn, err := l.Accept()
131 131
 		if err != nil {
132 132
 			if _, err1 := os.Stat(sock); os.IsNotExist(err1) {
133
-				logrus.Debugf("Unix socket %s doesnt exist. cannot accept client connections", sock)
133
+				logrus.Debugf("Unix socket %s doesn't exist. cannot accept client connections", sock)
134 134
 				return
135 135
 			}
136 136
 			logrus.Errorf("Error accepting connection %v", err)
... ...
@@ -389,7 +389,7 @@ const (
389 389
 	// NEXTHOP indicates a StaticRoute with an IP next hop.
390 390
 	NEXTHOP = iota
391 391
 
392
-	// CONNECTED indicates a StaticRoute with a interface for directly connected peers.
392
+	// CONNECTED indicates a StaticRoute with an interface for directly connected peers.
393 393
 	CONNECTED
394 394
 )
395 395
 
... ...
@@ -458,25 +458,25 @@ type NotFoundError interface {
458 458
 	NotFound()
459 459
 }
460 460
 
461
-// ForbiddenError is an interface for errors which denote an valid request that cannot be honored
461
+// ForbiddenError is an interface for errors which denote a valid request that cannot be honored
462 462
 type ForbiddenError interface {
463 463
 	// Forbidden makes implementer into ForbiddenError type
464 464
 	Forbidden()
465 465
 }
466 466
 
467
-// NoServiceError  is an interface for errors returned when the required service is not available
467
+// NoServiceError is an interface for errors returned when the required service is not available
468 468
 type NoServiceError interface {
469 469
 	// NoService makes implementer into NoServiceError type
470 470
 	NoService()
471 471
 }
472 472
 
473
-// TimeoutError  is an interface for errors raised because of timeout
473
+// TimeoutError is an interface for errors raised because of timeout
474 474
 type TimeoutError interface {
475 475
 	// Timeout makes implementer into TimeoutError type
476 476
 	Timeout()
477 477
 }
478 478
 
479
-// NotImplementedError  is an interface for errors raised because of requested functionality is not yet implemented
479
+// NotImplementedError is an interface for errors raised because of requested functionality is not yet implemented
480 480
 type NotImplementedError interface {
481 481
 	// NotImplemented makes implementer into NotImplementedError type
482 482
 	NotImplemented()
... ...
@@ -101,6 +101,10 @@ func AddrList(link Link, family int) ([]Addr, error) {
101 101
 			continue
102 102
 		}
103 103
 
104
+		if family != FAMILY_ALL && msg.Family != uint8(family) {
105
+			continue
106
+		}
107
+
104 108
 		attrs, err := nl.ParseRouteAttr(m[msg.Len():])
105 109
 		if err != nil {
106 110
 			return nil, err
... ...
@@ -56,6 +56,7 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass {
56 56
 	ceil := cattrs.Ceil / 8
57 57
 	buffer := cattrs.Buffer
58 58
 	cbuffer := cattrs.Cbuffer
59
+
59 60
 	if ceil == 0 {
60 61
 		ceil = rate
61 62
 	}
... ...
@@ -1,6 +1,7 @@
1 1
 package netlink
2 2
 
3 3
 import (
4
+	"errors"
4 5
 	"syscall"
5 6
 
6 7
 	"github.com/vishvananda/netlink/nl"
... ...
@@ -65,15 +66,32 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
65 65
 	options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
66 66
 	if htb, ok := class.(*HtbClass); ok {
67 67
 		opt := nl.TcHtbCopt{}
68
-		opt.Rate.Rate = uint32(htb.Rate)
69
-		opt.Ceil.Rate = uint32(htb.Ceil)
70 68
 		opt.Buffer = htb.Buffer
71 69
 		opt.Cbuffer = htb.Cbuffer
72 70
 		opt.Quantum = htb.Quantum
73 71
 		opt.Level = htb.Level
74 72
 		opt.Prio = htb.Prio
75 73
 		// TODO: Handle Debug properly. For now default to 0
74
+		/* Calculate {R,C}Tab and set Rate and Ceil */
75
+		cell_log := -1
76
+		ccell_log := -1
77
+		linklayer := nl.LINKLAYER_ETHERNET
78
+		mtu := 1600
79
+		var rtab [256]uint32
80
+		var ctab [256]uint32
81
+		tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)}
82
+		if CalcRtable(&tcrate, rtab, cell_log, uint32(mtu), linklayer) < 0 {
83
+			return errors.New("HTB: failed to calculate rate table.")
84
+		}
85
+		opt.Rate = tcrate
86
+		tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)}
87
+		if CalcRtable(&tcceil, ctab, ccell_log, uint32(mtu), linklayer) < 0 {
88
+			return errors.New("HTB: failed to calculate ceil rate table.")
89
+		}
90
+		opt.Ceil = tcceil
76 91
 		nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize())
92
+		nl.NewRtAttrChild(options, nl.TCA_HTB_RTAB, SerializeRtab(rtab))
93
+		nl.NewRtAttrChild(options, nl.TCA_HTB_CTAB, SerializeRtab(ctab))
77 94
 	}
78 95
 	req.AddData(options)
79 96
 	return nil
... ...
@@ -204,6 +204,7 @@ type Vxlan struct {
204 204
 	RSC          bool
205 205
 	L2miss       bool
206 206
 	L3miss       bool
207
+	UDPCSum      bool
207 208
 	NoAge        bool
208 209
 	GBP          bool
209 210
 	Age          int
... ...
@@ -142,6 +142,54 @@ func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error {
142 142
 	return err
143 143
 }
144 144
 
145
+// LinkSetVfHardwareAddr sets the hardware address of a vf for the link.
146
+// Equivalent to: `ip link set $link vf $vf mac $hwaddr`
147
+func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error {
148
+	base := link.Attrs()
149
+	ensureIndex(base) // NOTE(review): presumably fills in base.Index when it is unset; confirm in ensureIndex
150
+	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
151
+
152
+	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
153
+	msg.Index = int32(base.Index) // the request targets the link by interface index
154
+	req.AddData(msg)
155
+
156
+	data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) // nesting: IFLA_VFINFO_LIST > IFLA_VF_INFO > IFLA_VF_MAC
157
+	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
158
+	vfmsg := nl.VfMac{
159
+		Vf: uint32(vf),
160
+	}
161
+	copy(vfmsg.Mac[:], []byte(hwaddr)) // Mac is a 32-byte array: at most 32 bytes are copied, any unused tail stays zero
162
+	nl.NewRtAttrChild(info, nl.IFLA_VF_MAC, vfmsg.Serialize())
163
+	req.AddData(data)
164
+
165
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0) // the reply payload is discarded; only the error is reported
166
+	return err
167
+}
168
+
169
+// LinkSetVfVlan sets the vlan of a vf for the link.
170
+// Equivalent to: `ip link set $link vf $vf vlan $vlan`
171
+func LinkSetVfVlan(link Link, vf, vlan int) error {
172
+	base := link.Attrs()
173
+	ensureIndex(base) // NOTE(review): presumably fills in base.Index when it is unset; confirm in ensureIndex
174
+	req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
175
+
176
+	msg := nl.NewIfInfomsg(syscall.AF_UNSPEC)
177
+	msg.Index = int32(base.Index) // the request targets the link by interface index
178
+	req.AddData(msg)
179
+
180
+	data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) // nesting: IFLA_VFINFO_LIST > IFLA_VF_INFO > IFLA_VF_VLAN
181
+	info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil)
182
+	vfmsg := nl.VfVlan{ // Qos is not set here and stays at its zero value
183
+		Vf:   uint32(vf),
184
+		Vlan: uint32(vlan),
185
+	}
186
+	nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize())
187
+	req.AddData(data)
188
+
189
+	_, err := req.Execute(syscall.NETLINK_ROUTE, 0) // the reply payload is discarded; only the error is reported
190
+	return err
191
+}
192
+
145 193
 // LinkSetMaster sets the master of the link device.
146 194
 // Equivalent to: `ip link set $link master $master`
147 195
 func LinkSetMaster(link Link, master *Bridge) error {
... ...
@@ -277,10 +325,12 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) {
277 277
 	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss))
278 278
 	nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss))
279 279
 
280
+	if vxlan.UDPCSum {
281
+		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum))
282
+	}
280 283
 	if vxlan.GBP {
281 284
 		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, boolAttr(vxlan.GBP))
282 285
 	}
283
-
284 286
 	if vxlan.NoAge {
285 287
 		nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0))
286 288
 	} else if vxlan.Age > 0 {
... ...
@@ -815,6 +865,7 @@ func LinkList() ([]Link, error) {
815 815
 // LinkUpdate is used to pass information back from LinkSubscribe()
816 816
 type LinkUpdate struct {
817 817
 	nl.IfInfomsg
818
+	Header syscall.NlMsghdr
818 819
 	Link
819 820
 }
820 821
 
... ...
@@ -844,7 +895,7 @@ func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error {
844 844
 				if err != nil {
845 845
 					return
846 846
 				}
847
-				ch <- LinkUpdate{IfInfomsg: *ifmsg, Link: link}
847
+				ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: m.Header, Link: link}
848 848
 			}
849 849
 		}
850 850
 	}()
... ...
@@ -935,6 +986,8 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) {
935 935
 			vxlan.L2miss = int8(datum.Value[0]) != 0
936 936
 		case nl.IFLA_VXLAN_L3MISS:
937 937
 			vxlan.L3miss = int8(datum.Value[0]) != 0
938
+		case nl.IFLA_VXLAN_UDP_CSUM:
939
+			vxlan.UDPCSum = int8(datum.Value[0]) != 0
938 940
 		case nl.IFLA_VXLAN_GBP:
939 941
 			vxlan.GBP = int8(datum.Value[0]) != 0
940 942
 		case nl.IFLA_VXLAN_AGEING:
... ...
@@ -1,7 +1,13 @@
1 1
 package nl
2 2
 
3
+import (
4
+	"unsafe"
5
+)
6
+
3 7
 const (
4 8
 	DEFAULT_CHANGE = 0xFFFFFFFF
9
+	// doesn't exist in syscall
10
+	IFLA_VFINFO_LIST = 0x16
5 11
 )
6 12
 
7 13
 const (
... ...
@@ -182,3 +188,209 @@ const (
182 182
 	GRE_FLAGS   = 0x00F8
183 183
 	GRE_VERSION = 0x0007
184 184
 )
185
+
186
+const (
187
+	IFLA_VF_INFO_UNSPEC = iota
188
+	IFLA_VF_INFO
189
+	IFLA_VF_INFO_MAX = IFLA_VF_INFO
190
+)
191
+
192
+const (
193
+	IFLA_VF_UNSPEC = iota
194
+	IFLA_VF_MAC    /* Hardware queue specific attributes */
195
+	IFLA_VF_VLAN
196
+	IFLA_VF_TX_RATE      /* Max TX Bandwidth Allocation */
197
+	IFLA_VF_SPOOFCHK     /* Spoof Checking on/off switch */
198
+	IFLA_VF_LINK_STATE   /* link state enable/disable/auto switch */
199
+	IFLA_VF_RATE         /* Min and Max TX Bandwidth Allocation */
200
+	IFLA_VF_RSS_QUERY_EN /* RSS Redirection Table and Hash Key query
201
+	 * on/off switch
202
+	 */
203
+	IFLA_VF_STATS /* network device statistics */
204
+	IFLA_VF_MAX   = IFLA_VF_STATS
205
+)
206
+
207
+const (
208
+	IFLA_VF_LINK_STATE_AUTO    = iota /* link state of the uplink */
209
+	IFLA_VF_LINK_STATE_ENABLE         /* link always up */
210
+	IFLA_VF_LINK_STATE_DISABLE        /* link always down */
211
+	IFLA_VF_LINK_STATE_MAX     = IFLA_VF_LINK_STATE_DISABLE
212
+)
213
+
214
+const (
215
+	IFLA_VF_STATS_RX_PACKETS = iota
216
+	IFLA_VF_STATS_TX_PACKETS
217
+	IFLA_VF_STATS_RX_BYTES
218
+	IFLA_VF_STATS_TX_BYTES
219
+	IFLA_VF_STATS_BROADCAST
220
+	IFLA_VF_STATS_MULTICAST
221
+	IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST
222
+)
223
+
224
+const (
225
+	SizeofVfMac        = 0x24
226
+	SizeofVfVlan       = 0x0c
227
+	SizeofVfTxRate     = 0x08
228
+	SizeofVfRate       = 0x0c
229
+	SizeofVfSpoofchk   = 0x08
230
+	SizeofVfLinkState  = 0x08
231
+	SizeofVfRssQueryEn = 0x08
232
+)
233
+
234
+// struct ifla_vf_mac {
235
+//   __u32 vf;
236
+//   __u8 mac[32]; /* MAX_ADDR_LEN */
237
+// };
238
+
239
+type VfMac struct {
240
+	Vf  uint32
241
+	Mac [32]byte
242
+}
243
+
244
+func (msg *VfMac) Len() int {
245
+	return SizeofVfMac
246
+}
247
+
248
+func DeserializeVfMac(b []byte) *VfMac {
249
+	return (*VfMac)(unsafe.Pointer(&b[0:SizeofVfMac][0]))
250
+}
251
+
252
+func (msg *VfMac) Serialize() []byte {
253
+	return (*(*[SizeofVfMac]byte)(unsafe.Pointer(msg)))[:]
254
+}
255
+
256
+// struct ifla_vf_vlan {
257
+//   __u32 vf;
258
+//   __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
259
+//   __u32 qos;
260
+// };
261
+
262
+type VfVlan struct {
263
+	Vf   uint32
264
+	Vlan uint32
265
+	Qos  uint32
266
+}
267
+
268
+func (msg *VfVlan) Len() int {
269
+	return SizeofVfVlan
270
+}
271
+
272
+func DeserializeVfVlan(b []byte) *VfVlan {
273
+	return (*VfVlan)(unsafe.Pointer(&b[0:SizeofVfVlan][0]))
274
+}
275
+
276
+func (msg *VfVlan) Serialize() []byte {
277
+	return (*(*[SizeofVfVlan]byte)(unsafe.Pointer(msg)))[:]
278
+}
279
+
280
+// struct ifla_vf_tx_rate {
281
+//   __u32 vf;
282
+//   __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
283
+// };
284
+
285
+type VfTxRate struct {
286
+	Vf   uint32
287
+	Rate uint32
288
+}
289
+
290
+func (msg *VfTxRate) Len() int {
291
+	return SizeofVfTxRate
292
+}
293
+
294
+func DeserializeVfTxRate(b []byte) *VfTxRate {
295
+	return (*VfTxRate)(unsafe.Pointer(&b[0:SizeofVfTxRate][0]))
296
+}
297
+
298
+func (msg *VfTxRate) Serialize() []byte {
299
+	return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:]
300
+}
301
+
302
+// struct ifla_vf_rate {
303
+//   __u32 vf;
304
+//   __u32 min_tx_rate; /* Min Bandwidth in Mbps */
305
+//   __u32 max_tx_rate; /* Max Bandwidth in Mbps */
306
+// };
307
+
308
+type VfRate struct {
309
+	Vf        uint32
310
+	MinTxRate uint32
311
+	MaxTxRate uint32
312
+}
313
+
314
+func (msg *VfRate) Len() int {
315
+	return SizeofVfRate
316
+}
317
+
318
+func DeserializeVfRate(b []byte) *VfRate {
319
+	return (*VfRate)(unsafe.Pointer(&b[0:SizeofVfRate][0]))
320
+}
321
+
322
+func (msg *VfRate) Serialize() []byte {
323
+	return (*(*[SizeofVfRate]byte)(unsafe.Pointer(msg)))[:]
324
+}
325
+
326
+// struct ifla_vf_spoofchk {
327
+//   __u32 vf;
328
+//   __u32 setting;
329
+// };
330
+
331
+type VfSpoofchk struct {
332
+	Vf      uint32
333
+	Setting uint32
334
+}
335
+
336
+func (msg *VfSpoofchk) Len() int {
337
+	return SizeofVfSpoofchk
338
+}
339
+
340
+func DeserializeVfSpoofchk(b []byte) *VfSpoofchk {
341
+	return (*VfSpoofchk)(unsafe.Pointer(&b[0:SizeofVfSpoofchk][0]))
342
+}
343
+
344
+func (msg *VfSpoofchk) Serialize() []byte {
345
+	return (*(*[SizeofVfSpoofchk]byte)(unsafe.Pointer(msg)))[:]
346
+}
347
+
348
+// struct ifla_vf_link_state {
349
+//   __u32 vf;
350
+//   __u32 link_state;
351
+// };
352
+
353
+type VfLinkState struct {
354
+	Vf        uint32
355
+	LinkState uint32
356
+}
357
+
358
+func (msg *VfLinkState) Len() int {
359
+	return SizeofVfLinkState
360
+}
361
+
362
+func DeserializeVfLinkState(b []byte) *VfLinkState {
363
+	return (*VfLinkState)(unsafe.Pointer(&b[0:SizeofVfLinkState][0]))
364
+}
365
+
366
+func (msg *VfLinkState) Serialize() []byte {
367
+	return (*(*[SizeofVfLinkState]byte)(unsafe.Pointer(msg)))[:]
368
+}
369
+
370
+// struct ifla_vf_rss_query_en {
371
+//   __u32 vf;
372
+//   __u32 setting;
373
+// };
374
+
375
+type VfRssQueryEn struct {
376
+	Vf      uint32
377
+	Setting uint32
378
+}
379
+
380
+func (msg *VfRssQueryEn) Len() int {
381
+	return SizeofVfRssQueryEn
382
+}
383
+
384
+func DeserializeVfRssQueryEn(b []byte) *VfRssQueryEn {
385
+	return (*VfRssQueryEn)(unsafe.Pointer(&b[0:SizeofVfRssQueryEn][0]))
386
+}
387
+
388
+func (msg *VfRssQueryEn) Serialize() []byte {
389
+	return (*(*[SizeofVfRssQueryEn]byte)(unsafe.Pointer(msg)))[:]
390
+}
... ...
@@ -110,9 +110,6 @@ func XfrmStateDel(state *XfrmState) error {
110 110
 func XfrmStateList(family int) ([]XfrmState, error) {
111 111
 	req := nl.NewNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP)
112 112
 
113
-	msg := nl.NewIfInfomsg(family)
114
-	req.AddData(msg)
115
-
116 113
 	msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
117 114
 	if err != nil {
118 115
 		return nil, err