Browse code

Bump libnetwork to 3ac297bc

Bump libnetwork to 3ac297bc7fd0afec9051bbb47024c9bc1d75bf5b in order to
get fix 0c3d9f00 which addresses a flaw that the scalable load balancing
code revealed. Attempting to print a sandbox ID when the sandbox name was
too short resulted in a goroutine panic. In the previous code this could
occur for sandboxes with names of 1 or 2 characters. Due to naming
updates in the scalable load balancing code, it could now also occur for
networks whose names were 3 characters long, and at least one of the
integration tests employed such networks (named 'foo', 'bar' and 'baz').

This update also brings in several other changes:
* 6c7c6017 - Fix error handling about bridgeSetup
* 5ed38221 - Optimize networkDB queue
* cfa9afdb - ndots: produce error on negative numbers
* 5586e226 - improve error message for invalid ndots number
* 449672e5 - Allows to set generic knobs on the Sandbox
* 6b4c4af7 - do not ignore user-provided "ndots:0" option
* 843a0e42 - Adjust corner case for reconnect logic

Signed-off-by: Chris Telfer <ctelfer@docker.com>

Chris Telfer authored on 2018/07/06 11:54:25
Showing 42 changed files
... ...
@@ -3,7 +3,7 @@
3 3
 # LIBNETWORK_COMMIT is used to build the docker-userland-proxy binary. When
4 4
 # updating the binary version, consider updating github.com/docker/libnetwork
5 5
 # in vendor.conf accordingly
6
-LIBNETWORK_COMMIT=b0186632522c68f4e1222c4f6d7dbe518882024f
6
+LIBNETWORK_COMMIT=3ac297bc7fd0afec9051bbb47024c9bc1d75bf5b
7 7
 
8 8
 install_proxy() {
9 9
 	case "$1" in
... ...
@@ -37,7 +37,7 @@ github.com/mitchellh/hashstructure 2bca23e0e452137f789efbc8610126fd8b94f73b
37 37
 #get libnetwork packages
38 38
 
39 39
 # When updating, also update LIBNETWORK_COMMIT in hack/dockerfile/install/proxy accordingly
40
-github.com/docker/libnetwork b0186632522c68f4e1222c4f6d7dbe518882024f
40
+github.com/docker/libnetwork 3ac297bc7fd0afec9051bbb47024c9bc1d75bf5b
41 41
 github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
42 42
 github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
43 43
 github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -1144,6 +1144,11 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (S
1144 1144
 		}
1145 1145
 	}
1146 1146
 
1147
+	if sb.osSbox != nil {
1148
+		// Apply operating system specific knobs on the load balancer sandbox
1149
+		sb.osSbox.ApplyOSTweaks(sb.oslTypes)
1150
+	}
1151
+
1147 1152
 	c.Lock()
1148 1153
 	c.sandboxes[sb.id] = sb
1149 1154
 	c.Unlock()
... ...
@@ -120,3 +120,13 @@ type TablePeersResult struct {
120 120
 	TableObj
121 121
 	Elements []PeerEntryObj `json:"entries"`
122 122
 }
123
+
124
+// NetworkStatsResult network db stats related to entries and queue len for a network
125
+type NetworkStatsResult struct {
126
+	Entries  int `json:"entries"`
127
+	QueueLen int `jsoin:"qlen"`
128
+}
129
+
130
+func (n *NetworkStatsResult) String() string {
131
+	return fmt.Sprintf("entries: %d, qlen: %d\n", n.Entries, n.QueueLen)
132
+}
... ...
@@ -614,9 +614,7 @@ func (d *driver) checkConflict(config *networkConfiguration) error {
614 614
 	return nil
615 615
 }
616 616
 
617
-func (d *driver) createNetwork(config *networkConfiguration) error {
618
-	var err error
619
-
617
+func (d *driver) createNetwork(config *networkConfiguration) (err error) {
620 618
 	defer osl.InitOSContext()()
621 619
 
622 620
 	networkList := d.getNetworks()
... ...
@@ -775,7 +773,7 @@ func (d *driver) deleteNetwork(nid string) error {
775 775
 		}
776 776
 
777 777
 		if err := d.storeDelete(ep); err != nil {
778
-			logrus.Warnf("Failed to remove bridge endpoint %s from store: %v", ep.id[0:7], err)
778
+			logrus.Warnf("Failed to remove bridge endpoint %.7s from store: %v", ep.id, err)
779 779
 		}
780 780
 	}
781 781
 
... ...
@@ -1050,7 +1048,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
1050 1050
 	}
1051 1051
 
1052 1052
 	if err = d.storeUpdate(endpoint); err != nil {
1053
-		return fmt.Errorf("failed to save bridge endpoint %s to store: %v", endpoint.id[0:7], err)
1053
+		return fmt.Errorf("failed to save bridge endpoint %.7s to store: %v", endpoint.id, err)
1054 1054
 	}
1055 1055
 
1056 1056
 	return nil
... ...
@@ -1116,7 +1114,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
1116 1116
 	}
1117 1117
 
1118 1118
 	if err := d.storeDelete(ep); err != nil {
1119
-		logrus.Warnf("Failed to remove bridge endpoint %s from store: %v", ep.id[0:7], err)
1119
+		logrus.Warnf("Failed to remove bridge endpoint %.7s from store: %v", ep.id, err)
1120 1120
 	}
1121 1121
 
1122 1122
 	return nil
... ...
@@ -1290,7 +1288,7 @@ func (d *driver) ProgramExternalConnectivity(nid, eid string, options map[string
1290 1290
 	}()
1291 1291
 
1292 1292
 	if err = d.storeUpdate(endpoint); err != nil {
1293
-		return fmt.Errorf("failed to update bridge endpoint %s to store: %v", endpoint.id[0:7], err)
1293
+		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
1294 1294
 	}
1295 1295
 
1296 1296
 	if !network.config.EnableICC {
... ...
@@ -1332,7 +1330,7 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
1332 1332
 	clearEndpointConnections(d.nlh, endpoint)
1333 1333
 
1334 1334
 	if err = d.storeUpdate(endpoint); err != nil {
1335
-		return fmt.Errorf("failed to update bridge endpoint %s to store: %v", endpoint.id[0:7], err)
1335
+		return fmt.Errorf("failed to update bridge endpoint %.7s to store: %v", endpoint.id, err)
1336 1336
 	}
1337 1337
 
1338 1338
 	return nil
... ...
@@ -62,7 +62,7 @@ func (d *driver) populateNetworks() error {
62 62
 		if err = d.createNetwork(ncfg); err != nil {
63 63
 			logrus.Warnf("could not create bridge network for id %s bridge name %s while booting up from persistent state: %v", ncfg.ID, ncfg.BridgeName, err)
64 64
 		}
65
-		logrus.Debugf("Network (%s) restored", ncfg.ID[0:7])
65
+		logrus.Debugf("Network (%.7s) restored", ncfg.ID)
66 66
 	}
67 67
 
68 68
 	return nil
... ...
@@ -82,16 +82,16 @@ func (d *driver) populateEndpoints() error {
82 82
 		ep := kvo.(*bridgeEndpoint)
83 83
 		n, ok := d.networks[ep.nid]
84 84
 		if !ok {
85
-			logrus.Debugf("Network (%s) not found for restored bridge endpoint (%s)", ep.nid[0:7], ep.id[0:7])
86
-			logrus.Debugf("Deleting stale bridge endpoint (%s) from store", ep.id[0:7])
85
+			logrus.Debugf("Network (%.7s) not found for restored bridge endpoint (%.7s)", ep.nid, ep.id)
86
+			logrus.Debugf("Deleting stale bridge endpoint (%.7s) from store", ep.id)
87 87
 			if err := d.storeDelete(ep); err != nil {
88
-				logrus.Debugf("Failed to delete stale bridge endpoint (%s) from store", ep.id[0:7])
88
+				logrus.Debugf("Failed to delete stale bridge endpoint (%.7s) from store", ep.id)
89 89
 			}
90 90
 			continue
91 91
 		}
92 92
 		n.endpoints[ep.id] = ep
93 93
 		n.restorePortAllocations(ep)
94
-		logrus.Debugf("Endpoint (%s) restored to network (%s)", ep.id[0:7], ep.nid[0:7])
94
+		logrus.Debugf("Endpoint (%.7s) restored to network (%.7s)", ep.id, ep.nid)
95 95
 	}
96 96
 
97 97
 	return nil
... ...
@@ -382,7 +382,7 @@ func (n *bridgeNetwork) restorePortAllocations(ep *bridgeEndpoint) {
382 382
 	ep.extConnConfig.PortBindings = ep.portMapping
383 383
 	_, err := n.allocatePorts(ep, n.config.DefaultBindingIP, n.driver.config.EnableUserlandProxy)
384 384
 	if err != nil {
385
-		logrus.Warnf("Failed to reserve existing port mapping for endpoint %s:%v", ep.id[0:7], err)
385
+		logrus.Warnf("Failed to reserve existing port mapping for endpoint %.7s:%v", ep.id, err)
386 386
 	}
387 387
 	ep.extConnConfig.PortBindings = tmp
388 388
 }
... ...
@@ -53,7 +53,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
53 53
 	}
54 54
 
55 55
 	if err := d.storeUpdate(ep); err != nil {
56
-		return fmt.Errorf("failed to save ipvlan endpoint %s to store: %v", ep.id[0:7], err)
56
+		return fmt.Errorf("failed to save ipvlan endpoint %.7s to store: %v", ep.id, err)
57 57
 	}
58 58
 
59 59
 	n.addEndpoint(ep)
... ...
@@ -82,7 +82,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
82 82
 	}
83 83
 
84 84
 	if err := d.storeDelete(ep); err != nil {
85
-		logrus.Warnf("Failed to remove ipvlan endpoint %s from store: %v", ep.id[0:7], err)
85
+		logrus.Warnf("Failed to remove ipvlan endpoint %.7s from store: %v", ep.id, err)
86 86
 	}
87 87
 	n.deleteEndpoint(ep.id)
88 88
 	return nil
... ...
@@ -117,7 +117,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
117 117
 		return err
118 118
 	}
119 119
 	if err = d.storeUpdate(ep); err != nil {
120
-		return fmt.Errorf("failed to save ipvlan endpoint %s to store: %v", ep.id[0:7], err)
120
+		return fmt.Errorf("failed to save ipvlan endpoint %.7s to store: %v", ep.id, err)
121 121
 	}
122 122
 
123 123
 	return nil
... ...
@@ -156,7 +156,7 @@ func (d *driver) DeleteNetwork(nid string) error {
156 156
 		}
157 157
 
158 158
 		if err := d.storeDelete(ep); err != nil {
159
-			logrus.Warnf("Failed to remove ipvlan endpoint %s from store: %v", ep.id[0:7], err)
159
+			logrus.Warnf("Failed to remove ipvlan endpoint %.7s from store: %v", ep.id, err)
160 160
 		}
161 161
 	}
162 162
 	// delete the *network
... ...
@@ -95,15 +95,15 @@ func (d *driver) populateEndpoints() error {
95 95
 		ep := kvo.(*endpoint)
96 96
 		n, ok := d.networks[ep.nid]
97 97
 		if !ok {
98
-			logrus.Debugf("Network (%s) not found for restored ipvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
99
-			logrus.Debugf("Deleting stale ipvlan endpoint (%s) from store", ep.id[0:7])
98
+			logrus.Debugf("Network (%.7s) not found for restored ipvlan endpoint (%.7s)", ep.nid, ep.id)
99
+			logrus.Debugf("Deleting stale ipvlan endpoint (%.7s) from store", ep.id)
100 100
 			if err := d.storeDelete(ep); err != nil {
101
-				logrus.Debugf("Failed to delete stale ipvlan endpoint (%s) from store", ep.id[0:7])
101
+				logrus.Debugf("Failed to delete stale ipvlan endpoint (%.7s) from store", ep.id)
102 102
 			}
103 103
 			continue
104 104
 		}
105 105
 		n.endpoints[ep.id] = ep
106
-		logrus.Debugf("Endpoint (%s) restored to network (%s)", ep.id[0:7], ep.nid[0:7])
106
+		logrus.Debugf("Endpoint (%.7s) restored to network (%.7s)", ep.id, ep.nid)
107 107
 	}
108 108
 
109 109
 	return nil
... ...
@@ -58,7 +58,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
58 58
 	}
59 59
 
60 60
 	if err := d.storeUpdate(ep); err != nil {
61
-		return fmt.Errorf("failed to save macvlan endpoint %s to store: %v", ep.id[0:7], err)
61
+		return fmt.Errorf("failed to save macvlan endpoint %.7s to store: %v", ep.id, err)
62 62
 	}
63 63
 
64 64
 	n.addEndpoint(ep)
... ...
@@ -87,7 +87,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
87 87
 	}
88 88
 
89 89
 	if err := d.storeDelete(ep); err != nil {
90
-		logrus.Warnf("Failed to remove macvlan endpoint %s from store: %v", ep.id[0:7], err)
90
+		logrus.Warnf("Failed to remove macvlan endpoint %.7s from store: %v", ep.id, err)
91 91
 	}
92 92
 
93 93
 	n.deleteEndpoint(ep.id)
... ...
@@ -78,7 +78,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
78 78
 		return err
79 79
 	}
80 80
 	if err := d.storeUpdate(ep); err != nil {
81
-		return fmt.Errorf("failed to save macvlan endpoint %s to store: %v", ep.id[0:7], err)
81
+		return fmt.Errorf("failed to save macvlan endpoint %.7s to store: %v", ep.id, err)
82 82
 	}
83 83
 	return nil
84 84
 }
... ...
@@ -160,7 +160,7 @@ func (d *driver) DeleteNetwork(nid string) error {
160 160
 		}
161 161
 
162 162
 		if err := d.storeDelete(ep); err != nil {
163
-			logrus.Warnf("Failed to remove macvlan endpoint %s from store: %v", ep.id[0:7], err)
163
+			logrus.Warnf("Failed to remove macvlan endpoint %.7s from store: %v", ep.id, err)
164 164
 		}
165 165
 	}
166 166
 	// delete the *network
... ...
@@ -95,15 +95,15 @@ func (d *driver) populateEndpoints() error {
95 95
 		ep := kvo.(*endpoint)
96 96
 		n, ok := d.networks[ep.nid]
97 97
 		if !ok {
98
-			logrus.Debugf("Network (%s) not found for restored macvlan endpoint (%s)", ep.nid[0:7], ep.id[0:7])
99
-			logrus.Debugf("Deleting stale macvlan endpoint (%s) from store", ep.id[0:7])
98
+			logrus.Debugf("Network (%.7s) not found for restored macvlan endpoint (%.7s)", ep.nid, ep.id)
99
+			logrus.Debugf("Deleting stale macvlan endpoint (%.7s) from store", ep.id)
100 100
 			if err := d.storeDelete(ep); err != nil {
101
-				logrus.Debugf("Failed to delete stale macvlan endpoint (%s) from store", ep.id[0:7])
101
+				logrus.Debugf("Failed to delete stale macvlan endpoint (%.7s) from store", ep.id)
102 102
 			}
103 103
 			continue
104 104
 		}
105 105
 		n.endpoints[ep.id] = ep
106
-		logrus.Debugf("Endpoint (%s) restored to network (%s)", ep.id[0:7], ep.nid[0:7])
106
+		logrus.Debugf("Endpoint (%.7s) restored to network (%.7s)", ep.id, ep.nid)
107 107
 	}
108 108
 
109 109
 	return nil
... ...
@@ -78,7 +78,7 @@ func (e *encrMap) String() string {
78 78
 }
79 79
 
80 80
 func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal, add bool) error {
81
-	logrus.Debugf("checkEncryption(%s, %v, %d, %t)", nid[0:7], rIP, vxlanID, isLocal)
81
+	logrus.Debugf("checkEncryption(%.7s, %v, %d, %t)", nid, rIP, vxlanID, isLocal)
82 82
 
83 83
 	n := d.network(nid)
84 84
 	if n == nil || !n.secure {
... ...
@@ -101,7 +101,7 @@ func (d *driver) checkEncryption(nid string, rIP net.IP, vxlanID uint32, isLocal
101 101
 			}
102 102
 			return false
103 103
 		}); err != nil {
104
-			logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %s: %v", nid[0:5], err)
104
+			logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %.5s: %v", nid, err)
105 105
 		}
106 106
 	default:
107 107
 		if len(d.network(nid).endpoints) > 0 {
... ...
@@ -69,7 +69,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
69 69
 	ep.ifName = containerIfName
70 70
 
71 71
 	if err = d.writeEndpointToStore(ep); err != nil {
72
-		return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
72
+		return fmt.Errorf("failed to update overlay endpoint %.7s to local data store: %v", ep.id, err)
73 73
 	}
74 74
 
75 75
 	// Set the container interface and its peer MTU to 1450 to allow
... ...
@@ -1,72 +1,23 @@
1 1
 package overlay
2 2
 
3 3
 import (
4
-	"io/ioutil"
5
-	"path"
6 4
 	"strconv"
7
-	"strings"
8 5
 
9
-	"github.com/sirupsen/logrus"
6
+	"github.com/docker/libnetwork/osl/kernel"
10 7
 )
11 8
 
12
-type conditionalCheck func(val1, val2 string) bool
13
-
14
-type osValue struct {
15
-	value   string
16
-	checkFn conditionalCheck
17
-}
18
-
19
-var osConfig = map[string]osValue{
9
+var ovConfig = map[string]*kernel.OSValue{
20 10
 	"net.ipv4.neigh.default.gc_thresh1": {"8192", checkHigher},
21 11
 	"net.ipv4.neigh.default.gc_thresh2": {"49152", checkHigher},
22 12
 	"net.ipv4.neigh.default.gc_thresh3": {"65536", checkHigher},
23 13
 }
24 14
 
25
-func propertyIsValid(val1, val2 string, check conditionalCheck) bool {
26
-	if check == nil || check(val1, val2) {
27
-		return true
28
-	}
29
-	return false
30
-}
31
-
32 15
 func checkHigher(val1, val2 string) bool {
33 16
 	val1Int, _ := strconv.ParseInt(val1, 10, 32)
34 17
 	val2Int, _ := strconv.ParseInt(val2, 10, 32)
35 18
 	return val1Int < val2Int
36 19
 }
37 20
 
38
-// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
39
-// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
40
-func writeSystemProperty(key, value string) error {
41
-	keyPath := strings.Replace(key, ".", "/", -1)
42
-	return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
43
-}
44
-
45
-func readSystemProperty(key string) (string, error) {
46
-	keyPath := strings.Replace(key, ".", "/", -1)
47
-	value, err := ioutil.ReadFile(path.Join("/proc/sys", keyPath))
48
-	if err != nil {
49
-		return "", err
50
-	}
51
-	return string(value), nil
52
-}
53
-
54 21
 func applyOStweaks() {
55
-	for k, v := range osConfig {
56
-		// read the existing property from disk
57
-		oldv, err := readSystemProperty(k)
58
-		if err != nil {
59
-			logrus.Errorf("error reading the kernel parameter %s, error: %s", k, err)
60
-			continue
61
-		}
62
-
63
-		if propertyIsValid(oldv, v.value, v.checkFn) {
64
-			// write new prop value to disk
65
-			if err := writeSystemProperty(k, v.value); err != nil {
66
-				logrus.Errorf("error setting the kernel parameter %s = %s, (leaving as %s) error: %s", k, v.value, oldv, err)
67
-				continue
68
-			}
69
-			logrus.Debugf("updated kernel parameter %s = %s (was %s)", k, v.value, oldv)
70
-		}
71
-	}
22
+	kernel.ApplyOSTweaks(ovConfig)
72 23
 }
... ...
@@ -90,7 +90,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
90 90
 	n.addEndpoint(ep)
91 91
 
92 92
 	if err := d.writeEndpointToStore(ep); err != nil {
93
-		return fmt.Errorf("failed to update overlay endpoint %s to local store: %v", ep.id[0:7], err)
93
+		return fmt.Errorf("failed to update overlay endpoint %.7s to local store: %v", ep.id, err)
94 94
 	}
95 95
 
96 96
 	return nil
... ...
@@ -116,7 +116,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
116 116
 	n.deleteEndpoint(eid)
117 117
 
118 118
 	if err := d.deleteEndpointFromStore(ep); err != nil {
119
-		logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
119
+		logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
120 120
 	}
121 121
 
122 122
 	if ep.ifName == "" {
... ...
@@ -274,7 +274,7 @@ func (d *driver) DeleteNetwork(nid string) error {
274 274
 		}
275 275
 
276 276
 		if err := d.deleteEndpointFromStore(ep); err != nil {
277
-			logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
277
+			logrus.Warnf("Failed to delete overlay endpoint %.7s from local store: %v", ep.id, err)
278 278
 		}
279 279
 	}
280 280
 	// flush the peerDB entries
... ...
@@ -137,10 +137,10 @@ func (d *driver) restoreEndpoints() error {
137 137
 		ep := kvo.(*endpoint)
138 138
 		n := d.network(ep.nid)
139 139
 		if n == nil {
140
-			logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid[0:7], ep.id[0:7])
141
-			logrus.Debugf("Deleting stale overlay endpoint (%s) from store", ep.id[0:7])
140
+			logrus.Debugf("Network (%.7s) not found for restored endpoint (%.7s)", ep.nid, ep.id)
141
+			logrus.Debugf("Deleting stale overlay endpoint (%.7s) from store", ep.id)
142 142
 			if err := d.deleteEndpointFromStore(ep); err != nil {
143
-				logrus.Debugf("Failed to delete stale overlay endpoint (%s) from store", ep.id[0:7])
143
+				logrus.Debugf("Failed to delete stale overlay endpoint (%.7s) from store", ep.id)
144 144
 			}
145 145
 			continue
146 146
 		}
... ...
@@ -80,7 +80,7 @@ func (n *network) removeEndpointWithAddress(addr *net.IPNet) {
80 80
 		_, err := hcsshim.HNSEndpointRequest("DELETE", networkEndpoint.profileID, "")
81 81
 
82 82
 		if err != nil {
83
-			logrus.Debugf("Failed to delete stale overlay endpoint (%s) from hns", networkEndpoint.id[0:7])
83
+			logrus.Debugf("Failed to delete stale overlay endpoint (%.7s) from hns", networkEndpoint.id)
84 84
 		}
85 85
 	}
86 86
 }
... ...
@@ -415,7 +415,7 @@ func (d *driver) DeleteNetwork(nid string) error {
415 415
 	// delele endpoints belong to this network
416 416
 	for _, ep := range n.endpoints {
417 417
 		if err := d.storeDelete(ep); err != nil {
418
-			logrus.Warnf("Failed to remove bridge endpoint %s from store: %v", ep.id[0:7], err)
418
+			logrus.Warnf("Failed to remove bridge endpoint %.7s from store: %v", ep.id, err)
419 419
 		}
420 420
 	}
421 421
 
... ...
@@ -704,7 +704,7 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
704 704
 	}
705 705
 
706 706
 	if err = d.storeUpdate(endpoint); err != nil {
707
-		logrus.Errorf("Failed to save endpoint %s to store: %v", endpoint.id[0:7], err)
707
+		logrus.Errorf("Failed to save endpoint %.7s to store: %v", endpoint.id, err)
708 708
 	}
709 709
 
710 710
 	return nil
... ...
@@ -731,7 +731,7 @@ func (d *driver) DeleteEndpoint(nid, eid string) error {
731 731
 	}
732 732
 
733 733
 	if err := d.storeDelete(ep); err != nil {
734
-		logrus.Warnf("Failed to remove bridge endpoint %s from store: %v", ep.id[0:7], err)
734
+		logrus.Warnf("Failed to remove bridge endpoint %.7s from store: %v", ep.id, err)
735 735
 	}
736 736
 	return nil
737 737
 }
... ...
@@ -64,7 +64,7 @@ func (d *driver) populateNetworks() error {
64 64
 		if err = d.createNetwork(ncfg); err != nil {
65 65
 			logrus.Warnf("could not create windows network for id %s hnsid %s while booting up from persistent state: %v", ncfg.ID, ncfg.HnsID, err)
66 66
 		}
67
-		logrus.Debugf("Network  %v (%s) restored", d.name, ncfg.ID[0:7])
67
+		logrus.Debugf("Network  %v (%.7s) restored", d.name, ncfg.ID)
68 68
 	}
69 69
 
70 70
 	return nil
... ...
@@ -87,15 +87,15 @@ func (d *driver) populateEndpoints() error {
87 87
 		}
88 88
 		n, ok := d.networks[ep.nid]
89 89
 		if !ok {
90
-			logrus.Debugf("Network (%s) not found for restored endpoint (%s)", ep.nid[0:7], ep.id[0:7])
91
-			logrus.Debugf("Deleting stale endpoint (%s) from store", ep.id[0:7])
90
+			logrus.Debugf("Network (%.7s) not found for restored endpoint (%.7s)", ep.nid, ep.id)
91
+			logrus.Debugf("Deleting stale endpoint (%.7s) from store", ep.id)
92 92
 			if err := d.storeDelete(ep); err != nil {
93
-				logrus.Debugf("Failed to delete stale endpoint (%s) from store", ep.id[0:7])
93
+				logrus.Debugf("Failed to delete stale endpoint (%.7s) from store", ep.id)
94 94
 			}
95 95
 			continue
96 96
 		}
97 97
 		n.endpoints[ep.id] = ep
98
-		logrus.Debugf("Endpoint (%s) restored to network (%s)", ep.id[0:7], ep.nid[0:7])
98
+		logrus.Debugf("Endpoint (%.7s) restored to network (%.7s)", ep.id, ep.nid)
99 99
 	}
100 100
 
101 101
 	return nil
... ...
@@ -203,6 +203,10 @@ func (a *Allocator) GetDefaultAddressSpaces() (string, string, error) {
203 203
 }
204 204
 
205 205
 // RequestPool returns an address pool along with its unique id.
206
+// addressSpace must be a valid address space name and must not be the empty string.
207
+// If pool is the empty string then the default predefined pool for addressSpace will be used, otherwise pool must be a valid IP address and length in CIDR notation.
208
+// If subPool is not empty, it must be a valid IP address and length in CIDR notation which is a sub-range of pool.
209
+// subPool must be empty if pool is empty.
206 210
 func (a *Allocator) RequestPool(addressSpace, pool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error) {
207 211
 	logrus.Debugf("RequestPool(%s, %s, %s, %v, %t)", addressSpace, pool, subPool, options, v6)
208 212
 
... ...
@@ -283,8 +287,8 @@ retry:
283 283
 	return remove()
284 284
 }
285 285
 
286
-// Given the address space, returns the local or global PoolConfig based on the
287
-// address space is local or global. AddressSpace locality is being registered with IPAM out of band.
286
+// Given the address space, returns the local or global PoolConfig based on whether the
287
+// address space is local or global. AddressSpace locality is registered with IPAM out of band.
288 288
 func (a *Allocator) getAddrSpace(as string) (*addrSpace, error) {
289 289
 	a.Lock()
290 290
 	defer a.Unlock()
... ...
@@ -295,6 +299,8 @@ func (a *Allocator) getAddrSpace(as string) (*addrSpace, error) {
295 295
 	return aSpace, nil
296 296
 }
297 297
 
298
+// parsePoolRequest parses and validates a request to create a new pool under addressSpace and returns
299
+// a SubnetKey, network and range describing the request.
298 300
 func (a *Allocator) parsePoolRequest(addressSpace, pool, subPool string, v6 bool) (*SubnetKey, *net.IPNet, *AddressRange, error) {
299 301
 	var (
300 302
 		nw  *net.IPNet
... ...
@@ -257,6 +257,7 @@ func (aSpace *addrSpace) New() datastore.KVObject {
257 257
 	}
258 258
 }
259 259
 
260
+// updatePoolDBOnAdd returns a closure which will add the subnet k to the address space when executed.
260 261
 func (aSpace *addrSpace) updatePoolDBOnAdd(k SubnetKey, nw *net.IPNet, ipr *AddressRange, pdf bool) (func() error, error) {
261 262
 	aSpace.Lock()
262 263
 	defer aSpace.Unlock()
... ...
@@ -281,7 +282,7 @@ func (aSpace *addrSpace) updatePoolDBOnAdd(k SubnetKey, nw *net.IPNet, ipr *Addr
281 281
 		return func() error { return aSpace.alloc.insertBitMask(k, nw) }, nil
282 282
 	}
283 283
 
284
-	// This is a new non-master pool
284
+	// This is a new non-master pool (subPool)
285 285
 	p := &PoolData{
286 286
 		ParentKey: SubnetKey{AddressSpace: k.AddressSpace, Subnet: k.Subnet},
287 287
 		Pool:      nw,
... ...
@@ -1390,7 +1390,7 @@ func (n *network) addSvcRecords(eID, name, serviceID string, epIP, epIPv6 net.IP
1390 1390
 		return
1391 1391
 	}
1392 1392
 
1393
-	logrus.Debugf("%s (%s).addSvcRecords(%s, %s, %s, %t) %s sid:%s", eID, n.ID()[0:7], name, epIP, epIPv6, ipMapUpdate, method, serviceID)
1393
+	logrus.Debugf("%s (%.7s).addSvcRecords(%s, %s, %s, %t) %s sid:%s", eID, n.ID(), name, epIP, epIPv6, ipMapUpdate, method, serviceID)
1394 1394
 
1395 1395
 	c := n.getController()
1396 1396
 	c.Lock()
... ...
@@ -1426,7 +1426,7 @@ func (n *network) deleteSvcRecords(eID, name, serviceID string, epIP net.IP, epI
1426 1426
 		return
1427 1427
 	}
1428 1428
 
1429
-	logrus.Debugf("%s (%s).deleteSvcRecords(%s, %s, %s, %t) %s sid:%s ", eID, n.ID()[0:7], name, epIP, epIPv6, ipMapUpdate, method, serviceID)
1429
+	logrus.Debugf("%s (%.7s).deleteSvcRecords(%s, %s, %s, %t) %s sid:%s ", eID, n.ID(), name, epIP, epIPv6, ipMapUpdate, method, serviceID)
1430 1430
 
1431 1431
 	c := n.getController()
1432 1432
 	c.Lock()
... ...
@@ -2125,7 +2125,8 @@ func (n *network) lbEndpointName() string {
2125 2125
 
2126 2126
 func (n *network) createLoadBalancerSandbox() (retErr error) {
2127 2127
 	sandboxName := n.lbSandboxName()
2128
-	sbOptions := []SandboxOption{}
2128
+	// Mark the sandbox to be a load balancer
2129
+	sbOptions := []SandboxOption{OptionLoadBalancer()}
2129 2130
 	if n.ingress {
2130 2131
 		sbOptions = append(sbOptions, OptionIngress())
2131 2132
 	}
... ...
@@ -110,7 +110,6 @@ type tableEventMessage struct {
110 110
 	tname string
111 111
 	key   string
112 112
 	msg   []byte
113
-	node  string
114 113
 }
115 114
 
116 115
 func (m *tableEventMessage) Invalidates(other memberlist.Broadcast) bool {
... ...
@@ -168,7 +167,6 @@ func (nDB *NetworkDB) sendTableEvent(event TableEvent_Type, nid string, tname st
168 168
 		id:    nid,
169 169
 		tname: tname,
170 170
 		key:   key,
171
-		node:  nDB.config.NodeID,
172 171
 	})
173 172
 	return nil
174 173
 }
... ...
@@ -24,6 +24,9 @@ const (
24 24
 	retryInterval         = 1 * time.Second
25 25
 	nodeReapInterval      = 24 * time.Hour
26 26
 	nodeReapPeriod        = 2 * time.Hour
27
+	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
28
+	// the following is roughly 1 minute
29
+	maxQueueLenBroadcastOnSync = 500
27 30
 )
28 31
 
29 32
 type logWriter struct{}
... ...
@@ -52,7 +55,7 @@ func (l *logWriter) Write(p []byte) (int, error) {
52 52
 
53 53
 // SetKey adds a new key to the key ring
54 54
 func (nDB *NetworkDB) SetKey(key []byte) {
55
-	logrus.Debugf("Adding key %s", hex.EncodeToString(key)[0:5])
55
+	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
56 56
 	nDB.Lock()
57 57
 	defer nDB.Unlock()
58 58
 	for _, dbKey := range nDB.config.Keys {
... ...
@@ -69,7 +72,7 @@ func (nDB *NetworkDB) SetKey(key []byte) {
69 69
 // SetPrimaryKey sets the given key as the primary key. This should have
70 70
 // been added apriori through SetKey
71 71
 func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
72
-	logrus.Debugf("Primary Key %s", hex.EncodeToString(key)[0:5])
72
+	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
73 73
 	nDB.RLock()
74 74
 	defer nDB.RUnlock()
75 75
 	for _, dbKey := range nDB.config.Keys {
... ...
@@ -85,7 +88,7 @@ func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
85 85
 // RemoveKey removes a key from the key ring. The key being removed
86 86
 // can't be the primary key
87 87
 func (nDB *NetworkDB) RemoveKey(key []byte) {
88
-	logrus.Debugf("Remove Key %s", hex.EncodeToString(key)[0:5])
88
+	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
89 89
 	nDB.Lock()
90 90
 	defer nDB.Unlock()
91 91
 	for i, dbKey := range nDB.config.Keys {
... ...
@@ -123,7 +126,7 @@ func (nDB *NetworkDB) clusterInit() error {
123 123
 	var err error
124 124
 	if len(nDB.config.Keys) > 0 {
125 125
 		for i, key := range nDB.config.Keys {
126
-			logrus.Debugf("Encryption key %d: %s", i+1, hex.EncodeToString(key)[0:5])
126
+			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
127 127
 		}
128 128
 		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
129 129
 		if err != nil {
... ...
@@ -285,18 +288,35 @@ func (nDB *NetworkDB) rejoinClusterBootStrap() {
285 285
 		return
286 286
 	}
287 287
 
288
+	myself, _ := nDB.nodes[nDB.config.NodeID]
288 289
 	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
289 290
 	for _, bootIP := range nDB.bootStrapIP {
290
-		for _, node := range nDB.nodes {
291
-			if node.Addr.Equal(bootIP) {
292
-				// One of the bootstrap nodes is part of the cluster, return
293
-				nDB.RUnlock()
294
-				return
291
+		// botostrap IPs are usually IP:port from the Join
292
+		var bootstrapIP net.IP
293
+		ipStr, _, err := net.SplitHostPort(bootIP)
294
+		if err != nil {
295
+			// try to parse it as an IP with port
296
+			// Note this seems to be the case for swarm that do not specify any port
297
+			ipStr = bootIP
298
+		}
299
+		bootstrapIP = net.ParseIP(ipStr)
300
+		if bootstrapIP != nil {
301
+			for _, node := range nDB.nodes {
302
+				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
303
+					// One of the bootstrap nodes (and not myself) is part of the cluster, return
304
+					nDB.RUnlock()
305
+					return
306
+				}
295 307
 			}
308
+			bootStrapIPs = append(bootStrapIPs, bootIP)
296 309
 		}
297
-		bootStrapIPs = append(bootStrapIPs, bootIP.String())
298 310
 	}
299 311
 	nDB.RUnlock()
312
+	if len(bootStrapIPs) == 0 {
313
+		// this will also avoid to call the Join with an empty list erasing the current bootstrap ip list
314
+		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
315
+		return
316
+	}
300 317
 	// None of the bootStrap nodes are in the cluster, call memberlist join
301 318
 	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
302 319
 	ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
... ...
@@ -555,6 +575,7 @@ func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
555 555
 
556 556
 	var err error
557 557
 	var networks []string
558
+	var success bool
558 559
 	for _, node := range nodes {
559 560
 		if node == nDB.config.NodeID {
560 561
 			continue
... ...
@@ -562,21 +583,25 @@ func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
562 562
 		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
563 563
 		networks = nDB.findCommonNetworks(node)
564 564
 		err = nDB.bulkSyncNode(networks, node, true)
565
-		// if its periodic bulksync stop after the first successful sync
566
-		if !all && err == nil {
567
-			break
568
-		}
569 565
 		if err != nil {
570 566
 			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
571 567
 			logrus.Warn(err.Error())
568
+		} else {
569
+			// bulk sync succeeded
570
+			success = true
571
+			// if its periodic bulksync stop after the first successful sync
572
+			if !all {
573
+				break
574
+			}
572 575
 		}
573 576
 	}
574 577
 
575
-	if err != nil {
576
-		return nil, err
578
+	if success {
579
+		// if at least one node sync succeeded
580
+		return networks, nil
577 581
 	}
578 582
 
579
-	return networks, nil
583
+	return nil, err
580 584
 }
581 585
 
582 586
 // Bulk sync all the table entries belonging to a set of networks to a
... ...
@@ -142,7 +142,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
142 142
 	return true
143 143
 }
144 144
 
145
-func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
145
+func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool {
146 146
 	// Update our local clock if the received messages has newer time.
147 147
 	nDB.tableClock.Witness(tEvent.LTime)
148 148
 
... ...
@@ -175,6 +175,14 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
175 175
 			nDB.Unlock()
176 176
 			return false
177 177
 		}
178
+	} else if tEvent.Type == TableEventTypeDelete && !isBulkSync {
179
+		nDB.Unlock()
180
+		// We don't know the entry, the entry is being deleted and the message is an async message
181
+		// In this case the safest approach is to ignore it, it is possible that the queue grew so much to
182
+		// exceed the garbage collection time (the residual reap time that is in the message is not being
183
+		// updated, to avoid inserting too many messages in the queue).
184
+		// Instead the messages coming from TCP bulk sync are safe with the latest value for the garbage collection time
185
+		return false
178 186
 	}
179 187
 
180 188
 	e = &entry{
... ...
@@ -197,11 +205,17 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
197 197
 	nDB.Unlock()
198 198
 
199 199
 	if err != nil && tEvent.Type == TableEventTypeDelete {
200
-		// If it is a delete event and we did not have a state for it, don't propagate to the application
200
+		// Again we don't know the entry but this is coming from a TCP sync so the message body is up to date.
201
+		// We had saved the state so to speed up convergence and be able to avoid accepting create events.
202
+		// Now we will rebroadcast the message if 2 conditions are met:
203
+		// 1) we had already synced this network (during the network join)
204
+		// 2) the residual reapTime is higher than 1/6 of the total reapTime.
201 205
 		// If the residual reapTime is lower or equal to 1/6 of the total reapTime don't bother broadcasting it around
202
-		// most likely the cluster is already aware of it, if not who will sync with this node will catch the state too.
203
-		// This also avoids that deletion of entries close to their garbage collection ends up circuling around forever
204
-		return e.reapTime > nDB.config.reapEntryInterval/6
206
+		// most likely the cluster is already aware of it
207
+		// This also reduces the possibility that deletions of entries close to their garbage collection end up circulating around
208
+		// forever
209
+		//logrus.Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync)
210
+		return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6
205 211
 	}
206 212
 
207 213
 	var op opType
... ...
@@ -215,7 +229,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
215 215
 	}
216 216
 
217 217
 	nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
218
-	return true
218
+	return network.inSync
219 219
 }
220 220
 
221 221
 func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) {
... ...
@@ -244,7 +258,7 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
244 244
 		return
245 245
 	}
246 246
 
247
-	if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast {
247
+	if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast {
248 248
 		var err error
249 249
 		buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
250 250
 		if err != nil {
... ...
@@ -261,12 +275,16 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
261 261
 			return
262 262
 		}
263 263
 
264
+		// if the queue is over the threshold, avoid distributing information coming from TCP sync
265
+		if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync {
266
+			return
267
+		}
268
+
264 269
 		n.tableBroadcasts.QueueBroadcast(&tableEventMessage{
265 270
 			msg:   buf,
266 271
 			id:    tEvent.NetworkID,
267 272
 			tname: tEvent.TableName,
268 273
 			key:   tEvent.Key,
269
-			node:  tEvent.NodeName,
270 274
 		})
271 275
 	}
272 276
 }
... ...
@@ -5,7 +5,6 @@ package networkdb
5 5
 import (
6 6
 	"context"
7 7
 	"fmt"
8
-	"net"
9 8
 	"os"
10 9
 	"strings"
11 10
 	"sync"
... ...
@@ -96,7 +95,7 @@ type NetworkDB struct {
96 96
 
97 97
 	// bootStrapIP is the list of IPs that can be used to bootstrap
98 98
 	// the gossip.
99
-	bootStrapIP []net.IP
99
+	bootStrapIP []string
100 100
 
101 101
 	// lastStatsTimestamp is the last timestamp when the stats got printed
102 102
 	lastStatsTimestamp time.Time
... ...
@@ -131,6 +130,9 @@ type network struct {
131 131
 	// Lamport time for the latest state of the entry.
132 132
 	ltime serf.LamportTime
133 133
 
134
+	// Gets set to true after the first bulk sync happens
135
+	inSync bool
136
+
134 137
 	// Node leave is in progress.
135 138
 	leaving bool
136 139
 
... ...
@@ -268,10 +270,8 @@ func New(c *Config) (*NetworkDB, error) {
268 268
 // instances passed by the caller in the form of addr:port
269 269
 func (nDB *NetworkDB) Join(members []string) error {
270 270
 	nDB.Lock()
271
-	nDB.bootStrapIP = make([]net.IP, 0, len(members))
272
-	for _, m := range members {
273
-		nDB.bootStrapIP = append(nDB.bootStrapIP, net.ParseIP(m))
274
-	}
271
+	nDB.bootStrapIP = append([]string(nil), members...)
272
+	logrus.Infof("The new bootstrap node list is:%v", nDB.bootStrapIP)
275 273
 	nDB.Unlock()
276 274
 	return nDB.clusterJoin(members)
277 275
 }
... ...
@@ -619,6 +619,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
619 619
 	}
620 620
 	nDB.addNetworkNode(nid, nDB.config.NodeID)
621 621
 	networkNodes := nDB.networkNodes[nid]
622
+	n = nodeNetworks[nid]
622 623
 	nDB.Unlock()
623 624
 
624 625
 	if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil {
... ...
@@ -630,6 +631,12 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error {
630 630
 		logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
631 631
 	}
632 632
 
633
+	// Mark the network as being synced
634
+	// note this is a best effort, we are not checking the result of the bulk sync
635
+	nDB.Lock()
636
+	n.inSync = true
637
+	nDB.Unlock()
638
+
633 639
 	return nil
634 640
 }
635 641
 
... ...
@@ -28,6 +28,7 @@ var NetDbPaths2Func = map[string]diagnostic.HTTPHandlerFunc{
28 28
 	"/deleteentry":  dbDeleteEntry,
29 29
 	"/getentry":     dbGetEntry,
30 30
 	"/gettable":     dbGetTable,
31
+	"/networkstats": dbNetworkStats,
31 32
 }
32 33
 
33 34
 func dbJoin(ctx interface{}, w http.ResponseWriter, r *http.Request) {
... ...
@@ -411,3 +412,41 @@ func dbGetTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
411 411
 	}
412 412
 	diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
413 413
 }
414
+
415
+func dbNetworkStats(ctx interface{}, w http.ResponseWriter, r *http.Request) {
416
+	r.ParseForm()
417
+	diagnostic.DebugHTTPForm(r)
418
+	_, json := diagnostic.ParseHTTPFormOptions(r)
419
+
420
+	// audit logs
421
+	log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": common.CallerName(0), "url": r.URL.String()})
422
+	log.Info("network stats")
423
+
424
+	if len(r.Form["nid"]) < 1 {
425
+		rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?nid=test", r.URL.Path))
426
+		log.Error("network stats failed, wrong input")
427
+		diagnostic.HTTPReply(w, rsp, json)
428
+		return
429
+	}
430
+
431
+	nDB, ok := ctx.(*NetworkDB)
432
+	if ok {
433
+		nDB.RLock()
434
+		networks := nDB.networks[nDB.config.NodeID]
435
+		network, ok := networks[r.Form["nid"][0]]
436
+
437
+		entries := -1
438
+		qLen := -1
439
+		if ok {
440
+			entries = network.entriesNumber
441
+			qLen = network.tableBroadcasts.NumQueued()
442
+		}
443
+		nDB.RUnlock()
444
+
445
+		rsp := diagnostic.CommandSucceed(&diagnostic.NetworkStatsResult{Entries: entries, QueueLen: qLen})
446
+		log.WithField("response", fmt.Sprintf("%+v", rsp)).Info("network stats done")
447
+		diagnostic.HTTPReply(w, rsp, json)
448
+		return
449
+	}
450
+	diagnostic.HTTPReply(w, diagnostic.FailCommand(fmt.Errorf("%s", dbNotAvailable)), json)
451
+}
414 452
new file mode 100644
... ...
@@ -0,0 +1,16 @@
0
+package kernel
1
+
2
+type conditionalCheck func(val1, val2 string) bool
3
+
4
+// OSValue represents a tuple: the desired value and the check function that decides when to apply the value
5
+type OSValue struct {
6
+	Value   string
7
+	CheckFn conditionalCheck
8
+}
9
+
10
+func propertyIsValid(val1, val2 string, check conditionalCheck) bool {
11
+	if check == nil || check(val1, val2) {
12
+		return true
13
+	}
14
+	return false
15
+}
0 16
new file mode 100644
... ...
@@ -0,0 +1,47 @@
0
+package kernel
1
+
2
+import (
3
+	"io/ioutil"
4
+	"path"
5
+	"strings"
6
+
7
+	"github.com/sirupsen/logrus"
8
+)
9
+
10
+// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
11
+// For example, net.ipv4.ip_forward translates to /proc/sys/net/ipv4/ip_forward.
12
+func writeSystemProperty(key, value string) error {
13
+	keyPath := strings.Replace(key, ".", "/", -1)
14
+	return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
15
+}
16
+
17
+// readSystemProperty reads the value from the path under /proc/sys and returns it
18
+func readSystemProperty(key string) (string, error) {
19
+	keyPath := strings.Replace(key, ".", "/", -1)
20
+	value, err := ioutil.ReadFile(path.Join("/proc/sys", keyPath))
21
+	if err != nil {
22
+		return "", err
23
+	}
24
+	return strings.TrimSpace(string(value)), nil
25
+}
26
+
27
+// ApplyOSTweaks applies the configuration values passed as arguments
28
+func ApplyOSTweaks(osConfig map[string]*OSValue) {
29
+	for k, v := range osConfig {
30
+		// read the existing property from disk
31
+		oldv, err := readSystemProperty(k)
32
+		if err != nil {
33
+			logrus.WithError(err).Errorf("error reading the kernel parameter %s", k)
34
+			continue
35
+		}
36
+
37
+		if propertyIsValid(oldv, v.Value, v.CheckFn) {
38
+			// write new prop value to disk
39
+			if err := writeSystemProperty(k, v.Value); err != nil {
40
+				logrus.WithError(err).Errorf("error setting the kernel parameter %s = %s, (leaving as %s)", k, v.Value, oldv)
41
+				continue
42
+			}
43
+			logrus.Debugf("updated kernel parameter %s = %s (was %s)", k, v.Value, oldv)
44
+		}
45
+	}
46
+}
0 47
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+// +build !linux
1
+
2
+package kernel
3
+
4
+// ApplyOSTweaks applies the configuration values passed as arguments
5
+func ApplyOSTweaks(osConfig map[string]*OSValue) {
6
+}
... ...
@@ -16,6 +16,7 @@ import (
16 16
 
17 17
 	"github.com/docker/docker/pkg/reexec"
18 18
 	"github.com/docker/libnetwork/ns"
19
+	"github.com/docker/libnetwork/osl/kernel"
19 20
 	"github.com/docker/libnetwork/types"
20 21
 	"github.com/sirupsen/logrus"
21 22
 	"github.com/vishvananda/netlink"
... ...
@@ -29,13 +30,18 @@ func init() {
29 29
 }
30 30
 
31 31
 var (
32
-	once             sync.Once
33
-	garbagePathMap   = make(map[string]bool)
34
-	gpmLock          sync.Mutex
35
-	gpmWg            sync.WaitGroup
36
-	gpmCleanupPeriod = 60 * time.Second
37
-	gpmChan          = make(chan chan struct{})
38
-	prefix           = defaultPrefix
32
+	once               sync.Once
33
+	garbagePathMap     = make(map[string]bool)
34
+	gpmLock            sync.Mutex
35
+	gpmWg              sync.WaitGroup
36
+	gpmCleanupPeriod   = 60 * time.Second
37
+	gpmChan            = make(chan chan struct{})
38
+	prefix             = defaultPrefix
39
+	loadBalancerConfig = map[string]*kernel.OSValue{
40
+		// expires connections from the IPVS connection table when the backend is not available
41
+		// more info: https://github.com/torvalds/linux/blob/master/Documentation/networking/ipvs-sysctl.txt#L126:1
42
+		"net.ipv4.vs.expire_nodest_conn": {"1", nil},
43
+	}
39 44
 )
40 45
 
41 46
 // The networkNamespace type is the linux implementation of the Sandbox
... ...
@@ -630,3 +636,13 @@ func setIPv6(path, iface string, enable bool) error {
630 630
 	}
631 631
 	return nil
632 632
 }
633
+
634
+// ApplyOSTweaks applies linux configs on the sandbox
635
+func (n *networkNamespace) ApplyOSTweaks(types []SandboxType) {
636
+	for _, t := range types {
637
+		switch t {
638
+		case SandboxTypeLoadBalancer:
639
+			kernel.ApplyOSTweaks(loadBalancerConfig)
640
+		}
641
+	}
642
+}
... ...
@@ -7,6 +7,16 @@ import (
7 7
 	"github.com/docker/libnetwork/types"
8 8
 )
9 9
 
10
+// SandboxType specifies the type of the sandbox; this can be used to apply special configs
11
+type SandboxType int
12
+
13
+const (
14
+	// SandboxTypeIngress indicates that the sandbox is for the ingress
15
+	SandboxTypeIngress = iota
16
+	// SandboxTypeLoadBalancer indicates that the sandbox is a load balancer
17
+	SandboxTypeLoadBalancer = iota
18
+)
19
+
10 20
 // Sandbox represents a network sandbox, identified by a specific key.  It
11 21
 // holds a list of Interfaces, routes etc, and more can be added dynamically.
12 22
 type Sandbox interface {
... ...
@@ -70,6 +80,9 @@ type Sandbox interface {
70 70
 
71 71
 	// restore sandbox
72 72
 	Restore(ifsopt map[string][]IfaceOption, routes []*types.StaticRoute, gw net.IP, gw6 net.IP) error
73
+
74
+	// ApplyOSTweaks applies operating system specific knobs on the sandbox
75
+	ApplyOSTweaks([]SandboxType)
73 76
 }
74 77
 
75 78
 // NeighborOptionSetter interface defines the option setter methods for interface options
... ...
@@ -83,6 +83,7 @@ type sandbox struct {
83 83
 	inDelete           bool
84 84
 	ingress            bool
85 85
 	ndotsSet           bool
86
+	oslTypes           []osl.SandboxType // slice of properties of this sandbox
86 87
 	sync.Mutex
87 88
 	// This mutex is used to serialize service related operation for an endpoint
88 89
 	// The lock is here because the endpoint is saved into the store so is not unique
... ...
@@ -1162,6 +1163,15 @@ func OptionPortMapping(portBindings []types.PortBinding) SandboxOption {
1162 1162
 func OptionIngress() SandboxOption {
1163 1163
 	return func(sb *sandbox) {
1164 1164
 		sb.ingress = true
1165
+		sb.oslTypes = append(sb.oslTypes, osl.SandboxTypeIngress)
1166
+	}
1167
+}
1168
+
1169
+// OptionLoadBalancer function returns an option setter for marking a
1170
+// sandbox as a load balancer sandbox.
1171
+func OptionLoadBalancer() SandboxOption {
1172
+	return func(sb *sandbox) {
1173
+		sb.oslTypes = append(sb.oslTypes, osl.SandboxTypeLoadBalancer)
1165 1174
 	}
1166 1175
 }
1167 1176
 
... ...
@@ -369,11 +369,13 @@ dnsOpt:
369 369
 						return fmt.Errorf("invalid ndots option %v", option)
370 370
 					}
371 371
 					if num, err := strconv.Atoi(parts[1]); err != nil {
372
-						return fmt.Errorf("invalid number for ndots option %v", option)
373
-					} else if num > 0 {
372
+						return fmt.Errorf("invalid number for ndots option: %v", parts[1])
373
+					} else if num >= 0 {
374 374
 						// if the user sets ndots, use the user setting
375 375
 						sb.ndotsSet = true
376 376
 						break dnsOpt
377
+					} else {
378
+						return fmt.Errorf("invalid number for ndots option: %v", num)
377 379
 					}
378 380
 				}
379 381
 			}
... ...
@@ -244,7 +244,7 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) {
244 244
 		}
245 245
 		sb.osSbox, err = osl.NewSandbox(sb.Key(), create, isRestore)
246 246
 		if err != nil {
247
-			logrus.Errorf("failed to create osl sandbox while trying to restore sandbox %s%s: %v", sb.ID()[0:7], msg, err)
247
+			logrus.Errorf("failed to create osl sandbox while trying to restore sandbox %.7s%s: %v", sb.ID(), msg, err)
248 248
 			continue
249 249
 		}
250 250
 
... ...
@@ -43,7 +43,7 @@ func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
43 43
 
44 44
 	if n.ingress {
45 45
 		if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
46
-			logrus.Errorf("Failed to add redirect rules for ep %s (%s): %v", ep.Name(), ep.ID()[0:7], err)
46
+			logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
47 47
 		}
48 48
 	}
49 49
 }
... ...
@@ -106,7 +106,7 @@ func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
106 106
 
107 107
 	i, err := ipvs.New(sb.Key())
108 108
 	if err != nil {
109
-		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb addition: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
109
+		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
110 110
 		return
111 111
 	}
112 112
 	defer i.Close()
... ...
@@ -142,14 +142,14 @@ func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
142 142
 			}
143 143
 		}
144 144
 
145
-		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %s (%s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID()[0:7], sb.ContainerID()[0:7])
145
+		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
146 146
 		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false); err != nil {
147
-			logrus.Errorf("Failed to add firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
147
+			logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
148 148
 			return
149 149
 		}
150 150
 
151 151
 		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
152
-			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %s (%s): %v", lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
152
+			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
153 153
 			return
154 154
 		}
155 155
 	}
... ...
@@ -164,7 +164,7 @@ func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
164 164
 	// destination.
165 165
 	s.SchedName = ""
166 166
 	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
167
-		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
167
+		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
168 168
 	}
169 169
 }
170 170
 
... ...
@@ -189,7 +189,7 @@ func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullR
189 189
 
190 190
 	i, err := ipvs.New(sb.Key())
191 191
 	if err != nil {
192
-		logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb removal: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
192
+		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
193 193
 		return
194 194
 	}
195 195
 	defer i.Close()
... ...
@@ -207,19 +207,19 @@ func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullR
207 207
 
208 208
 	if fullRemove {
209 209
 		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
210
-			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
210
+			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
211 211
 		}
212 212
 	} else {
213 213
 		d.Weight = 0
214 214
 		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
215
-			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %s (%s): %v", ip, lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
215
+			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
216 216
 		}
217 217
 	}
218 218
 
219 219
 	if rmService {
220 220
 		s.SchedName = ipvs.RoundRobin
221 221
 		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
222
-			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %s (%s): %v", lb.vip, lb.fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
222
+			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
223 223
 		}
224 224
 
225 225
 		if sb.ingress {
... ...
@@ -234,7 +234,7 @@ func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullR
234 234
 		}
235 235
 
236 236
 		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true); err != nil {
237
-			logrus.Errorf("Failed to delete firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
237
+			logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
238 238
 		}
239 239
 
240 240
 		// Remove IP alias from the VIP to the endpoint
... ...
@@ -332,6 +332,8 @@ func CompareIPNet(a, b *net.IPNet) bool {
332 332
 }
333 333
 
334 334
 // GetMinimalIP returns the address in its shortest form
335
+// If ip contains an IPv4-mapped IPv6 address, the 4-octet form of the IPv4 address will be returned.
336
+// Otherwise ip is returned unchanged.
335 337
 func GetMinimalIP(ip net.IP) net.IP {
336 338
 	if ip != nil && ip.To4() != nil {
337 339
 		return ip.To4()
... ...
@@ -43,7 +43,7 @@ github.com/opencontainers/selinux b29023b86e4a69d1b46b7e7b4e2b6fda03f0b9cd
43 43
 github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc374
44 44
 github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
45 45
 github.com/sirupsen/logrus v1.0.3
46
-github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a
46
+github.com/stretchr/testify v1.2.2
47 47
 github.com/syndtr/gocapability 33e07d32887e1e06b7c025f27ce52f62c7990bc0
48 48
 github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065
49 49
 github.com/vishvananda/netlink b2de5d10e38ecce8607e6b438b6d174f389a004e
... ...
@@ -55,8 +55,8 @@ golang.org/x/sync fd80eb99c8f653c847d294a001bdf2a3a6f768f5
55 55
 github.com/pkg/errors 839d9e913e063e28dfd0e6c7b7512793e0a48be9
56 56
 github.com/ishidawataru/sctp 07191f837fedd2f13d1ec7b5f885f0f3ec54b1cb
57 57
 
58
-github.com/davecgh/go-spew 8991bc29aa16c548c550c7ff78260e27b9ab7c73
59
-github.com/pmezard/go-difflib 792786c7400a136282c1664665ae0a8db921c6c2
58
+github.com/davecgh/go-spew v1.1.0
59
+github.com/pmezard/go-difflib v1.0.0
60 60
 github.com/cyphar/filepath-securejoin v0.2.1
61 61
 github.com/hashicorp/errwrap 7554cd9344cec97297fa6649b055a8c98c2a1e55
62 62
 github.com/hashicorp/go-immutable-radix 7f3cd4390caab3250a57f30efdb2a65dd7649ecf