Browse code

Vendoring libnetwork b66c0385f30c6aa27b2957ed1072682c19a0b0b4

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2016/05/08 16:32:51
Showing 70 changed files
... ...
@@ -29,7 +29,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
29 29
 clone git github.com/imdario/mergo 0.2.1
30 30
 
31 31
 #get libnetwork packages
32
-clone git github.com/docker/libnetwork v0.8.0-dev.1
32
+clone git github.com/docker/libnetwork b66c0385f30c6aa27b2957ed1072682c19a0b0b4
33 33
 clone git github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d
34 34
 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
35 35
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -1,5 +1,17 @@
1 1
 # Changelog
2 2
 
3
+## 0.8.0-dev.2 (2016-05-07)
4
+- Fix an issue which may arise during sandbox cleanup (https://github.com/docker/libnetwork/pull/1157)
5
+- Fix cleanup logic in case of ipv6 allocation failure
6
+- Don't add /etc/hosts record if container's ip is empty (--net=none)
7
+- Fix default gw logic for internal networks
8
+- Error when updating IPv6 gateway (https://github.com/docker/libnetwork/issues/1142)
9
+- Fixes https://github.com/docker/libnetwork/issues/1113
10
+- Fixes https://github.com/docker/libnetwork/issues/1069
11
+- Fixes https://github.com/docker/libnetwork/issues/1117
12
+- Increase the concurrent query rate-limit count
13
+- Changes to build libnetwork in Solaris
14
+
3 15
 ## 0.8.0-dev.1 (2016-04-16)
4 16
 - Fixes docker/docker#16964
5 17
 - Added maximum egress bandwidth qos for Windows
... ...
@@ -5,8 +5,8 @@ dockerargs = --privileged -v $(shell pwd):/go/src/github.com/docker/libnetwork -
5 5
 container_env = -e "INSIDECONTAINER=-incontainer=true"
6 6
 docker = docker run --rm -it ${dockerargs} $$EXTRA_ARGS ${container_env} ${build_image}
7 7
 ciargs = -e CIRCLECI -e "COVERALLS_TOKEN=$$COVERALLS_TOKEN" -e "INSIDECONTAINER=-incontainer=true"
8
-cidocker = docker run ${dockerargs} ${ciargs} ${container_env} ${build_image}
9
-CROSS_PLATFORMS = linux/amd64 linux/386 linux/arm windows/amd64 windows/386
8
+cidocker = docker run ${dockerargs} ${ciargs} $$EXTRA_ARGS ${container_env} ${build_image}
9
+CROSS_PLATFORMS = linux/amd64 linux/386 linux/arm windows/amd64
10 10
 
11 11
 all: ${build_image}.created build check integration-tests clean
12 12
 
... ...
@@ -102,4 +102,4 @@ circle-ci-check: ${build_image}.created
102 102
 circle-ci-build: ${build_image}.created
103 103
 	@${cidocker} make build-local
104 104
 
105
-circle-ci: circle-ci-check circle-ci-build integration-tests
105
+circle-ci: circle-ci-check circle-ci-cross circle-ci-build integration-tests
... ...
@@ -34,7 +34,7 @@ func main() {
34 34
 
35 35
 	// Create a network for containers to join.
36 36
 	// NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can use.
37
-	network, err := controller.NewNetwork(networkType, "network1")
37
+	network, err := controller.NewNetwork(networkType, "network1", "")
38 38
 	if err != nil {
39 39
 		log.Fatalf("controller.NewNetwork: %s", err)
40 40
 	}
41 41
new file mode 100644
... ...
@@ -0,0 +1,369 @@
0
+package libnetwork
1
+
2
+import (
3
+	"fmt"
4
+	"net"
5
+	"os"
6
+	"strings"
7
+
8
+	"github.com/Sirupsen/logrus"
9
+	"github.com/docker/go-events"
10
+	"github.com/docker/libnetwork/datastore"
11
+	"github.com/docker/libnetwork/discoverapi"
12
+	"github.com/docker/libnetwork/driverapi"
13
+	"github.com/docker/libnetwork/networkdb"
14
+)
15
+
16
+type agent struct {
17
+	networkDB         *networkdb.NetworkDB
18
+	bindAddr          string
19
+	epTblCancel       func()
20
+	driverCancelFuncs map[string][]func()
21
+}
22
+
23
// getBindAddr returns the textual form of the first address configured on the
// interface named ifaceName that is not a link-local unicast address.
//
// An error is returned when the interface cannot be found, when its addresses
// cannot be listed, or when none of its addresses qualifies.
func getBindAddr(ifaceName string) (string, error) {
	iface, err := net.InterfaceByName(ifaceName)
	if err != nil {
		return "", fmt.Errorf("failed to find interface %s: %v", ifaceName, err)
	}

	addrs, err := iface.Addrs()
	if err != nil {
		return "", fmt.Errorf("failed to get interface addresses: %v", err)
	}

	for _, a := range addrs {
		ipNet, ok := a.(*net.IPNet)
		// Skip entries that are not IP networks, carry no IP at all (which
		// would otherwise be rendered as "<nil>"), or are link-local.
		if !ok || ipNet.IP == nil || ipNet.IP.IsLinkLocalUnicast() {
			continue
		}

		return ipNet.IP.String(), nil
	}

	// Name the interface so the failure can be acted upon.
	return "", fmt.Errorf("failed to get bind address: interface %s has no usable address", ifaceName)
}
50
+
51
+func resolveAddr(addrOrInterface string) (string, error) {
52
+	// Try and see if this is a valid IP address
53
+	if net.ParseIP(addrOrInterface) != nil {
54
+		return addrOrInterface, nil
55
+	}
56
+
57
+	// If not a valid IP address, it should be a valid interface
58
+	return getBindAddr(addrOrInterface)
59
+}
60
+
61
+func (c *controller) agentInit(bindAddrOrInterface string) error {
62
+	if !c.cfg.Daemon.IsAgent {
63
+		return nil
64
+	}
65
+
66
+	bindAddr, err := resolveAddr(bindAddrOrInterface)
67
+	if err != nil {
68
+		return err
69
+	}
70
+
71
+	hostname, _ := os.Hostname()
72
+	nDB, err := networkdb.New(&networkdb.Config{
73
+		BindAddr: bindAddr,
74
+		NodeName: hostname,
75
+	})
76
+
77
+	if err != nil {
78
+		return err
79
+	}
80
+
81
+	ch, cancel := nDB.Watch("endpoint_table", "", "")
82
+
83
+	c.agent = &agent{
84
+		networkDB:         nDB,
85
+		bindAddr:          bindAddr,
86
+		epTblCancel:       cancel,
87
+		driverCancelFuncs: make(map[string][]func()),
88
+	}
89
+
90
+	go c.handleTableEvents(ch, c.handleEpTableEvent)
91
+	return nil
92
+}
93
+
94
+func (c *controller) agentJoin(remotes []string) error {
95
+	if c.agent == nil {
96
+		return nil
97
+	}
98
+
99
+	return c.agent.networkDB.Join(remotes)
100
+}
101
+
102
+func (c *controller) agentDriverNotify(d driverapi.Driver) {
103
+	if c.agent == nil {
104
+		return
105
+	}
106
+
107
+	d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
108
+		Address: c.agent.bindAddr,
109
+		Self:    true,
110
+	})
111
+}
112
+
113
+func (c *controller) agentClose() {
114
+	if c.agent == nil {
115
+		return
116
+	}
117
+
118
+	for _, cancelFuncs := range c.agent.driverCancelFuncs {
119
+		for _, cancel := range cancelFuncs {
120
+			cancel()
121
+		}
122
+	}
123
+	c.agent.epTblCancel()
124
+
125
+	c.agent.networkDB.Close()
126
+}
127
+
128
+func (n *network) isClusterEligible() bool {
129
+	if n.driverScope() != datastore.GlobalScope {
130
+		return false
131
+	}
132
+
133
+	c := n.getController()
134
+	if c.agent == nil {
135
+		return false
136
+	}
137
+
138
+	return true
139
+}
140
+
141
+func (n *network) joinCluster() error {
142
+	if !n.isClusterEligible() {
143
+		return nil
144
+	}
145
+
146
+	c := n.getController()
147
+	return c.agent.networkDB.JoinNetwork(n.ID())
148
+}
149
+
150
+func (n *network) leaveCluster() error {
151
+	if !n.isClusterEligible() {
152
+		return nil
153
+	}
154
+
155
+	c := n.getController()
156
+	return c.agent.networkDB.LeaveNetwork(n.ID())
157
+}
158
+
159
+func (ep *endpoint) addToCluster() error {
160
+	n := ep.getNetwork()
161
+	if !n.isClusterEligible() {
162
+		return nil
163
+	}
164
+
165
+	c := n.getController()
166
+	if !ep.isAnonymous() && ep.Iface().Address() != nil {
167
+		if err := c.addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), ep.Iface().Address().IP); err != nil {
168
+			return err
169
+		}
170
+
171
+		if err := c.agent.networkDB.CreateEntry("endpoint_table", n.ID(), ep.ID(), []byte(fmt.Sprintf("%s,%s,%s,%s", ep.Name(), ep.svcName,
172
+			ep.svcID, ep.Iface().Address().IP))); err != nil {
173
+			return err
174
+		}
175
+	}
176
+
177
+	for _, te := range ep.joinInfo.driverTableEntries {
178
+		if err := c.agent.networkDB.CreateEntry(te.tableName, n.ID(), te.key, te.value); err != nil {
179
+			return err
180
+		}
181
+	}
182
+
183
+	return nil
184
+}
185
+
186
+func (ep *endpoint) deleteFromCluster() error {
187
+	n := ep.getNetwork()
188
+	if !n.isClusterEligible() {
189
+		return nil
190
+	}
191
+
192
+	c := n.getController()
193
+	if !ep.isAnonymous() {
194
+		if ep.Iface().Address() != nil {
195
+			if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), ep.Iface().Address().IP); err != nil {
196
+				return err
197
+			}
198
+		}
199
+
200
+		if err := c.agent.networkDB.DeleteEntry("endpoint_table", n.ID(), ep.ID()); err != nil {
201
+			return err
202
+		}
203
+	}
204
+
205
+	if ep.joinInfo == nil {
206
+		return nil
207
+	}
208
+
209
+	for _, te := range ep.joinInfo.driverTableEntries {
210
+		if err := c.agent.networkDB.DeleteEntry(te.tableName, n.ID(), te.key); err != nil {
211
+			return err
212
+		}
213
+	}
214
+
215
+	return nil
216
+}
217
+
218
+func (n *network) addDriverWatches() {
219
+	if !n.isClusterEligible() {
220
+		return
221
+	}
222
+
223
+	c := n.getController()
224
+	for _, tableName := range n.driverTables {
225
+		ch, cancel := c.agent.networkDB.Watch(tableName, n.ID(), "")
226
+		c.Lock()
227
+		c.agent.driverCancelFuncs[n.ID()] = append(c.agent.driverCancelFuncs[n.ID()], cancel)
228
+		c.Unlock()
229
+
230
+		go c.handleTableEvents(ch, n.handleDriverTableEvent)
231
+		d, err := n.driver(false)
232
+		if err != nil {
233
+			logrus.Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err)
234
+			return
235
+		}
236
+
237
+		c.agent.networkDB.WalkTable(tableName, func(nid, key string, value []byte) bool {
238
+			d.EventNotify(driverapi.Create, n.ID(), tableName, key, value)
239
+			return false
240
+		})
241
+	}
242
+}
243
+
244
+func (n *network) cancelDriverWatches() {
245
+	if !n.isClusterEligible() {
246
+		return
247
+	}
248
+
249
+	c := n.getController()
250
+	c.Lock()
251
+	cancelFuncs := c.agent.driverCancelFuncs[n.ID()]
252
+	delete(c.agent.driverCancelFuncs, n.ID())
253
+	c.Unlock()
254
+
255
+	for _, cancel := range cancelFuncs {
256
+		cancel()
257
+	}
258
+}
259
+
260
+func (c *controller) handleTableEvents(ch chan events.Event, fn func(events.Event)) {
261
+	for {
262
+		select {
263
+		case ev, ok := <-ch:
264
+			if !ok {
265
+				return
266
+			}
267
+
268
+			fn(ev)
269
+		}
270
+	}
271
+}
272
+
273
+func (n *network) handleDriverTableEvent(ev events.Event) {
274
+	d, err := n.driver(false)
275
+	if err != nil {
276
+		logrus.Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err)
277
+		return
278
+	}
279
+
280
+	var (
281
+		etype driverapi.EventType
282
+		tname string
283
+		key   string
284
+		value []byte
285
+	)
286
+
287
+	switch event := ev.(type) {
288
+	case networkdb.CreateEvent:
289
+		tname = event.Table
290
+		key = event.Key
291
+		value = event.Value
292
+		etype = driverapi.Create
293
+	case networkdb.DeleteEvent:
294
+		tname = event.Table
295
+		key = event.Key
296
+		value = event.Value
297
+		etype = driverapi.Delete
298
+	case networkdb.UpdateEvent:
299
+		tname = event.Table
300
+		key = event.Key
301
+		value = event.Value
302
+		etype = driverapi.Delete
303
+	}
304
+
305
+	d.EventNotify(etype, n.ID(), tname, key, value)
306
+}
307
+
308
+func (c *controller) handleEpTableEvent(ev events.Event) {
309
+	var (
310
+		nid   string
311
+		eid   string
312
+		value string
313
+		isAdd bool
314
+	)
315
+
316
+	switch event := ev.(type) {
317
+	case networkdb.CreateEvent:
318
+		nid = event.NetworkID
319
+		eid = event.Key
320
+		value = string(event.Value)
321
+		isAdd = true
322
+	case networkdb.DeleteEvent:
323
+		nid = event.NetworkID
324
+		eid = event.Key
325
+		value = string(event.Value)
326
+	case networkdb.UpdateEvent:
327
+		logrus.Errorf("Unexpected update service table event = %#v", event)
328
+	}
329
+
330
+	nw, err := c.NetworkByID(nid)
331
+	if err != nil {
332
+		logrus.Errorf("Could not find network %s while handling service table event: %v", nid, err)
333
+		return
334
+	}
335
+	n := nw.(*network)
336
+
337
+	vals := strings.Split(value, ",")
338
+	if len(vals) < 4 {
339
+		logrus.Errorf("Incorrect service table value = %s", value)
340
+		return
341
+	}
342
+
343
+	name := vals[0]
344
+	svcName := vals[1]
345
+	svcID := vals[2]
346
+	ip := net.ParseIP(vals[3])
347
+
348
+	if name == "" || ip == nil {
349
+		logrus.Errorf("Invalid endpoint name/ip received while handling service table event %s", value)
350
+		return
351
+	}
352
+
353
+	if isAdd {
354
+		if err := c.addServiceBinding(svcName, svcID, nid, eid, ip); err != nil {
355
+			logrus.Errorf("Failed adding service binding for value %s: %v", value, err)
356
+			return
357
+		}
358
+
359
+		n.addSvcRecords(name, ip, nil, true)
360
+	} else {
361
+		if err := c.rmServiceBinding(svcName, svcID, nid, eid, ip); err != nil {
362
+			logrus.Errorf("Failed adding service binding for value %s: %v", value, err)
363
+			return
364
+		}
365
+
366
+		n.deleteSvcRecords(name, ip, nil, true)
367
+	}
368
+}
... ...
@@ -370,6 +370,8 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64
370 370
 
371 371
 // This check is needed to cover the case where the number of bits is not a multiple of blockLen
372 372
 func (h *Handle) validateOrdinal(ordinal uint64) error {
373
+	h.Lock()
374
+	defer h.Unlock()
373 375
 	if ordinal >= h.bits {
374 376
 		return fmt.Errorf("bit does not belong to the sequence")
375 377
 	}
... ...
@@ -75,6 +75,10 @@ func (h *Handle) CopyTo(o datastore.KVObject) error {
75 75
 	defer h.Unlock()
76 76
 
77 77
 	dstH := o.(*Handle)
78
+	if h == dstH {
79
+		return nil
80
+	}
81
+	dstH.Lock()
78 82
 	dstH.bits = h.bits
79 83
 	dstH.unselected = h.unselected
80 84
 	dstH.head = h.head.getCopy()
... ...
@@ -83,6 +87,7 @@ func (h *Handle) CopyTo(o datastore.KVObject) error {
83 83
 	dstH.dbIndex = h.dbIndex
84 84
 	dstH.dbExists = h.dbExists
85 85
 	dstH.store = h.store
86
+	dstH.Unlock()
86 87
 
87 88
 	return nil
88 89
 }
... ...
@@ -22,9 +22,12 @@ type Config struct {
22 22
 // DaemonCfg represents libnetwork core configuration
23 23
 type DaemonCfg struct {
24 24
 	Debug          bool
25
+	IsAgent        bool
25 26
 	DataDir        string
26 27
 	DefaultNetwork string
27 28
 	DefaultDriver  string
29
+	Bind           string
30
+	Neighbors      []string
28 31
 	Labels         []string
29 32
 	DriverCfg      map[string]interface{}
30 33
 }
... ...
@@ -81,6 +84,27 @@ func ParseConfigOptions(cfgOptions ...Option) *Config {
81 81
 // to the controller
82 82
 type Option func(c *Config)
83 83
 
84
+// OptionBind function returns an option setter for setting a bind interface or address
85
+func OptionBind(bind string) Option {
86
+	return func(c *Config) {
87
+		c.Daemon.Bind = bind
88
+	}
89
+}
90
+
91
+// OptionAgent function returns an option setter for setting agent mode
92
+func OptionAgent() Option {
93
+	return func(c *Config) {
94
+		c.Daemon.IsAgent = true
95
+	}
96
+}
97
+
98
+// OptionNeighbors function returns an option setter for setting a list of neighbors to join.
99
+func OptionNeighbors(neighbors []string) Option {
100
+	return func(c *Config) {
101
+		c.Daemon.Neighbors = neighbors
102
+	}
103
+}
104
+
84 105
 // OptionDefaultNetwork function returns an option setter for a default network
85 106
 func OptionDefaultNetwork(dn string) Option {
86 107
 	return func(c *Config) {
... ...
@@ -15,7 +15,7 @@ create network namespaces and allocate interfaces for containers to use.
15 15
 
16 16
 	// Create a network for containers to join.
17 17
 	// NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can make use of
18
-	network, err := controller.NewNetwork(networkType, "network1")
18
+	network, err := controller.NewNetwork(networkType, "network1", "")
19 19
 	if err != nil {
20 20
 		return
21 21
 	}
... ...
@@ -58,6 +58,7 @@ import (
58 58
 	"github.com/docker/libnetwork/datastore"
59 59
 	"github.com/docker/libnetwork/discoverapi"
60 60
 	"github.com/docker/libnetwork/driverapi"
61
+	"github.com/docker/libnetwork/drvregistry"
61 62
 	"github.com/docker/libnetwork/hostdiscovery"
62 63
 	"github.com/docker/libnetwork/ipamapi"
63 64
 	"github.com/docker/libnetwork/netlabel"
... ...
@@ -75,7 +76,7 @@ type NetworkController interface {
75 75
 	Config() config.Config
76 76
 
77 77
 	// Create a new network. The options parameter carries network specific options.
78
-	NewNetwork(networkType, name string, options ...NetworkOption) (Network, error)
78
+	NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error)
79 79
 
80 80
 	// Networks returns the list of Network(s) managed by this controller.
81 81
 	Networks() []Network
... ...
@@ -119,55 +120,74 @@ type NetworkWalker func(nw Network) bool
119 119
 // When the function returns true, the walk will stop.
120 120
 type SandboxWalker func(sb Sandbox) bool
121 121
 
122
-type driverData struct {
123
-	driver     driverapi.Driver
124
-	capability driverapi.Capability
125
-}
126
-
127
-type ipamData struct {
128
-	driver     ipamapi.Ipam
129
-	capability *ipamapi.Capability
130
-	// default address spaces are provided by ipam driver at registration time
131
-	defaultLocalAddressSpace, defaultGlobalAddressSpace string
132
-}
133
-
134
-type driverTable map[string]*driverData
135
-type ipamTable map[string]*ipamData
136 122
 type sandboxTable map[string]*sandbox
137 123
 
138 124
 type controller struct {
139
-	id             string
140
-	drivers        driverTable
141
-	ipamDrivers    ipamTable
142
-	sandboxes      sandboxTable
143
-	cfg            *config.Config
144
-	stores         []datastore.DataStore
145
-	discovery      hostdiscovery.HostDiscovery
146
-	extKeyListener net.Listener
147
-	watchCh        chan *endpoint
148
-	unWatchCh      chan *endpoint
149
-	svcDb          map[string]svcInfo
150
-	nmap           map[string]*netWatch
151
-	defOsSbox      osl.Sandbox
152
-	sboxOnce       sync.Once
125
+	id              string
126
+	drvRegistry     *drvregistry.DrvRegistry
127
+	sandboxes       sandboxTable
128
+	cfg             *config.Config
129
+	stores          []datastore.DataStore
130
+	discovery       hostdiscovery.HostDiscovery
131
+	extKeyListener  net.Listener
132
+	watchCh         chan *endpoint
133
+	unWatchCh       chan *endpoint
134
+	svcRecords      map[string]svcInfo
135
+	nmap            map[string]*netWatch
136
+	serviceBindings map[string]*service
137
+	defOsSbox       osl.Sandbox
138
+	sboxOnce        sync.Once
139
+	agent           *agent
153 140
 	sync.Mutex
154 141
 }
155 142
 
143
+type initializer struct {
144
+	fn    drvregistry.InitFunc
145
+	ntype string
146
+}
147
+
156 148
 // New creates a new instance of network controller.
157 149
 func New(cfgOptions ...config.Option) (NetworkController, error) {
158 150
 	c := &controller{
159
-		id:          stringid.GenerateRandomID(),
160
-		cfg:         config.ParseConfigOptions(cfgOptions...),
161
-		sandboxes:   sandboxTable{},
162
-		drivers:     driverTable{},
163
-		ipamDrivers: ipamTable{},
164
-		svcDb:       make(map[string]svcInfo),
151
+		id:              stringid.GenerateRandomID(),
152
+		cfg:             config.ParseConfigOptions(cfgOptions...),
153
+		sandboxes:       sandboxTable{},
154
+		svcRecords:      make(map[string]svcInfo),
155
+		serviceBindings: make(map[string]*service),
156
+	}
157
+
158
+	if err := c.agentInit(c.cfg.Daemon.Bind); err != nil {
159
+		return nil, err
160
+	}
161
+
162
+	if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil {
163
+		return nil, err
165 164
 	}
166 165
 
167 166
 	if err := c.initStores(); err != nil {
168 167
 		return nil, err
169 168
 	}
170 169
 
170
+	drvRegistry, err := drvregistry.New(c.getStore(datastore.LocalScope), c.getStore(datastore.GlobalScope), c.RegisterDriver, nil)
171
+	if err != nil {
172
+		return nil, err
173
+	}
174
+
175
+	for _, i := range getInitializers() {
176
+		var dcfg map[string]interface{}
177
+
178
+		// External plugins don't need config passed through daemon. They can
179
+		// bootstrap themselves
180
+		if i.ntype != "remote" {
181
+			dcfg = c.makeDriverConfig(i.ntype)
182
+		}
183
+
184
+		if err := drvRegistry.AddDriver(i.ntype, i.fn, dcfg); err != nil {
185
+			return nil, err
186
+		}
187
+	}
188
+	c.drvRegistry = drvRegistry
189
+
171 190
 	if c.cfg != nil && c.cfg.Cluster.Watcher != nil {
172 191
 		if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil {
173 192
 			// Failing to initialize discovery is a bad situation to be in.
... ...
@@ -176,15 +196,6 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
176 176
 		}
177 177
 	}
178 178
 
179
-	if err := initDrivers(c); err != nil {
180
-		return nil, err
181
-	}
182
-
183
-	if err := initIpams(c, c.getStore(datastore.LocalScope),
184
-		c.getStore(datastore.GlobalScope)); err != nil {
185
-		return nil, err
186
-	}
187
-
188 179
 	c.sandboxCleanup()
189 180
 	c.cleanupLocalEndpoints()
190 181
 	c.networkCleanup()
... ...
@@ -196,8 +207,67 @@ func New(cfgOptions ...config.Option) (NetworkController, error) {
196 196
 	return c, nil
197 197
 }
198 198
 
199
+func (c *controller) makeDriverConfig(ntype string) map[string]interface{} {
200
+	if c.cfg == nil {
201
+		return nil
202
+	}
203
+
204
+	config := make(map[string]interface{})
205
+
206
+	for _, label := range c.cfg.Daemon.Labels {
207
+		if !strings.HasPrefix(netlabel.Key(label), netlabel.DriverPrefix+"."+ntype) {
208
+			continue
209
+		}
210
+
211
+		config[netlabel.Key(label)] = netlabel.Value(label)
212
+	}
213
+
214
+	drvCfg, ok := c.cfg.Daemon.DriverCfg[ntype]
215
+	if ok {
216
+		for k, v := range drvCfg.(map[string]interface{}) {
217
+			config[k] = v
218
+		}
219
+	}
220
+
221
+	for k, v := range c.cfg.Scopes {
222
+		if !v.IsValid() {
223
+			continue
224
+		}
225
+		config[netlabel.MakeKVClient(k)] = discoverapi.DatastoreConfigData{
226
+			Scope:    k,
227
+			Provider: v.Client.Provider,
228
+			Address:  v.Client.Address,
229
+			Config:   v.Client.Config,
230
+		}
231
+	}
232
+
233
+	return config
234
+}
235
+
199 236
 var procReloadConfig = make(chan (bool), 1)
200 237
 
238
+func (c *controller) processAgentConfig(cfg *config.Config) (bool, error) {
239
+	if c.cfg.Daemon.IsAgent == cfg.Daemon.IsAgent {
240
+		// Agent configuration not changed
241
+		return false, nil
242
+	}
243
+
244
+	c.Lock()
245
+	c.cfg = cfg
246
+	c.Unlock()
247
+
248
+	if err := c.agentInit(c.cfg.Daemon.Bind); err != nil {
249
+		return false, err
250
+	}
251
+
252
+	if err := c.agentJoin(c.cfg.Daemon.Neighbors); err != nil {
253
+		c.agentClose()
254
+		return false, err
255
+	}
256
+
257
+	return true, nil
258
+}
259
+
201 260
 func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
202 261
 	procReloadConfig <- true
203 262
 	defer func() { <-procReloadConfig }()
... ...
@@ -206,6 +276,16 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
206 206
 	// Refuse the configuration if it alters an existing datastore client configuration.
207 207
 	update := false
208 208
 	cfg := config.ParseConfigOptions(cfgOptions...)
209
+
210
+	isAgentConfig, err := c.processAgentConfig(cfg)
211
+	if err != nil {
212
+		return err
213
+	}
214
+
215
+	if isAgentConfig {
216
+		return nil
217
+	}
218
+
209 219
 	for s := range c.cfg.Scopes {
210 220
 		if _, ok := cfg.Scopes[s]; !ok {
211 221
 			return types.ForbiddenErrorf("cannot accept new configuration because it removes an existing datastore client")
... ...
@@ -228,16 +308,6 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
228 228
 		return nil
229 229
 	}
230 230
 
231
-	c.Lock()
232
-	c.cfg = cfg
233
-	c.Unlock()
234
-
235
-	if c.discovery == nil && c.cfg.Cluster.Watcher != nil {
236
-		if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil {
237
-			log.Errorf("Failed to Initialize Discovery after configuration update: %v", err)
238
-		}
239
-	}
240
-
241 231
 	var dsConfig *discoverapi.DatastoreConfigData
242 232
 	for scope, sCfg := range cfg.Scopes {
243 233
 		if scope == datastore.LocalScope || !sCfg.IsValid() {
... ...
@@ -255,17 +325,25 @@ func (c *controller) ReloadConfiguration(cfgOptions ...config.Option) error {
255 255
 		return nil
256 256
 	}
257 257
 
258
-	for nm, id := range c.getIpamDrivers() {
259
-		err := id.driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig)
258
+	c.drvRegistry.WalkIPAMs(func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) bool {
259
+		err := driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig)
260 260
 		if err != nil {
261
-			log.Errorf("Failed to set datastore in driver %s: %v", nm, err)
261
+			log.Errorf("Failed to set datastore in driver %s: %v", name, err)
262 262
 		}
263
-	}
263
+		return false
264
+	})
264 265
 
265
-	for nm, id := range c.getNetDrivers() {
266
-		err := id.driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig)
266
+	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
267
+		err := driver.DiscoverNew(discoverapi.DatastoreConfig, *dsConfig)
267 268
 		if err != nil {
268
-			log.Errorf("Failed to set datastore in driver %s: %v", nm, err)
269
+			log.Errorf("Failed to set datastore in driver %s: %v", name, err)
270
+		}
271
+		return false
272
+	})
273
+
274
+	if c.discovery == nil && c.cfg.Cluster.Watcher != nil {
275
+		if err := c.initDiscovery(c.cfg.Cluster.Watcher); err != nil {
276
+			log.Errorf("Failed to Initialize Discovery after configuration update: %v", err)
269 277
 		}
270 278
 	}
271 279
 
... ...
@@ -333,34 +411,30 @@ func (c *controller) hostLeaveCallback(nodes []net.IP) {
333 333
 }
334 334
 
335 335
 func (c *controller) processNodeDiscovery(nodes []net.IP, add bool) {
336
-	c.Lock()
337
-	drivers := []*driverData{}
338
-	for _, d := range c.drivers {
339
-		drivers = append(drivers, d)
340
-	}
341
-	c.Unlock()
342
-
343
-	for _, d := range drivers {
344
-		c.pushNodeDiscovery(d, nodes, add)
345
-	}
336
+	c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
337
+		c.pushNodeDiscovery(driver, capability, nodes, add)
338
+		return false
339
+	})
346 340
 }
347 341
 
348
-func (c *controller) pushNodeDiscovery(d *driverData, nodes []net.IP, add bool) {
342
+func (c *controller) pushNodeDiscovery(d driverapi.Driver, cap driverapi.Capability, nodes []net.IP, add bool) {
349 343
 	var self net.IP
350 344
 	if c.cfg != nil {
351 345
 		addr := strings.Split(c.cfg.Cluster.Address, ":")
352 346
 		self = net.ParseIP(addr[0])
353 347
 	}
354
-	if d == nil || d.capability.DataScope != datastore.GlobalScope || nodes == nil {
348
+
349
+	if d == nil || cap.DataScope != datastore.GlobalScope || nodes == nil {
355 350
 		return
356 351
 	}
352
+
357 353
 	for _, node := range nodes {
358 354
 		nodeData := discoverapi.NodeDiscoveryData{Address: node.String(), Self: node.Equal(self)}
359 355
 		var err error
360 356
 		if add {
361
-			err = d.driver.DiscoverNew(discoverapi.NodeDiscovery, nodeData)
357
+			err = d.DiscoverNew(discoverapi.NodeDiscovery, nodeData)
362 358
 		} else {
363
-			err = d.driver.DiscoverDelete(discoverapi.NodeDiscovery, nodeData)
359
+			err = d.DiscoverDelete(discoverapi.NodeDiscovery, nodeData)
364 360
 		}
365 361
 		if err != nil {
366 362
 			log.Debugf("discovery notification error : %v", err)
... ...
@@ -378,73 +452,36 @@ func (c *controller) Config() config.Config {
378 378
 }
379 379
 
380 380
 func (c *controller) RegisterDriver(networkType string, driver driverapi.Driver, capability driverapi.Capability) error {
381
-	if !config.IsValidName(networkType) {
382
-		return ErrInvalidName(networkType)
383
-	}
384
-
385 381
 	c.Lock()
386
-	if _, ok := c.drivers[networkType]; ok {
387
-		c.Unlock()
388
-		return driverapi.ErrActiveRegistration(networkType)
389
-	}
390
-	dData := &driverData{driver, capability}
391
-	c.drivers[networkType] = dData
392 382
 	hd := c.discovery
393 383
 	c.Unlock()
394 384
 
395 385
 	if hd != nil {
396
-		c.pushNodeDiscovery(dData, hd.Fetch(), true)
386
+		c.pushNodeDiscovery(driver, capability, hd.Fetch(), true)
397 387
 	}
398 388
 
389
+	c.agentDriverNotify(driver)
399 390
 	return nil
400 391
 }
401 392
 
402
-func (c *controller) registerIpamDriver(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error {
403
-	if !config.IsValidName(name) {
404
-		return ErrInvalidName(name)
405
-	}
406
-
407
-	c.Lock()
408
-	_, ok := c.ipamDrivers[name]
409
-	c.Unlock()
410
-	if ok {
411
-		return types.ForbiddenErrorf("ipam driver %q already registered", name)
412
-	}
413
-	locAS, glbAS, err := driver.GetDefaultAddressSpaces()
414
-	if err != nil {
415
-		return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
416
-	}
417
-	c.Lock()
418
-	c.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS, capability: caps}
419
-	c.Unlock()
420
-
421
-	log.Debugf("Registering ipam driver: %q", name)
422
-
423
-	return nil
424
-}
425
-
426
-func (c *controller) RegisterIpamDriver(name string, driver ipamapi.Ipam) error {
427
-	return c.registerIpamDriver(name, driver, &ipamapi.Capability{})
428
-}
429
-
430
-func (c *controller) RegisterIpamDriverWithCapabilities(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error {
431
-	return c.registerIpamDriver(name, driver, caps)
432
-}
433
-
434 393
 // NewNetwork creates a new network of the specified network type. The options
435 394
 // are network specific and modeled in a generic way.
436
-func (c *controller) NewNetwork(networkType, name string, options ...NetworkOption) (Network, error) {
395
+func (c *controller) NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error) {
437 396
 	if !config.IsValidName(name) {
438 397
 		return nil, ErrInvalidName(name)
439 398
 	}
440 399
 
400
+	if id == "" {
401
+		id = stringid.GenerateRandomID()
402
+	}
403
+
441 404
 	// Construct the network object
442 405
 	network := &network{
443 406
 		name:        name,
444 407
 		networkType: networkType,
445 408
 		generic:     map[string]interface{}{netlabel.GenericData: make(map[string]string)},
446 409
 		ipamType:    ipamapi.DefaultIPAM,
447
-		id:          stringid.GenerateRandomID(),
410
+		id:          id,
448 411
 		ctrlr:       c,
449 412
 		persist:     true,
450 413
 		drvOnce:     &sync.Once{},
... ...
@@ -468,7 +505,8 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
468 468
 		}
469 469
 	}()
470 470
 
471
-	if err = c.addNetwork(network); err != nil {
471
+	err = c.addNetwork(network)
472
+	if err != nil {
472 473
 		return nil, err
473 474
 	}
474 475
 	defer func() {
... ...
@@ -499,6 +537,12 @@ func (c *controller) NewNetwork(networkType, name string, options ...NetworkOpti
499 499
 		return nil, err
500 500
 	}
501 501
 
502
+	if err = network.joinCluster(); err != nil {
503
+		log.Errorf("Failed to join network %s into agent cluster: %v", name, err)
504
+	}
505
+
506
+	network.addDriverWatches()
507
+
502 508
 	return network, nil
503 509
 }
504 510
 
... ...
@@ -509,7 +553,7 @@ func (c *controller) addNetwork(n *network) error {
509 509
 	}
510 510
 
511 511
 	// Create the network
512
-	if err := d.CreateNetwork(n.id, n.generic, n.getIPData(4), n.getIPData(6)); err != nil {
512
+	if err := d.CreateNetwork(n.id, n.generic, n, n.getIPData(4), n.getIPData(6)); err != nil {
513 513
 		return err
514 514
 	}
515 515
 
... ...
@@ -745,78 +789,47 @@ func SandboxKeyWalker(out *Sandbox, key string) SandboxWalker {
745 745
 	}
746 746
 }
747 747
 
748
-func (c *controller) loadDriver(networkType string) (*driverData, error) {
748
+func (c *controller) loadDriver(networkType string) error {
749 749
 	// Plugins pkg performs lazy loading of plugins that act as remote drivers.
750 750
 	// As per the design, this Get call will result in remote driver discovery if there is a corresponding plugin available.
751 751
 	_, err := plugins.Get(networkType, driverapi.NetworkPluginEndpointType)
752 752
 	if err != nil {
753 753
 		if err == plugins.ErrNotFound {
754
-			return nil, types.NotFoundErrorf(err.Error())
754
+			return types.NotFoundErrorf(err.Error())
755 755
 		}
756
-		return nil, err
757
-	}
758
-	c.Lock()
759
-	defer c.Unlock()
760
-	dd, ok := c.drivers[networkType]
761
-	if !ok {
762
-		return nil, ErrInvalidNetworkDriver(networkType)
756
+		return err
763 757
 	}
764
-	return dd, nil
758
+
759
+	return nil
765 760
 }
766 761
 
767
-func (c *controller) loadIpamDriver(name string) (*ipamData, error) {
762
+func (c *controller) loadIPAMDriver(name string) error {
768 763
 	if _, err := plugins.Get(name, ipamapi.PluginEndpointType); err != nil {
769 764
 		if err == plugins.ErrNotFound {
770
-			return nil, types.NotFoundErrorf(err.Error())
765
+			return types.NotFoundErrorf(err.Error())
771 766
 		}
772
-		return nil, err
773
-	}
774
-	c.Lock()
775
-	id, ok := c.ipamDrivers[name]
776
-	c.Unlock()
777
-	if !ok {
778
-		return nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
767
+		return err
779 768
 	}
780
-	return id, nil
781
-}
782 769
 
783
-func (c *controller) getIPAM(name string) (id *ipamData, err error) {
784
-	var ok bool
785
-	c.Lock()
786
-	id, ok = c.ipamDrivers[name]
787
-	c.Unlock()
788
-	if !ok {
789
-		id, err = c.loadIpamDriver(name)
790
-	}
791
-	return id, err
770
+	return nil
792 771
 }
793 772
 
794
-func (c *controller) getIpamDriver(name string) (ipamapi.Ipam, error) {
795
-	id, err := c.getIPAM(name)
796
-	if err != nil {
797
-		return nil, err
798
-	}
799
-	return id.driver, nil
800
-}
773
+func (c *controller) getIPAMDriver(name string) (ipamapi.Ipam, *ipamapi.Capability, error) {
774
+	id, cap := c.drvRegistry.IPAM(name)
775
+	if id == nil {
776
+		// Might be a plugin name. Try loading it
777
+		if err := c.loadIPAMDriver(name); err != nil {
778
+			return nil, nil, err
779
+		}
801 780
 
802
-func (c *controller) getIpamDrivers() ipamTable {
803
-	c.Lock()
804
-	defer c.Unlock()
805
-	table := ipamTable{}
806
-	for i, d := range c.ipamDrivers {
807
-		table[i] = d
781
+		// Now that we resolved the plugin, try again looking up the registry
782
+		id, cap = c.drvRegistry.IPAM(name)
783
+		if id == nil {
784
+			return nil, nil, types.BadRequestErrorf("invalid ipam driver: %q", name)
785
+		}
808 786
 	}
809
-	return table
810
-}
811 787
 
812
-func (c *controller) getNetDrivers() driverTable {
813
-	c.Lock()
814
-	defer c.Unlock()
815
-	table := driverTable{}
816
-	for i, d := range c.drivers {
817
-		table[i] = d
818
-	}
819
-	return table
788
+	return id, cap, nil
820 789
 }
821 790
 
822 791
 func (c *controller) Stop() {
... ...
@@ -5,7 +5,6 @@ import (
5 5
 	"sync"
6 6
 
7 7
 	"github.com/docker/libkv/store"
8
-	"github.com/docker/libkv/store/boltdb"
9 8
 )
10 9
 
11 10
 type kvMap map[string]KVObject
... ...
@@ -42,9 +41,7 @@ func (c *cache) kmap(kvObject KVObject) (kvMap, error) {
42 42
 
43 43
 	kvList, err := c.ds.store.List(keyPrefix)
44 44
 	if err != nil {
45
-		// In case of BoltDB it may return ErrBoltBucketNotFound when no writes
46
-		// have ever happened on the db bucket. So check for both err codes
47
-		if err == store.ErrKeyNotFound || err == boltdb.ErrBoltBucketNotFound {
45
+		if err == store.ErrKeyNotFound {
48 46
 			// If the store doesn't have anything then there is nothing to
49 47
 			// populate in the cache. Just bail out.
50 48
 			goto out
... ...
@@ -9,10 +9,6 @@ import (
9 9
 
10 10
 	"github.com/docker/libkv"
11 11
 	"github.com/docker/libkv/store"
12
-	"github.com/docker/libkv/store/boltdb"
13
-	"github.com/docker/libkv/store/consul"
14
-	"github.com/docker/libkv/store/etcd"
15
-	"github.com/docker/libkv/store/zookeeper"
16 12
 	"github.com/docker/libnetwork/discoverapi"
17 13
 	"github.com/docker/libnetwork/types"
18 14
 )
... ...
@@ -148,13 +144,6 @@ func makeDefaultScopes() map[string]*ScopeCfg {
148 148
 var defaultRootChain = []string{"docker", "network", "v1.0"}
149 149
 var rootChain = defaultRootChain
150 150
 
151
-func init() {
152
-	consul.Register()
153
-	zookeeper.Register()
154
-	etcd.Register()
155
-	boltdb.Register()
156
-}
157
-
158 151
 // DefaultScopes returns a map of default scopes and their config for clients to use.
159 152
 func DefaultScopes(dataDir string) map[string]*ScopeCfg {
160 153
 	if dataDir != "" {
... ...
@@ -411,6 +400,9 @@ func (ds *datastore) PutObjectAtomic(kvObject KVObject) error {
411 411
 
412 412
 	_, pair, err = ds.store.AtomicPut(Key(kvObject.Key()...), kvObjValue, previous, nil)
413 413
 	if err != nil {
414
+		if err == store.ErrKeyExists {
415
+			return ErrKeyModified
416
+		}
414 417
 		return err
415 418
 	}
416 419
 
... ...
@@ -571,6 +563,9 @@ func (ds *datastore) DeleteObjectAtomic(kvObject KVObject) error {
571 571
 	}
572 572
 
573 573
 	if _, err := ds.store.AtomicDelete(Key(kvObject.Key()...), previous); err != nil {
574
+		if err == store.ErrKeyExists {
575
+			return ErrKeyModified
576
+		}
574 577
 		return err
575 578
 	}
576 579
 
... ...
@@ -14,7 +14,7 @@ func (c *controller) createGWNetwork() (Network, error) {
14 14
 		bridge.EnableIPMasquerade: strconv.FormatBool(true),
15 15
 	}
16 16
 
17
-	n, err := c.NewNetwork("bridge", libnGWNetwork,
17
+	n, err := c.NewNetwork("bridge", libnGWNetwork, "",
18 18
 		NetworkOptionDriverOpts(netOption),
19 19
 		NetworkOptionEnableIPv6(false),
20 20
 	)
21 21
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+package libnetwork
1
+
2
+import "github.com/docker/libnetwork/types"
3
+
4
+func (c *controller) createGWNetwork() (Network, error) {
5
+	return nil, types.NotImplementedErrorf("default gateway functionality is not implemented in solaris")
6
+}
... ...
@@ -13,10 +13,25 @@ const NetworkPluginEndpointType = "NetworkDriver"
13 13
 type Driver interface {
14 14
 	discoverapi.Discover
15 15
 
16
-	// CreateNetwork invokes the driver method to create a network passing
17
-	// the network id and network specific config. The config mechanism will
18
-	// eventually be replaced with labels which are yet to be introduced.
19
-	CreateNetwork(nid string, options map[string]interface{}, ipV4Data, ipV6Data []IPAMData) error
16
+	// NetworkAllocate invokes the driver method to allocate network
17
+	// specific resources passing network id and network specific config.
18
+	// It returns a key,value pair of network specific driver allocations
19
+	// to the caller.
20
+	NetworkAllocate(nid string, options map[string]string, ipV4Data, ipV6Data []IPAMData) (map[string]string, error)
21
+
22
+	// NetworkFree invokes the driver method to free network specific resources
23
+	// associated with a given network id.
24
+	NetworkFree(nid string) error
25
+
26
+	// CreateNetwork invokes the driver method to create a network
27
+	// passing the network id and network specific config. The
28
+	// config mechanism will eventually be replaced with labels
29
+	// which are yet to be introduced. The driver can return a
30
+	// list of table names for which it is interested in receiving
31
+	// notification when a CRUD operation is performed on any
32
+	// entry in that table. This will be ignored for local scope
33
+	// drivers.
34
+	CreateNetwork(nid string, options map[string]interface{}, nInfo NetworkInfo, ipV4Data, ipV6Data []IPAMData) error
20 35
 
21 36
 	// DeleteNetwork invokes the driver method to delete network passing
22 37
 	// the network id.
... ...
@@ -50,10 +65,24 @@ type Driver interface {
50 50
 	// programming that was done so far
51 51
 	RevokeExternalConnectivity(nid, eid string) error
52 52
 
53
+	// EventNotify notifies the driver when a CRUD operation has
54
+	// happened on a table of its interest as soon as this node
55
+	// receives such an event in the gossip layer. This method is
56
+	// only invoked for the global scope driver.
57
+	EventNotify(event EventType, nid string, tableName string, key string, value []byte)
58
+
53 59
 	// Type returns the type of this driver, the network type this driver manages
54 60
 	Type() string
55 61
 }
56 62
 
63
+// NetworkInfo provides a go interface for drivers to provide network
64
+// specific information to libnetwork.
65
+type NetworkInfo interface {
66
+	// TableEventRegister registers driver interest in a given
67
+	// table name.
68
+	TableEventRegister(tableName string) error
69
+}
70
+
57 71
 // InterfaceInfo provides a go interface for drivers to retrieve
58 72
 // network information to interface resources.
59 73
 type InterfaceInfo interface {
... ...
@@ -102,6 +131,10 @@ type JoinInfo interface {
102 102
 
103 103
 	// DisableGatewayService tells libnetwork not to provide Default GW for the container
104 104
 	DisableGatewayService()
105
+
106
+	// AddTableEntry adds a table entry to the gossip layer
107
+	// passing the table name, key and an opaque value.
108
+	AddTableEntry(tableName string, key string, value []byte) error
105 109
 }
106 110
 
107 111
 // DriverCallback provides a Callback interface for Drivers into LibNetwork
... ...
@@ -124,3 +157,15 @@ type IPAMData struct {
124 124
 	Gateway      *net.IPNet
125 125
 	AuxAddresses map[string]*net.IPNet
126 126
 }
127
+
128
+// EventType defines a type for the CRUD event
129
+type EventType uint8
130
+
131
+const (
132
+	// Create event is generated when a table entry is created.
133
+	Create EventType = 1 + iota
134
+	// Update event is generated when a table entry is updated.
135
+	Update
136
+	// Delete event is generated when a table entry is deleted.
137
+	Delete
138
+)
127 139
deleted file mode 100644
... ...
@@ -1,84 +0,0 @@
1
-package libnetwork
2
-
3
-import (
4
-	"strings"
5
-
6
-	"github.com/docker/libnetwork/discoverapi"
7
-	"github.com/docker/libnetwork/driverapi"
8
-	"github.com/docker/libnetwork/ipamapi"
9
-	"github.com/docker/libnetwork/netlabel"
10
-
11
-	builtinIpam "github.com/docker/libnetwork/ipams/builtin"
12
-	nullIpam "github.com/docker/libnetwork/ipams/null"
13
-	remoteIpam "github.com/docker/libnetwork/ipams/remote"
14
-)
15
-
16
-type initializer struct {
17
-	fn    func(driverapi.DriverCallback, map[string]interface{}) error
18
-	ntype string
19
-}
20
-
21
-func initDrivers(c *controller) error {
22
-	for _, i := range getInitializers() {
23
-		if err := i.fn(c, makeDriverConfig(c, i.ntype)); err != nil {
24
-			return err
25
-		}
26
-	}
27
-
28
-	return nil
29
-}
30
-
31
-func makeDriverConfig(c *controller, ntype string) map[string]interface{} {
32
-	if c.cfg == nil {
33
-		return nil
34
-	}
35
-
36
-	config := make(map[string]interface{})
37
-
38
-	for _, label := range c.cfg.Daemon.Labels {
39
-		if !strings.HasPrefix(netlabel.Key(label), netlabel.DriverPrefix+"."+ntype) {
40
-			continue
41
-		}
42
-
43
-		config[netlabel.Key(label)] = netlabel.Value(label)
44
-	}
45
-
46
-	drvCfg, ok := c.cfg.Daemon.DriverCfg[ntype]
47
-	if ok {
48
-		for k, v := range drvCfg.(map[string]interface{}) {
49
-			config[k] = v
50
-		}
51
-	}
52
-
53
-	// We don't send datastore configs to external plugins
54
-	if ntype == "remote" {
55
-		return config
56
-	}
57
-
58
-	for k, v := range c.cfg.Scopes {
59
-		if !v.IsValid() {
60
-			continue
61
-		}
62
-		config[netlabel.MakeKVClient(k)] = discoverapi.DatastoreConfigData{
63
-			Scope:    k,
64
-			Provider: v.Client.Provider,
65
-			Address:  v.Client.Address,
66
-			Config:   v.Client.Config,
67
-		}
68
-	}
69
-
70
-	return config
71
-}
72
-
73
-func initIpams(ic ipamapi.Callback, lDs, gDs interface{}) error {
74
-	for _, fn := range [](func(ipamapi.Callback, interface{}, interface{}) error){
75
-		builtinIpam.Init,
76
-		remoteIpam.Init,
77
-		nullIpam.Init,
78
-	} {
79
-		if err := fn(ic, lDs, gDs); err != nil {
80
-			return err
81
-		}
82
-	}
83
-	return nil
84
-}
... ...
@@ -535,8 +535,19 @@ func (d *driver) getNetworks() []*bridgeNetwork {
535 535
 	return ls
536 536
 }
537 537
 
538
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
539
+	return nil, types.NotImplementedErrorf("not implemented")
540
+}
541
+
542
+func (d *driver) NetworkFree(id string) error {
543
+	return types.NotImplementedErrorf("not implemented")
544
+}
545
+
546
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
547
+}
548
+
538 549
 // Create a new network using bridge plugin
539
-func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
550
+func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
540 551
 	if len(ipV4Data) == 0 || ipV4Data[0].Pool.String() == "0.0.0.0/0" {
541 552
 		return types.BadRequestErrorf("ipv4 pool is empty")
542 553
 	}
... ...
@@ -6,7 +6,6 @@ import (
6 6
 	"net"
7 7
 
8 8
 	"github.com/Sirupsen/logrus"
9
-	"github.com/docker/libkv/store/boltdb"
10 9
 	"github.com/docker/libnetwork/datastore"
11 10
 	"github.com/docker/libnetwork/discoverapi"
12 11
 	"github.com/docker/libnetwork/netlabel"
... ...
@@ -35,7 +34,7 @@ func (d *driver) initStore(option map[string]interface{}) error {
35 35
 
36 36
 func (d *driver) populateNetworks() error {
37 37
 	kvol, err := d.store.List(datastore.Key(bridgePrefix), &networkConfiguration{})
38
-	if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound {
38
+	if err != nil && err != datastore.ErrKeyNotFound {
39 39
 		return fmt.Errorf("failed to get bridge network configurations from store: %v", err)
40 40
 	}
41 41
 
... ...
@@ -24,7 +24,18 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error {
24 24
 	return dc.RegisterDriver(networkType, &driver{}, c)
25 25
 }
26 26
 
27
-func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
27
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
28
+	return nil, types.NotImplementedErrorf("not implemented")
29
+}
30
+
31
+func (d *driver) NetworkFree(id string) error {
32
+	return types.NotImplementedErrorf("not implemented")
33
+}
34
+
35
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
36
+}
37
+
38
+func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
28 39
 	d.Lock()
29 40
 	defer d.Unlock()
30 41
 
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"github.com/docker/libnetwork/discoverapi"
9 9
 	"github.com/docker/libnetwork/driverapi"
10 10
 	"github.com/docker/libnetwork/osl"
11
+	"github.com/docker/libnetwork/types"
11 12
 )
12 13
 
13 14
 const (
... ...
@@ -64,6 +65,14 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error {
64 64
 	return dc.RegisterDriver(ipvlanType, d, c)
65 65
 }
66 66
 
67
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
68
+	return nil, types.NotImplementedErrorf("not implemented")
69
+}
70
+
71
+func (d *driver) NetworkFree(id string) error {
72
+	return types.NotImplementedErrorf("not implemented")
73
+}
74
+
67 75
 func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, error) {
68 76
 	return make(map[string]interface{}, 0), nil
69 77
 }
... ...
@@ -89,3 +98,6 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
89 89
 func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{}) error {
90 90
 	return nil
91 91
 }
92
+
93
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
94
+}
... ...
@@ -14,7 +14,7 @@ import (
14 14
 )
15 15
 
16 16
 // CreateNetwork creates the network for the specified driver type
17
-func (d *driver) CreateNetwork(nid string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
17
+func (d *driver) CreateNetwork(nid string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
18 18
 	defer osl.InitOSContext()()
19 19
 	kv, err := kernel.GetKernelVersion()
20 20
 	if err != nil {
... ...
@@ -5,7 +5,6 @@ import (
5 5
 	"fmt"
6 6
 
7 7
 	"github.com/Sirupsen/logrus"
8
-	"github.com/docker/libkv/store/boltdb"
9 8
 	"github.com/docker/libnetwork/datastore"
10 9
 	"github.com/docker/libnetwork/discoverapi"
11 10
 	"github.com/docker/libnetwork/netlabel"
... ...
@@ -60,7 +59,7 @@ func (d *driver) initStore(option map[string]interface{}) error {
60 60
 // populateNetworks is invoked at driver init to recreate persistently stored networks
61 61
 func (d *driver) populateNetworks() error {
62 62
 	kvol, err := d.store.List(datastore.Key(ipvlanPrefix), &configuration{})
63
-	if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound {
63
+	if err != nil && err != datastore.ErrKeyNotFound {
64 64
 		return fmt.Errorf("failed to get ipvlan network configurations from store: %v", err)
65 65
 	}
66 66
 	// If empty it simply means no ipvlan networks have been created yet
... ...
@@ -8,6 +8,7 @@ import (
8 8
 	"github.com/docker/libnetwork/discoverapi"
9 9
 	"github.com/docker/libnetwork/driverapi"
10 10
 	"github.com/docker/libnetwork/osl"
11
+	"github.com/docker/libnetwork/types"
11 12
 )
12 13
 
13 14
 const (
... ...
@@ -66,6 +67,14 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error {
66 66
 	return dc.RegisterDriver(macvlanType, d, c)
67 67
 }
68 68
 
69
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
70
+	return nil, types.NotImplementedErrorf("not implemented")
71
+}
72
+
73
+func (d *driver) NetworkFree(id string) error {
74
+	return types.NotImplementedErrorf("not implemented")
75
+}
76
+
69 77
 func (d *driver) EndpointOperInfo(nid, eid string) (map[string]interface{}, error) {
70 78
 	return make(map[string]interface{}, 0), nil
71 79
 }
... ...
@@ -91,3 +100,6 @@ func (d *driver) DiscoverNew(dType discoverapi.DiscoveryType, data interface{})
91 91
 func (d *driver) DiscoverDelete(dType discoverapi.DiscoveryType, data interface{}) error {
92 92
 	return nil
93 93
 }
94
+
95
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
96
+}
... ...
@@ -14,7 +14,7 @@ import (
14 14
 )
15 15
 
16 16
 // CreateNetwork creates the network for the specified driver type
17
-func (d *driver) CreateNetwork(nid string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
17
+func (d *driver) CreateNetwork(nid string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
18 18
 	defer osl.InitOSContext()()
19 19
 	kv, err := kernel.GetKernelVersion()
20 20
 	if err != nil {
... ...
@@ -5,7 +5,6 @@ import (
5 5
 	"fmt"
6 6
 
7 7
 	"github.com/Sirupsen/logrus"
8
-	"github.com/docker/libkv/store/boltdb"
9 8
 	"github.com/docker/libnetwork/datastore"
10 9
 	"github.com/docker/libnetwork/discoverapi"
11 10
 	"github.com/docker/libnetwork/netlabel"
... ...
@@ -60,7 +59,7 @@ func (d *driver) initStore(option map[string]interface{}) error {
60 60
 // populateNetworks is invoked at driver init to recreate persistently stored networks
61 61
 func (d *driver) populateNetworks() error {
62 62
 	kvol, err := d.store.List(datastore.Key(macvlanPrefix), &configuration{})
63
-	if err != nil && err != datastore.ErrKeyNotFound && err != boltdb.ErrBoltBucketNotFound {
63
+	if err != nil && err != datastore.ErrKeyNotFound {
64 64
 		return fmt.Errorf("failed to get macvlan network configurations from store: %v", err)
65 65
 	}
66 66
 	// If empty it simply means no macvlan networks have been created yet
... ...
@@ -24,7 +24,18 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error {
24 24
 	return dc.RegisterDriver(networkType, &driver{}, c)
25 25
 }
26 26
 
27
-func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
27
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
28
+	return nil, types.NotImplementedErrorf("not implemented")
29
+}
30
+
31
+func (d *driver) NetworkFree(id string) error {
32
+	return types.NotImplementedErrorf("not implemented")
33
+}
34
+
35
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
36
+}
37
+
38
+func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
28 39
 	d.Lock()
29 40
 	defer d.Unlock()
30 41
 
... ...
@@ -21,14 +21,18 @@ func chainExists(cname string) bool {
21 21
 }
22 22
 
23 23
 func setupGlobalChain() {
24
-	if err := iptables.RawCombinedOutput("-N", globalChain); err != nil {
25
-		logrus.Errorf("could not create global overlay chain: %v", err)
26
-		return
24
+	// Because of an ungraceful shutdown, chain could already be present
25
+	if !chainExists(globalChain) {
26
+		if err := iptables.RawCombinedOutput("-N", globalChain); err != nil {
27
+			logrus.Errorf("could not create global overlay chain: %v", err)
28
+			return
29
+		}
27 30
 	}
28 31
 
29
-	if err := iptables.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil {
30
-		logrus.Errorf("could not install default return chain in the overlay global chain: %v", err)
31
-		return
32
+	if !iptables.Exists(iptables.Filter, globalChain, "-j", "RETURN") {
33
+		if err := iptables.RawCombinedOutput("-A", globalChain, "-j", "RETURN"); err != nil {
34
+			logrus.Errorf("could not install default return chain in the overlay global chain: %v", err)
35
+		}
32 36
 	}
33 37
 }
34 38
 
... ...
@@ -3,6 +3,7 @@ package overlay
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
+	"strings"
6 7
 
7 8
 	log "github.com/Sirupsen/logrus"
8 9
 	"github.com/docker/libnetwork/driverapi"
... ...
@@ -104,11 +105,55 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
104 104
 
105 105
 	d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac,
106 106
 		net.ParseIP(d.bindAddress), true)
107
+
108
+	if err := jinfo.AddTableEntry(ovPeerTable, eid, []byte(fmt.Sprintf("%s,%s,%s", ep.addr, ep.mac, d.bindAddress))); err != nil {
109
+		log.Errorf("overlay: Failed adding table entry to joininfo: %v", err)
110
+	}
111
+
107 112
 	d.pushLocalEndpointEvent("join", nid, eid)
108 113
 
109 114
 	return nil
110 115
 }
111 116
 
117
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
118
+	if tableName != ovPeerTable {
119
+		log.Errorf("Unexpected table notification for table %s received", tableName)
120
+		return
121
+	}
122
+
123
+	eid := key
124
+	values := strings.Split(string(value), ",")
125
+	if len(values) < 3 {
126
+		log.Errorf("Invalid value %s received through event notify", string(value))
127
+		return
128
+	}
129
+
130
+	addr, err := types.ParseCIDR(values[0])
131
+	if err != nil {
132
+		log.Errorf("Invalid peer IP %s received in event notify", values[0])
133
+		return
134
+	}
135
+
136
+	mac, err := net.ParseMAC(values[1])
137
+	if err != nil {
138
+		log.Errorf("Invalid mac %s received in event notify", values[1])
139
+		return
140
+	}
141
+
142
+	vtep := net.ParseIP(values[2])
143
+	if vtep == nil {
144
+		log.Errorf("Invalid VTEP %s received in event notify", values[2])
145
+		return
146
+	}
147
+
148
+	if etype == driverapi.Delete {
149
+		d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, true)
150
+		return
151
+	}
152
+
153
+	d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true)
154
+}
155
+
112 156
 // Leave method is invoked when a Sandbox detaches from an endpoint.
113 157
 func (d *driver) Leave(nid, eid string) error {
114 158
 	if err := validateID(nid, eid); err != nil {
... ...
@@ -6,6 +6,7 @@ import (
6 6
 	"net"
7 7
 	"os"
8 8
 	"path/filepath"
9
+	"strconv"
9 10
 	"strings"
10 11
 	"sync"
11 12
 	"syscall"
... ...
@@ -13,6 +14,7 @@ import (
13 13
 	"github.com/Sirupsen/logrus"
14 14
 	"github.com/docker/libnetwork/datastore"
15 15
 	"github.com/docker/libnetwork/driverapi"
16
+	"github.com/docker/libnetwork/netlabel"
16 17
 	"github.com/docker/libnetwork/netutils"
17 18
 	"github.com/docker/libnetwork/osl"
18 19
 	"github.com/docker/libnetwork/resolvconf"
... ...
@@ -59,7 +61,15 @@ type network struct {
59 59
 	sync.Mutex
60 60
 }
61 61
 
62
-func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
62
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
63
+	return nil, types.NotImplementedErrorf("not implemented")
64
+}
65
+
66
+func (d *driver) NetworkFree(id string) error {
67
+	return types.NotImplementedErrorf("not implemented")
68
+}
69
+
70
+func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
63 71
 	if id == "" {
64 72
 		return fmt.Errorf("invalid network id")
65 73
 	}
... ...
@@ -81,12 +91,40 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat
81 81
 		subnets:   []*subnet{},
82 82
 	}
83 83
 
84
-	for _, ipd := range ipV4Data {
84
+	vnis := make([]uint32, 0, len(ipV4Data))
85
+	if gval, ok := option[netlabel.GenericData]; ok {
86
+		optMap := gval.(map[string]string)
87
+		if val, ok := optMap[netlabel.OverlayVxlanIDList]; ok {
88
+			logrus.Debugf("overlay: Received vxlan IDs: %s", val)
89
+			vniStrings := strings.Split(val, ",")
90
+			for _, vniStr := range vniStrings {
91
+				vni, err := strconv.Atoi(vniStr)
92
+				if err != nil {
93
+					return fmt.Errorf("invalid vxlan id value %q passed", vniStr)
94
+				}
95
+
96
+				vnis = append(vnis, uint32(vni))
97
+			}
98
+		}
99
+	}
100
+
101
+	// If we are getting vnis from libnetwork, either we get for
102
+	// all subnets or none.
103
+	if len(vnis) != 0 && len(vnis) < len(ipV4Data) {
104
+		return fmt.Errorf("insufficient vnis(%d) passed to overlay", len(vnis))
105
+	}
106
+
107
+	for i, ipd := range ipV4Data {
85 108
 		s := &subnet{
86 109
 			subnetIP: ipd.Pool,
87 110
 			gwIP:     ipd.Gateway,
88 111
 			once:     &sync.Once{},
89 112
 		}
113
+
114
+		if len(vnis) != 0 {
115
+			s.vni = vnis[i]
116
+		}
117
+
90 118
 		n.subnets = append(n.subnets, s)
91 119
 	}
92 120
 
... ...
@@ -94,8 +132,13 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Dat
94 94
 		return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
95 95
 	}
96 96
 
97
-	d.addNetwork(n)
97
+	if nInfo != nil {
98
+		if err := nInfo.TableEventRegister(ovPeerTable); err != nil {
99
+			return err
100
+		}
101
+	}
98 102
 
103
+	d.addNetwork(n)
99 104
 	return nil
100 105
 }
101 106
 
... ...
@@ -244,11 +287,21 @@ func setHostMode() {
244 244
 }
245 245
 
246 246
 func (n *network) generateVxlanName(s *subnet) string {
247
-	return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
247
+	id := n.id
248
+	if len(n.id) > 5 {
249
+		id = n.id[:5]
250
+	}
251
+
252
+	return "vx-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + id
248 253
 }
249 254
 
250 255
 func (n *network) generateBridgeName(s *subnet) string {
251
-	return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + n.id[:5]
256
+	id := n.id
257
+	if len(n.id) > 5 {
258
+		id = n.id[:5]
259
+	}
260
+
261
+	return "ov-" + fmt.Sprintf("%06x", n.vxlanID(s)) + "-" + id
252 262
 }
253 263
 
254 264
 func isOverlap(nw *net.IPNet) bool {
... ...
@@ -395,9 +448,10 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
395 395
 				continue
396 396
 			}
397 397
 
398
-			if neigh.IP.To16() != nil {
398
+			if neigh.IP.To4() == nil {
399 399
 				continue
400 400
 			}
401
+			logrus.Debugf("miss notification for dest IP, %v", neigh.IP.String())
401 402
 
402 403
 			if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
403 404
 				continue
... ...
@@ -575,32 +629,38 @@ func (n *network) DataScope() string {
575 575
 }
576 576
 
577 577
 func (n *network) writeToStore() error {
578
+	if n.driver.store == nil {
579
+		return nil
580
+	}
581
+
578 582
 	return n.driver.store.PutObjectAtomic(n)
579 583
 }
580 584
 
581 585
 func (n *network) releaseVxlanID() error {
582
-	if n.driver.store == nil {
583
-		return fmt.Errorf("no datastore configured. cannot release vxlan id")
584
-	}
585
-
586 586
 	if len(n.subnets) == 0 {
587 587
 		return nil
588 588
 	}
589 589
 
590
-	if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
591
-		if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
592
-			// In both the above cases we can safely assume that the key has been removed by some other
593
-			// instance and so simply get out of here
594
-			return nil
595
-		}
590
+	if n.driver.store != nil {
591
+		if err := n.driver.store.DeleteObjectAtomic(n); err != nil {
592
+			if err == datastore.ErrKeyModified || err == datastore.ErrKeyNotFound {
593
+				// In both the above cases we can safely assume that the key has been removed by some other
594
+				// instance and so simply get out of here
595
+				return nil
596
+			}
596 597
 
597
-		return fmt.Errorf("failed to delete network to vxlan id map: %v", err)
598
+			return fmt.Errorf("failed to delete network to vxlan id map: %v", err)
599
+		}
598 600
 	}
599 601
 
600 602
 	for _, s := range n.subnets {
601
-		n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
603
+		if n.driver.vxlanIdm != nil {
604
+			n.driver.vxlanIdm.Release(uint64(n.vxlanID(s)))
605
+		}
606
+
602 607
 		n.setVxlanID(s, 0)
603 608
 	}
609
+
604 610
 	return nil
605 611
 }
606 612
 
... ...
@@ -611,7 +671,7 @@ func (n *network) obtainVxlanID(s *subnet) error {
611 611
 	}
612 612
 
613 613
 	if n.driver.store == nil {
614
-		return fmt.Errorf("no datastore configured. cannot obtain vxlan id")
614
+		return fmt.Errorf("no valid vxlan id and no datastore configured, cannot obtain vxlan id")
615 615
 	}
616 616
 
617 617
 	for {
... ...
@@ -88,7 +88,7 @@ func Fini(drv driverapi.Driver) {
88 88
 
89 89
 func (d *driver) configure() error {
90 90
 	if d.store == nil {
91
-		return types.NoServiceErrorf("datastore is not available")
91
+		return nil
92 92
 	}
93 93
 
94 94
 	if d.vxlanIdm == nil {
... ...
@@ -147,10 +147,14 @@ func (d *driver) nodeJoin(node string, self bool) {
147 147
 		d.Lock()
148 148
 		d.bindAddress = node
149 149
 		d.Unlock()
150
-		err := d.serfInit()
151
-		if err != nil {
152
-			logrus.Errorf("initializing serf instance failed: %v", err)
153
-			return
150
+
151
+		// If there is no cluster store there is no need to start serf.
152
+		if d.store != nil {
153
+			err := d.serfInit()
154
+			if err != nil {
155
+				logrus.Errorf("initializing serf instance failed: %v", err)
156
+				return
157
+			}
154 158
 		}
155 159
 	}
156 160
 
... ...
@@ -7,6 +7,8 @@ import (
7 7
 	"syscall"
8 8
 )
9 9
 
10
+const ovPeerTable = "overlay_peer_table"
11
+
10 12
 type peerKey struct {
11 13
 	peerIP  net.IP
12 14
 	peerMac net.HardwareAddr
... ...
@@ -83,7 +83,18 @@ func (d *driver) call(methodName string, arg interface{}, retVal maybeError) err
83 83
 	return nil
84 84
 }
85 85
 
86
-func (d *driver) CreateNetwork(id string, options map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
86
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
87
+	return nil, types.NotImplementedErrorf("not implemented")
88
+}
89
+
90
+func (d *driver) NetworkFree(id string) error {
91
+	return types.NotImplementedErrorf("not implemented")
92
+}
93
+
94
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
95
+}
96
+
97
+func (d *driver) CreateNetwork(id string, options map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
87 98
 	create := &api.CreateNetworkRequest{
88 99
 		NetworkID: id,
89 100
 		Options:   options,
... ...
@@ -149,8 +149,11 @@ func (c *networkConfiguration) processIPAM(id string, ipamV4Data, ipamV6Data []d
149 149
 	return nil
150 150
 }
151 151
 
152
+func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key string, value []byte) {
153
+}
154
+
152 155
 // Create a new network
153
-func (d *driver) CreateNetwork(id string, option map[string]interface{}, ipV4Data, ipV6Data []driverapi.IPAMData) error {
156
+func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo driverapi.NetworkInfo, ipV4Data, ipV6Data []driverapi.IPAMData) error {
154 157
 	if _, err := d.getNetwork(id); err == nil {
155 158
 		return types.ForbiddenErrorf("network %s exists", id)
156 159
 	}
... ...
@@ -414,6 +417,10 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
414 414
 	}
415 415
 	endpointStruct.Policies = append(endpointStruct.Policies, qosPolicies...)
416 416
 
417
+	if ifInfo.Address() != nil {
418
+		endpointStruct.IPAddress = ifInfo.Address().IP
419
+	}
420
+
417 421
 	configurationb, err := json.Marshal(endpointStruct)
418 422
 	if err != nil {
419 423
 		return err
... ...
@@ -449,8 +456,13 @@ func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo,
449 449
 	n.endpoints[eid] = endpoint
450 450
 	n.Unlock()
451 451
 
452
-	ifInfo.SetIPAddress(endpoint.addr)
453
-	ifInfo.SetMacAddress(endpoint.macAddress)
452
+	if ifInfo.Address() == nil {
453
+		ifInfo.SetIPAddress(endpoint.addr)
454
+	}
455
+
456
+	if macAddress == nil {
457
+		ifInfo.SetMacAddress(endpoint.macAddress)
458
+	}
454 459
 
455 460
 	return nil
456 461
 }
... ...
@@ -560,6 +572,14 @@ func (d *driver) RevokeExternalConnectivity(nid, eid string) error {
560 560
 	return nil
561 561
 }
562 562
 
563
+func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data, ipV6Data []driverapi.IPAMData) (map[string]string, error) {
564
+	return nil, types.NotImplementedErrorf("not implemented")
565
+}
566
+
567
+func (d *driver) NetworkFree(id string) error {
568
+	return types.NotImplementedErrorf("not implemented")
569
+}
570
+
563 571
 func (d *driver) Type() string {
564 572
 	return d.name
565 573
 }
566 574
new file mode 100644
... ...
@@ -0,0 +1,5 @@
0
+package libnetwork
1
+
2
+func getInitializers() []initializer {
3
+	return []initializer{}
4
+}
0 5
new file mode 100644
... ...
@@ -0,0 +1,241 @@
0
+package drvregistry
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+	"sync"
6
+
7
+	"github.com/docker/libnetwork/driverapi"
8
+	"github.com/docker/libnetwork/ipamapi"
9
+	"github.com/docker/libnetwork/types"
10
+
11
+	builtinIpam "github.com/docker/libnetwork/ipams/builtin"
12
+	nullIpam "github.com/docker/libnetwork/ipams/null"
13
+	remoteIpam "github.com/docker/libnetwork/ipams/remote"
14
+)
15
+
16
+type driverData struct {
17
+	driver     driverapi.Driver
18
+	capability driverapi.Capability
19
+}
20
+
21
+type ipamData struct {
22
+	driver     ipamapi.Ipam
23
+	capability *ipamapi.Capability
24
+	// default address spaces are provided by ipam driver at registration time
25
+	defaultLocalAddressSpace, defaultGlobalAddressSpace string
26
+}
27
+
28
+type driverTable map[string]*driverData
29
+type ipamTable map[string]*ipamData
30
+
31
+// DrvRegistry holds the registry of all network drivers and IPAM drivers that it knows about.
32
+type DrvRegistry struct {
33
+	sync.Mutex
34
+	drivers     driverTable
35
+	ipamDrivers ipamTable
36
+	dfn         DriverNotifyFunc
37
+	ifn         IPAMNotifyFunc
38
+}
39
+
40
+// Functors definition
41
+
42
+// InitFunc defines the driver initialization function signature.
43
+type InitFunc func(driverapi.DriverCallback, map[string]interface{}) error
44
+
45
+// IPAMWalkFunc defines the IPAM driver table walker function signature.
46
+type IPAMWalkFunc func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) bool
47
+
48
+// DriverWalkFunc defines the network driver table walker function signature.
49
+type DriverWalkFunc func(name string, driver driverapi.Driver, capability driverapi.Capability) bool
50
+
51
+// IPAMNotifyFunc defines the notify function signature when a new IPAM driver gets registered.
52
+type IPAMNotifyFunc func(name string, driver ipamapi.Ipam, cap *ipamapi.Capability) error
53
+
54
+// DriverNotifyFunc defines the notify function signature when a new network driver gets registered.
55
+type DriverNotifyFunc func(name string, driver driverapi.Driver, capability driverapi.Capability) error
56
+
57
+// New returns a new driver registry handle.
58
+func New(lDs, gDs interface{}, dfn DriverNotifyFunc, ifn IPAMNotifyFunc) (*DrvRegistry, error) {
59
+	r := &DrvRegistry{
60
+		drivers:     make(driverTable),
61
+		ipamDrivers: make(ipamTable),
62
+		dfn:         dfn,
63
+		ifn:         ifn,
64
+	}
65
+
66
+	if err := r.initIPAMs(lDs, gDs); err != nil {
67
+		return nil, err
68
+	}
69
+
70
+	return r, nil
71
+}
72
+
73
+// AddDriver adds a network driver to the registry.
74
+func (r *DrvRegistry) AddDriver(ntype string, fn InitFunc, config map[string]interface{}) error {
75
+	return fn(r, config)
76
+}
77
+
78
+// WalkIPAMs walks the IPAM drivers registered in the registry and invokes the passed walk function on each one of them.
79
+func (r *DrvRegistry) WalkIPAMs(ifn IPAMWalkFunc) {
80
+	type ipamVal struct {
81
+		name string
82
+		data *ipamData
83
+	}
84
+
85
+	r.Lock()
86
+	ivl := make([]ipamVal, 0, len(r.ipamDrivers))
87
+	for k, v := range r.ipamDrivers {
88
+		ivl = append(ivl, ipamVal{name: k, data: v})
89
+	}
90
+	r.Unlock()
91
+
92
+	for _, iv := range ivl {
93
+		if ifn(iv.name, iv.data.driver, iv.data.capability) {
94
+			break
95
+		}
96
+	}
97
+}
98
+
99
+// WalkDrivers walks the network drivers registered in the registry and invokes the passed walk function on each one of them.
100
+func (r *DrvRegistry) WalkDrivers(dfn DriverWalkFunc) {
101
+	type driverVal struct {
102
+		name string
103
+		data *driverData
104
+	}
105
+
106
+	r.Lock()
107
+	dvl := make([]driverVal, 0, len(r.drivers))
108
+	for k, v := range r.drivers {
109
+		dvl = append(dvl, driverVal{name: k, data: v})
110
+	}
111
+	r.Unlock()
112
+
113
+	for _, dv := range dvl {
114
+		if dfn(dv.name, dv.data.driver, dv.data.capability) {
115
+			break
116
+		}
117
+	}
118
+}
119
+
120
+// Driver returns the actual network driver instance and its capability which are registered with the passed name.
121
+func (r *DrvRegistry) Driver(name string) (driverapi.Driver, *driverapi.Capability) {
122
+	r.Lock()
123
+	defer r.Unlock()
124
+
125
+	d, ok := r.drivers[name]
126
+	if !ok {
127
+		return nil, nil
128
+	}
129
+
130
+	return d.driver, &d.capability
131
+}
132
+
133
+// IPAM returns the actual IPAM driver instance and its capability which are registered with the passed name.
134
+func (r *DrvRegistry) IPAM(name string) (ipamapi.Ipam, *ipamapi.Capability) {
135
+	r.Lock()
136
+	defer r.Unlock()
137
+
138
+	i, ok := r.ipamDrivers[name]
139
+	if !ok {
140
+		return nil, nil
141
+	}
142
+
143
+	return i.driver, i.capability
144
+}
145
+
146
+// IPAMDefaultAddressSpaces returns the default address space strings for the passed IPAM driver name.
147
+func (r *DrvRegistry) IPAMDefaultAddressSpaces(name string) (string, string, error) {
148
+	r.Lock()
149
+	defer r.Unlock()
150
+
151
+	i, ok := r.ipamDrivers[name]
152
+	if !ok {
153
+		return "", "", fmt.Errorf("ipam %s not found", name)
154
+	}
155
+
156
+	return i.defaultLocalAddressSpace, i.defaultGlobalAddressSpace, nil
157
+}
158
+
159
+func (r *DrvRegistry) initIPAMs(lDs, gDs interface{}) error {
160
+	for _, fn := range [](func(ipamapi.Callback, interface{}, interface{}) error){
161
+		builtinIpam.Init,
162
+		remoteIpam.Init,
163
+		nullIpam.Init,
164
+	} {
165
+		if err := fn(r, lDs, gDs); err != nil {
166
+			return err
167
+		}
168
+	}
169
+
170
+	return nil
171
+}
172
+
173
+// RegisterDriver registers the network driver when it gets discovered.
174
+func (r *DrvRegistry) RegisterDriver(ntype string, driver driverapi.Driver, capability driverapi.Capability) error {
175
+	if strings.TrimSpace(ntype) == "" {
176
+		return fmt.Errorf("network type string cannot be empty")
177
+	}
178
+
179
+	r.Lock()
180
+	_, ok := r.drivers[ntype]
181
+	r.Unlock()
182
+
183
+	if ok {
184
+		return driverapi.ErrActiveRegistration(ntype)
185
+	}
186
+
187
+	if r.dfn != nil {
188
+		if err := r.dfn(ntype, driver, capability); err != nil {
189
+			return err
190
+		}
191
+	}
192
+
193
+	dData := &driverData{driver, capability}
194
+
195
+	r.Lock()
196
+	r.drivers[ntype] = dData
197
+	r.Unlock()
198
+
199
+	return nil
200
+}
201
+
202
+func (r *DrvRegistry) registerIpamDriver(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error {
203
+	if strings.TrimSpace(name) == "" {
204
+		return fmt.Errorf("ipam driver name string cannot be empty")
205
+	}
206
+
207
+	r.Lock()
208
+	_, ok := r.ipamDrivers[name]
209
+	r.Unlock()
210
+	if ok {
211
+		return types.ForbiddenErrorf("ipam driver %q already registered", name)
212
+	}
213
+
214
+	locAS, glbAS, err := driver.GetDefaultAddressSpaces()
215
+	if err != nil {
216
+		return types.InternalErrorf("ipam driver %q failed to return default address spaces: %v", name, err)
217
+	}
218
+
219
+	if r.ifn != nil {
220
+		if err := r.ifn(name, driver, caps); err != nil {
221
+			return err
222
+		}
223
+	}
224
+
225
+	r.Lock()
226
+	r.ipamDrivers[name] = &ipamData{driver: driver, defaultLocalAddressSpace: locAS, defaultGlobalAddressSpace: glbAS, capability: caps}
227
+	r.Unlock()
228
+
229
+	return nil
230
+}
231
+
232
+// RegisterIpamDriver registers the IPAM driver discovered with default capabilities.
233
+func (r *DrvRegistry) RegisterIpamDriver(name string, driver ipamapi.Ipam) error {
234
+	return r.registerIpamDriver(name, driver, &ipamapi.Capability{})
235
+}
236
+
237
+// RegisterIpamDriverWithCapabilities registers the IPAM driver discovered with specified capabilities.
238
+func (r *DrvRegistry) RegisterIpamDriverWithCapabilities(name string, driver ipamapi.Ipam, caps *ipamapi.Capability) error {
239
+	return r.registerIpamDriver(name, driver, caps)
240
+}
... ...
@@ -67,6 +67,8 @@ type endpoint struct {
67 67
 	ipamOptions       map[string]string
68 68
 	aliases           map[string]string
69 69
 	myAliases         []string
70
+	svcID             string
71
+	svcName           string
70 72
 	dbIndex           uint64
71 73
 	dbExists          bool
72 74
 	sync.Mutex
... ...
@@ -89,6 +91,9 @@ func (ep *endpoint) MarshalJSON() ([]byte, error) {
89 89
 	epMap["anonymous"] = ep.anonymous
90 90
 	epMap["disableResolution"] = ep.disableResolution
91 91
 	epMap["myAliases"] = ep.myAliases
92
+	epMap["svcName"] = ep.svcName
93
+	epMap["svcID"] = ep.svcID
94
+
92 95
 	return json.Marshal(epMap)
93 96
 }
94 97
 
... ...
@@ -172,6 +177,15 @@ func (ep *endpoint) UnmarshalJSON(b []byte) (err error) {
172 172
 	if l, ok := epMap["locator"]; ok {
173 173
 		ep.locator = l.(string)
174 174
 	}
175
+
176
+	if sn, ok := epMap["svcName"]; ok {
177
+		ep.svcName = sn.(string)
178
+	}
179
+
180
+	if si, ok := epMap["svcID"]; ok {
181
+		ep.svcID = si.(string)
182
+	}
183
+
175 184
 	ma, _ := json.Marshal(epMap["myAliases"])
176 185
 	var myAliases []string
177 186
 	json.Unmarshal(ma, &myAliases)
... ...
@@ -196,6 +210,8 @@ func (ep *endpoint) CopyTo(o datastore.KVObject) error {
196 196
 	dstEp.dbExists = ep.dbExists
197 197
 	dstEp.anonymous = ep.anonymous
198 198
 	dstEp.disableResolution = ep.disableResolution
199
+	dstEp.svcName = ep.svcName
200
+	dstEp.svcID = ep.svcID
199 201
 
200 202
 	if ep.iface != nil {
201 203
 		dstEp.iface = &endpointInterface{}
... ...
@@ -413,7 +429,9 @@ func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error {
413 413
 	}()
414 414
 
415 415
 	// Watch for service records
416
-	n.getController().watchSvcRecord(ep)
416
+	if !n.getController().cfg.Daemon.IsAgent {
417
+		n.getController().watchSvcRecord(ep)
418
+	}
417 419
 
418 420
 	address := ""
419 421
 	if ip := ep.getFirstInterfaceAddress(); ip != nil {
... ...
@@ -446,6 +464,10 @@ func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) error {
446 446
 		return err
447 447
 	}
448 448
 
449
+	if e := ep.addToCluster(); e != nil {
450
+		log.Errorf("Could not update state for endpoint %s into cluster: %v", ep.Name(), e)
451
+	}
452
+
449 453
 	if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
450 454
 		return sb.setupDefaultGW()
451 455
 	}
... ...
@@ -632,6 +654,10 @@ func (ep *endpoint) sbLeave(sb *sandbox, force bool, options ...EndpointOption)
632 632
 		return err
633 633
 	}
634 634
 
635
+	if e := ep.deleteFromCluster(); e != nil {
636
+		log.Errorf("Could not delete state for endpoint %s from cluster: %v", ep.Name(), e)
637
+	}
638
+
635 639
 	sb.deleteHostsEntries(n.getSvcRecords(ep))
636 640
 	if !sb.inDelete && sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
637 641
 		return sb.setupDefaultGW()
... ...
@@ -730,7 +756,9 @@ func (ep *endpoint) Delete(force bool) error {
730 730
 	}()
731 731
 
732 732
 	// unwatch for service records
733
-	n.getController().unWatchSvcRecord(ep)
733
+	if !n.getController().cfg.Daemon.IsAgent {
734
+		n.getController().unWatchSvcRecord(ep)
735
+	}
734 736
 
735 737
 	if err = ep.deleteEndpoint(force); err != nil && !force {
736 738
 		return err
... ...
@@ -863,6 +891,14 @@ func CreateOptionAlias(name string, alias string) EndpointOption {
863 863
 	}
864 864
 }
865 865
 
866
+// CreateOptionService function returns an option setter for setting service binding configuration
867
+func CreateOptionService(name, id string) EndpointOption {
868
+	return func(ep *endpoint) {
869
+		ep.svcName = name
870
+		ep.svcID = id
871
+	}
872
+}
873
+
866 874
 //CreateOptionMyAlias function returns an option setter for setting endpoint's self alias
867 875
 func CreateOptionMyAlias(alias string) EndpointOption {
868 876
 	return func(ep *endpoint) {
... ...
@@ -981,7 +1017,7 @@ func (ep *endpoint) releaseAddress() {
981 981
 
982 982
 	log.Debugf("Releasing addresses for endpoint %s's interface on network %s", ep.Name(), n.Name())
983 983
 
984
-	ipam, err := n.getController().getIpamDriver(n.ipamType)
984
+	ipam, _, err := n.getController().getIPAMDriver(n.ipamType)
985 985
 	if err != nil {
986 986
 		log.Warnf("Failed to retrieve ipam driver to release interface address on delete of endpoint %s (%s): %v", ep.Name(), ep.ID(), err)
987 987
 		return
... ...
@@ -143,9 +143,16 @@ type endpointJoinInfo struct {
143 143
 	gw                    net.IP
144 144
 	gw6                   net.IP
145 145
 	StaticRoutes          []*types.StaticRoute
146
+	driverTableEntries    []*tableEntry
146 147
 	disableGatewayService bool
147 148
 }
148 149
 
150
+type tableEntry struct {
151
+	tableName string
152
+	key       string
153
+	value     []byte
154
+}
155
+
149 156
 func (ep *endpoint) Info() EndpointInfo {
150 157
 	n, err := ep.getNetworkFromStore()
151 158
 	if err != nil {
... ...
@@ -292,6 +299,19 @@ func (ep *endpoint) AddStaticRoute(destination *net.IPNet, routeType int, nextHo
292 292
 	return nil
293 293
 }
294 294
 
295
+func (ep *endpoint) AddTableEntry(tableName, key string, value []byte) error {
296
+	ep.Lock()
297
+	defer ep.Unlock()
298
+
299
+	ep.joinInfo.driverTableEntries = append(ep.joinInfo.driverTableEntries, &tableEntry{
300
+		tableName: tableName,
301
+		key:       key,
302
+		value:     value,
303
+	})
304
+
305
+	return nil
306
+}
307
+
295 308
 func (ep *endpoint) Sandbox() Sandbox {
296 309
 	cnt, ok := ep.getSandbox()
297 310
 	if !ok {
... ...
@@ -3,6 +3,7 @@ package ipam
3 3
 import (
4 4
 	"fmt"
5 5
 	"net"
6
+	"sort"
6 7
 	"sync"
7 8
 
8 9
 	log "github.com/Sirupsen/logrus"
... ...
@@ -58,9 +59,6 @@ func NewAllocator(lcDs, glDs datastore.DataStore) (*Allocator, error) {
58 58
 		{localAddressSpace, lcDs},
59 59
 		{globalAddressSpace, glDs},
60 60
 	} {
61
-		if aspc.ds == nil {
62
-			continue
63
-		}
64 61
 		a.initializeAddressSpace(aspc.as, aspc.ds)
65 62
 	}
66 63
 
... ...
@@ -143,15 +141,22 @@ func (a *Allocator) checkConsistency(as string) {
143 143
 }
144 144
 
145 145
 func (a *Allocator) initializeAddressSpace(as string, ds datastore.DataStore) error {
146
+	scope := ""
147
+	if ds != nil {
148
+		scope = ds.Scope()
149
+	}
150
+
146 151
 	a.Lock()
147
-	if _, ok := a.addrSpaces[as]; ok {
148
-		a.Unlock()
149
-		return types.ForbiddenErrorf("tried to add an axisting address space: %s", as)
152
+	if currAS, ok := a.addrSpaces[as]; ok {
153
+		if currAS.ds != nil {
154
+			a.Unlock()
155
+			return types.ForbiddenErrorf("a datastore is already configured for the address space %s", as)
156
+		}
150 157
 	}
151 158
 	a.addrSpaces[as] = &addrSpace{
152 159
 		subnets: map[SubnetKey]*PoolData{},
153 160
 		id:      dsConfigKey + "/" + as,
154
-		scope:   ds.Scope(),
161
+		scope:   scope,
155 162
 		ds:      ds,
156 163
 		alloc:   a,
157 164
 	}
... ...
@@ -313,10 +318,6 @@ func (a *Allocator) insertBitMask(key SubnetKey, pool *net.IPNet) error {
313 313
 	//log.Debugf("Inserting bitmask (%s, %s)", key.String(), pool.String())
314 314
 
315 315
 	store := a.getStore(key.AddressSpace)
316
-	if store == nil {
317
-		return types.InternalErrorf("could not find store for address space %s while inserting bit mask", key.AddressSpace)
318
-	}
319
-
320 316
 	ipVer := getAddressVersion(pool.IP)
321 317
 	ones, bits := pool.Mask.Size()
322 318
 	numAddresses := uint64(1 << uint(bits-ones))
... ...
@@ -401,13 +402,6 @@ func (a *Allocator) getPredefinedPool(as string, ipV6 bool) (*net.IPNet, error)
401 401
 		}
402 402
 
403 403
 		if !aSpace.contains(as, nw) {
404
-			if as == localAddressSpace {
405
-				// Check if nw overlap with system routes, name servers
406
-				if _, err := ipamutils.FindAvailableNetwork([]*net.IPNet{nw}); err == nil {
407
-					return nw, nil
408
-				}
409
-				continue
410
-			}
411 404
 			return nw, nil
412 405
 		}
413 406
 	}
... ...
@@ -563,13 +557,18 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres
563 563
 func (a *Allocator) DumpDatabase() string {
564 564
 	a.Lock()
565 565
 	aspaces := make(map[string]*addrSpace, len(a.addrSpaces))
566
+	orderedAS := make([]string, 0, len(a.addrSpaces))
566 567
 	for as, aSpace := range a.addrSpaces {
568
+		orderedAS = append(orderedAS, as)
567 569
 		aspaces[as] = aSpace
568 570
 	}
569 571
 	a.Unlock()
570 572
 
573
+	sort.Strings(orderedAS)
574
+
571 575
 	var s string
572
-	for as, aSpace := range aspaces {
576
+	for _, as := range orderedAS {
577
+		aSpace := aspaces[as]
573 578
 		s = fmt.Sprintf("\n\n%s Config", as)
574 579
 		aSpace.Lock()
575 580
 		for k, config := range aSpace.subnets {
... ...
@@ -82,8 +82,10 @@ func (a *Allocator) getStore(as string) datastore.DataStore {
82 82
 
83 83
 func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
84 84
 	store := a.getStore(as)
85
+
86
+	// IPAM may not have a valid store. In such cases it is just in-memory state.
85 87
 	if store == nil {
86
-		return nil, types.InternalErrorf("store for address space %s not found", as)
88
+		return nil, nil
87 89
 	}
88 90
 
89 91
 	pc := &addrSpace{id: dsConfigKey + "/" + as, ds: store, alloc: a}
... ...
@@ -100,8 +102,10 @@ func (a *Allocator) getAddressSpaceFromStore(as string) (*addrSpace, error) {
100 100
 
101 101
 func (a *Allocator) writeToStore(aSpace *addrSpace) error {
102 102
 	store := aSpace.store()
103
+
104
+	// IPAM may not have a valid store. In such cases it is just in-memory state.
103 105
 	if store == nil {
104
-		return types.InternalErrorf("invalid store while trying to write %s address space", aSpace.DataScope())
106
+		return nil
105 107
 	}
106 108
 
107 109
 	err := store.PutObjectAtomic(aSpace)
... ...
@@ -114,8 +118,10 @@ func (a *Allocator) writeToStore(aSpace *addrSpace) error {
114 114
 
115 115
 func (a *Allocator) deleteFromStore(aSpace *addrSpace) error {
116 116
 	store := aSpace.store()
117
+
118
+	// IPAM may not have a valid store. In such cases it is just in-memory state.
117 119
 	if store == nil {
118
-		return types.InternalErrorf("invalid store while trying to delete %s address space", aSpace.DataScope())
120
+		return nil
119 121
 	}
120 122
 
121 123
 	return store.DeleteObjectAtomic(aSpace)
... ...
@@ -1,4 +1,4 @@
1
-// +build linux freebsd
1
+// +build linux freebsd solaris darwin
2 2
 
3 3
 package builtin
4 4
 
5 5
deleted file mode 100644
... ...
@@ -1,22 +0,0 @@
1
-// Package ipamutils provides utililty functions for ipam management
2
-package ipamutils
3
-
4
-import (
5
-	"net"
6
-
7
-	"github.com/docker/libnetwork/types"
8
-)
9
-
10
-// ElectInterfaceAddresses looks for an interface on the OS with the specified name
11
-// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
12
-// it chooses from a predifined list the first IPv4 address which does not conflict
13
-// with other interfaces on the system.
14
-func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
15
-	return nil, nil, types.NotImplementedErrorf("not supported on freebsd")
16
-}
17
-
18
-// FindAvailableNetwork returns a network from the passed list which does not
19
-// overlap with existing interfaces in the system
20
-func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
21
-	return nil, types.NotImplementedErrorf("not supported on freebsd")
22
-}
23 1
deleted file mode 100644
... ...
@@ -1,76 +0,0 @@
1
-// Package ipamutils provides utililty functions for ipam management
2
-package ipamutils
3
-
4
-import (
5
-	"fmt"
6
-	"net"
7
-
8
-	"github.com/docker/libnetwork/netutils"
9
-	"github.com/docker/libnetwork/osl"
10
-	"github.com/docker/libnetwork/resolvconf"
11
-	"github.com/vishvananda/netlink"
12
-)
13
-
14
-// ElectInterfaceAddresses looks for an interface on the OS with the specified name
15
-// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
16
-// it chooses from a predifined list the first IPv4 address which does not conflict
17
-// with other interfaces on the system.
18
-func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
19
-	var (
20
-		v4Net  *net.IPNet
21
-		v6Nets []*net.IPNet
22
-		err    error
23
-	)
24
-
25
-	InitNetworks()
26
-
27
-	defer osl.InitOSContext()()
28
-
29
-	link, _ := netlink.LinkByName(name)
30
-	if link != nil {
31
-		v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4)
32
-		if err != nil {
33
-			return nil, nil, err
34
-		}
35
-		v6addr, err := netlink.AddrList(link, netlink.FAMILY_V6)
36
-		if err != nil {
37
-			return nil, nil, err
38
-		}
39
-		if len(v4addr) > 0 {
40
-			v4Net = v4addr[0].IPNet
41
-		}
42
-		for _, nlAddr := range v6addr {
43
-			v6Nets = append(v6Nets, nlAddr.IPNet)
44
-		}
45
-	}
46
-
47
-	if link == nil || v4Net == nil {
48
-		// Choose from predifined broad networks
49
-		v4Net, err = FindAvailableNetwork(PredefinedBroadNetworks)
50
-		if err != nil {
51
-			return nil, nil, err
52
-		}
53
-	}
54
-
55
-	return v4Net, v6Nets, nil
56
-}
57
-
58
-// FindAvailableNetwork returns a network from the passed list which does not
59
-// overlap with existing interfaces in the system
60
-func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
61
-	// We don't check for an error here, because we don't really care if we
62
-	// can't read /etc/resolv.conf. So instead we skip the append if resolvConf
63
-	// is nil. It either doesn't exist, or we can't read it for some reason.
64
-	var nameservers []string
65
-	if rc, err := resolvconf.Get(); err == nil {
66
-		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
67
-	}
68
-	for _, nw := range list {
69
-		if err := netutils.CheckNameserverOverlaps(nameservers, nw); err == nil {
70
-			if err := netutils.CheckRouteOverlaps(nw); err == nil {
71
-				return nw, nil
72
-			}
73
-		}
74
-	}
75
-	return nil, fmt.Errorf("no available network")
76
-}
77 1
deleted file mode 100644
... ...
@@ -1,22 +0,0 @@
1
-// Package ipamutils provides utililty functions for ipam management
2
-package ipamutils
3
-
4
-import (
5
-	"net"
6
-
7
-	"github.com/docker/libnetwork/types"
8
-)
9
-
10
-// ElectInterfaceAddresses looks for an interface on the OS with the specified name
11
-// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
12
-// it chooses from a predifined list the first IPv4 address which does not conflict
13
-// with other interfaces on the system.
14
-func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
15
-	return nil, nil, types.NotImplementedErrorf("not supported on windows")
16
-}
17
-
18
-// FindAvailableNetwork returns a network from the passed list which does not
19
-// overlap with existing interfaces in the system
20
-func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
21
-	return nil, types.NotImplementedErrorf("not supported on windows")
22
-}
... ...
@@ -39,6 +39,9 @@ const (
39 39
 	// OverlayNeighborIP constant represents overlay driver neighbor IP
40 40
 	OverlayNeighborIP = DriverPrefix + ".overlay.neighbor_ip"
41 41
 
42
+	// OverlayVxlanIDList constant represents a list of VXLAN IDs as CSV
43
+	OverlayVxlanIDList = DriverPrefix + ".overlay.vxlanid_list"
44
+
42 45
 	// Gateway represents the gateway for the network
43 46
 	Gateway = Prefix + ".gateway"
44 47
 
... ...
@@ -14,13 +14,6 @@ import (
14 14
 	"github.com/docker/libnetwork/types"
15 15
 )
16 16
 
17
-// constants for the IP address type
18
-const (
19
-	IP = iota // IPv4 and IPv6
20
-	IPv4
21
-	IPv6
22
-)
23
-
24 17
 var (
25 18
 	// ErrNetworkOverlapsWithNameservers preformatted error
26 19
 	ErrNetworkOverlapsWithNameservers = errors.New("requested network overlaps with nameserver")
27 20
new file mode 100644
... ...
@@ -0,0 +1,21 @@
0
+package netutils
1
+
2
+import (
3
+	"net"
4
+
5
+	"github.com/docker/libnetwork/types"
6
+)
7
+
8
+// ElectInterfaceAddresses looks for an interface on the OS with the specified name
9
+// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
10
+// it chooses from a predefined list the first IPv4 address which does not conflict
11
+// with other interfaces on the system.
12
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
13
+	return nil, nil, types.NotImplementedErrorf("not supported on freebsd")
14
+}
15
+
16
+// FindAvailableNetwork returns a network from the passed list which does not
17
+// overlap with existing interfaces in the system
18
+func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
19
+	return nil, types.NotImplementedErrorf("not supported on freebsd")
20
+}
... ...
@@ -4,9 +4,13 @@
4 4
 package netutils
5 5
 
6 6
 import (
7
+	"fmt"
7 8
 	"net"
8 9
 	"strings"
9 10
 
11
+	"github.com/docker/libnetwork/ipamutils"
12
+	"github.com/docker/libnetwork/osl"
13
+	"github.com/docker/libnetwork/resolvconf"
10 14
 	"github.com/docker/libnetwork/types"
11 15
 	"github.com/vishvananda/netlink"
12 16
 )
... ...
@@ -48,3 +52,66 @@ func GenerateIfaceName(prefix string, len int) (string, error) {
48 48
 	}
49 49
 	return "", types.InternalErrorf("could not generate interface name")
50 50
 }
51
+
52
+// ElectInterfaceAddresses looks for an interface on the OS with the
53
+// specified name and returns its IPv4 and IPv6 addresses in CIDR
54
+// form. If the interface does not exist, it chooses from a predefined
55
+// list the first IPv4 address which does not conflict with other
56
+// interfaces on the system.
57
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
58
+	var (
59
+		v4Net  *net.IPNet
60
+		v6Nets []*net.IPNet
61
+		err    error
62
+	)
63
+
64
+	defer osl.InitOSContext()()
65
+
66
+	link, _ := netlink.LinkByName(name)
67
+	if link != nil {
68
+		v4addr, err := netlink.AddrList(link, netlink.FAMILY_V4)
69
+		if err != nil {
70
+			return nil, nil, err
71
+		}
72
+		v6addr, err := netlink.AddrList(link, netlink.FAMILY_V6)
73
+		if err != nil {
74
+			return nil, nil, err
75
+		}
76
+		if len(v4addr) > 0 {
77
+			v4Net = v4addr[0].IPNet
78
+		}
79
+		for _, nlAddr := range v6addr {
80
+			v6Nets = append(v6Nets, nlAddr.IPNet)
81
+		}
82
+	}
83
+
84
+	if link == nil || v4Net == nil {
85
+		// Choose from predefined broad networks
86
+		v4Net, err = FindAvailableNetwork(ipamutils.PredefinedBroadNetworks)
87
+		if err != nil {
88
+			return nil, nil, err
89
+		}
90
+	}
91
+
92
+	return v4Net, v6Nets, nil
93
+}
94
+
95
+// FindAvailableNetwork returns a network from the passed list which does not
96
+// overlap with existing interfaces in the system
97
+func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
98
+	// We don't check for an error here, because we don't really care if we
99
+	// can't read /etc/resolv.conf. So instead we skip the append if resolvConf
100
+	// is nil. It either doesn't exist, or we can't read it for some reason.
101
+	var nameservers []string
102
+	if rc, err := resolvconf.Get(); err == nil {
103
+		nameservers = resolvconf.GetNameserversAsCIDR(rc.Content)
104
+	}
105
+	for _, nw := range list {
106
+		if err := CheckNameserverOverlaps(nameservers, nw); err == nil {
107
+			if err := CheckRouteOverlaps(nw); err == nil {
108
+				return nw, nil
109
+			}
110
+		}
111
+	}
112
+	return nil, fmt.Errorf("no available network")
113
+}
51 114
new file mode 100644
... ...
@@ -0,0 +1,32 @@
0
+package netutils
1
+
2
+// Solaris: TODO
3
+
4
+import (
5
+	"net"
6
+
7
+	"github.com/docker/libnetwork/ipamutils"
8
+)
9
+
10
+// ElectInterfaceAddresses looks for an interface on the OS with the specified name
11
+// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
12
+// it chooses from a predefined list the first IPv4 address which does not conflict
13
+// with other interfaces on the system.
14
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
15
+	var (
16
+		v4Net *net.IPNet
17
+		err   error
18
+	)
19
+
20
+	v4Net, err = FindAvailableNetwork(ipamutils.PredefinedBroadNetworks)
21
+	if err != nil {
22
+		return nil, nil, err
23
+	}
24
+	return v4Net, nil, nil
25
+}
26
+
27
+// FindAvailableNetwork returns a network from the passed list which does not
28
+// overlap with existing interfaces in the system
29
+func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
30
+	return list[0], nil
31
+}
0 32
new file mode 100644
... ...
@@ -0,0 +1,21 @@
0
+package netutils
1
+
2
+import (
3
+	"net"
4
+
5
+	"github.com/docker/libnetwork/types"
6
+)
7
+
8
+// ElectInterfaceAddresses looks for an interface on the OS with the specified name
9
+// and returns its IPv4 and IPv6 addresses in CIDR form. If the interface does not exist,
10
+// it chooses from a predefined list the first IPv4 address which does not conflict
11
+// with other interfaces on the system.
12
+func ElectInterfaceAddresses(name string) (*net.IPNet, []*net.IPNet, error) {
13
+	return nil, nil, types.NotImplementedErrorf("not supported on windows")
14
+}
15
+
16
+// FindAvailableNetwork returns a network from the passed list which does not
17
+// overlap with existing interfaces in the system
18
+func FindAvailableNetwork(list []*net.IPNet) (*net.IPNet, error) {
19
+	return nil, types.NotImplementedErrorf("not supported on windows")
20
+}
... ...
@@ -171,6 +171,7 @@ type network struct {
171 171
 	drvOnce      *sync.Once
172 172
 	internal     bool
173 173
 	inDelete     bool
174
+	driverTables []string
174 175
 	sync.Mutex
175 176
 }
176 177
 
... ...
@@ -620,49 +621,62 @@ func (n *network) processOptions(options ...NetworkOption) {
620 620
 	}
621 621
 }
622 622
 
623
-func (n *network) driverScope() string {
623
+func (n *network) resolveDriver(name string, load bool) (driverapi.Driver, *driverapi.Capability, error) {
624 624
 	c := n.getController()
625 625
 
626
-	c.Lock()
627 626
 	// Check if a driver for the specified network type is available
628
-	dd, ok := c.drivers[n.networkType]
629
-	c.Unlock()
627
+	d, cap := c.drvRegistry.Driver(name)
628
+	if d == nil {
629
+		if load {
630
+			var err error
631
+			err = c.loadDriver(name)
632
+			if err != nil {
633
+				return nil, nil, err
634
+			}
630 635
 
631
-	if !ok {
632
-		var err error
633
-		dd, err = c.loadDriver(n.networkType)
634
-		if err != nil {
635
-			// If driver could not be resolved simply return an empty string
636
-			return ""
636
+			d, cap = c.drvRegistry.Driver(name)
637
+			if d == nil {
638
+				return nil, nil, fmt.Errorf("could not resolve driver %s in registry", name)
639
+			}
640
+		} else {
641
+			// don't fail if driver loading is not required
642
+			return nil, nil, nil
637 643
 		}
638 644
 	}
639 645
 
640
-	return dd.capability.DataScope
646
+	return d, cap, nil
641 647
 }
642 648
 
643
-func (n *network) driver(load bool) (driverapi.Driver, error) {
644
-	c := n.getController()
649
+func (n *network) driverScope() string {
650
+	_, cap, err := n.resolveDriver(n.networkType, true)
651
+	if err != nil {
652
+		// If driver could not be resolved simply return an empty string
653
+		return ""
654
+	}
645 655
 
646
-	c.Lock()
647
-	// Check if a driver for the specified network type is available
648
-	dd, ok := c.drivers[n.networkType]
649
-	c.Unlock()
656
+	return cap.DataScope
657
+}
650 658
 
651
-	if !ok && load {
652
-		var err error
653
-		dd, err = c.loadDriver(n.networkType)
654
-		if err != nil {
655
-			return nil, err
656
-		}
657
-	} else if !ok {
658
-		// don't fail if driver loading is not required
659
-		return nil, nil
659
+func (n *network) driver(load bool) (driverapi.Driver, error) {
660
+	d, cap, err := n.resolveDriver(n.networkType, load)
661
+	if err != nil {
662
+		return nil, err
660 663
 	}
661 664
 
665
+	c := n.getController()
662 666
 	n.Lock()
663
-	n.scope = dd.capability.DataScope
667
+	// If load is not required, driver, cap and err may all be nil
668
+	if cap != nil {
669
+		n.scope = cap.DataScope
670
+	}
671
+	if c.cfg.Daemon.IsAgent {
672
+		// If we are running in agent mode then all networks
673
+		// in libnetwork are local scope regardless of the
674
+		// backing driver.
675
+		n.scope = datastore.LocalScope
676
+	}
664 677
 	n.Unlock()
665
-	return dd.driver, nil
678
+	return d, nil
666 679
 }
667 680
 
668 681
 func (n *network) Delete() error {
... ...
@@ -717,6 +731,12 @@ func (n *network) delete(force bool) error {
717 717
 		return fmt.Errorf("error deleting network from store: %v", err)
718 718
 	}
719 719
 
720
+	n.cancelDriverWatches()
721
+
722
+	if err = n.leaveCluster(); err != nil {
723
+		log.Errorf("Failed leaving network %s from the agent cluster: %v", n.Name(), err)
724
+	}
725
+
720 726
 	return nil
721 727
 }
722 728
 
... ...
@@ -786,12 +806,12 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi
786 786
 		}
787 787
 	}
788 788
 
789
-	ipam, err := n.getController().getIPAM(n.ipamType)
789
+	ipam, cap, err := n.getController().getIPAMDriver(n.ipamType)
790 790
 	if err != nil {
791 791
 		return nil, err
792 792
 	}
793 793
 
794
-	if ipam.capability.RequiresMACAddress {
794
+	if cap.RequiresMACAddress {
795 795
 		if ep.iface.mac == nil {
796 796
 			ep.iface.mac = netutils.GenerateRandomMAC()
797 797
 		}
... ...
@@ -801,7 +821,7 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi
801 801
 		ep.ipamOptions[netlabel.MacAddress] = ep.iface.mac.String()
802 802
 	}
803 803
 
804
-	if err = ep.assignAddress(ipam.driver, true, n.enableIPv6 && !n.postIPv6); err != nil {
804
+	if err = ep.assignAddress(ipam, true, n.enableIPv6 && !n.postIPv6); err != nil {
805 805
 		return nil, err
806 806
 	}
807 807
 	defer func() {
... ...
@@ -821,7 +841,7 @@ func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoi
821 821
 		}
822 822
 	}()
823 823
 
824
-	if err = ep.assignAddress(ipam.driver, false, n.enableIPv6 && n.postIPv6); err != nil {
824
+	if err = ep.assignAddress(ipam, false, n.enableIPv6 && n.postIPv6); err != nil {
825 825
 		return nil, err
826 826
 	}
827 827
 
... ...
@@ -985,14 +1005,14 @@ func (n *network) addSvcRecords(name string, epIP net.IP, epIPv6 net.IP, ipMapUp
985 985
 	c := n.getController()
986 986
 	c.Lock()
987 987
 	defer c.Unlock()
988
-	sr, ok := c.svcDb[n.ID()]
988
+	sr, ok := c.svcRecords[n.ID()]
989 989
 	if !ok {
990 990
 		sr = svcInfo{
991 991
 			svcMap:     make(map[string][]net.IP),
992 992
 			svcIPv6Map: make(map[string][]net.IP),
993 993
 			ipMap:      make(map[string]string),
994 994
 		}
995
-		c.svcDb[n.ID()] = sr
995
+		c.svcRecords[n.ID()] = sr
996 996
 	}
997 997
 
998 998
 	if ipMapUpdate {
... ...
@@ -1012,7 +1032,7 @@ func (n *network) deleteSvcRecords(name string, epIP net.IP, epIPv6 net.IP, ipMa
1012 1012
 	c := n.getController()
1013 1013
 	c.Lock()
1014 1014
 	defer c.Unlock()
1015
-	sr, ok := c.svcDb[n.ID()]
1015
+	sr, ok := c.svcRecords[n.ID()]
1016 1016
 	if !ok {
1017 1017
 		return
1018 1018
 	}
... ...
@@ -1037,7 +1057,7 @@ func (n *network) getSvcRecords(ep *endpoint) []etchosts.Record {
1037 1037
 	defer n.Unlock()
1038 1038
 
1039 1039
 	var recs []etchosts.Record
1040
-	sr, _ := n.ctrlr.svcDb[n.id]
1040
+	sr, _ := n.ctrlr.svcRecords[n.id]
1041 1041
 
1042 1042
 	for h, ip := range sr.svcMap {
1043 1043
 		if ep != nil && strings.Split(h, ".")[0] == ep.Name() {
... ...
@@ -1065,7 +1085,7 @@ func (n *network) ipamAllocate() error {
1065 1065
 		return nil
1066 1066
 	}
1067 1067
 
1068
-	ipam, err := n.getController().getIpamDriver(n.ipamType)
1068
+	ipam, _, err := n.getController().getIPAMDriver(n.ipamType)
1069 1069
 	if err != nil {
1070 1070
 		return err
1071 1071
 	}
... ...
@@ -1091,7 +1111,53 @@ func (n *network) ipamAllocate() error {
1091 1091
 		return nil
1092 1092
 	}
1093 1093
 
1094
-	return n.ipamAllocateVersion(6, ipam)
1094
+	err = n.ipamAllocateVersion(6, ipam)
1095
+	if err != nil {
1096
+		return err
1097
+	}
1098
+
1099
+	return nil
1100
+}
1101
+
1102
+func (n *network) requestPoolHelper(ipam ipamapi.Ipam, addressSpace, preferredPool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error) {
1103
+	for {
1104
+		poolID, pool, meta, err := ipam.RequestPool(addressSpace, preferredPool, subPool, options, v6)
1105
+		if err != nil {
1106
+			return "", nil, nil, err
1107
+		}
1108
+
1109
+		// If the network belongs to global scope or the pool was
1110
+		// explicitly chosen or it is invalid, do not perform the overlap check.
1111
+		if n.Scope() == datastore.GlobalScope || preferredPool != "" || !types.IsIPNetValid(pool) {
1112
+			return poolID, pool, meta, nil
1113
+		}
1114
+
1115
+		// Check for overlap and if none found, we have found the right pool.
1116
+		if _, err := netutils.FindAvailableNetwork([]*net.IPNet{pool}); err == nil {
1117
+			return poolID, pool, meta, nil
1118
+		}
1119
+
1120
+		// Pool obtained in this iteration is
1121
+		// overlapping. Hold onto the pool and don't release
1122
+		// it yet, because we don't want ipam to give us back
1123
+		// the same pool over again. But make sure we still do
1124
+		// a deferred release when we have either obtained a
1125
+		// non-overlapping pool or ran out of pre-defined
1126
+		// pools.
1127
+		defer func() {
1128
+			if err := ipam.ReleasePool(poolID); err != nil {
1129
+				log.Warnf("Failed to release overlapping pool %s while returning from pool request helper for network %s", pool, n.Name())
1130
+			}
1131
+		}()
1132
+
1133
+		// If this is a preferred pool request and the network
1134
+		// is local scope and there is an overlap, we fail the
1135
+		// network creation right here. The pool will be
1136
+		// released in the defer.
1137
+		if preferredPool != "" {
1138
+			return "", nil, nil, fmt.Errorf("requested subnet %s overlaps in the host", preferredPool)
1139
+		}
1140
+	}
1095 1141
 }
1096 1142
 
1097 1143
 func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
... ...
@@ -1130,7 +1196,7 @@ func (n *network) ipamAllocateVersion(ipVer int, ipam ipamapi.Ipam) error {
1130 1130
 		d := &IpamInfo{}
1131 1131
 		(*infoList)[i] = d
1132 1132
 
1133
-		d.PoolID, d.Pool, d.Meta, err = ipam.RequestPool(n.addrSpace, cfg.PreferredPool, cfg.SubPool, n.ipamOptions, ipVer == 6)
1133
+		d.PoolID, d.Pool, d.Meta, err = n.requestPoolHelper(ipam, n.addrSpace, cfg.PreferredPool, cfg.SubPool, n.ipamOptions, ipVer == 6)
1134 1134
 		if err != nil {
1135 1135
 			return err
1136 1136
 		}
... ...
@@ -1189,7 +1255,7 @@ func (n *network) ipamRelease() {
1189 1189
 	if n.Type() == "host" || n.Type() == "null" {
1190 1190
 		return
1191 1191
 	}
1192
-	ipam, err := n.getController().getIpamDriver(n.ipamType)
1192
+	ipam, _, err := n.getController().getIPAMDriver(n.ipamType)
1193 1193
 	if err != nil {
1194 1194
 		log.Warnf("Failed to retrieve ipam driver to release address pool(s) on delete of network %s (%s): %v", n.Name(), n.ID(), err)
1195 1195
 		return
... ...
@@ -1279,17 +1345,14 @@ func (n *network) getIPData(ipVer int) []driverapi.IPAMData {
1279 1279
 }
1280 1280
 
1281 1281
 func (n *network) deriveAddressSpace() (string, error) {
1282
-	c := n.getController()
1283
-	c.Lock()
1284
-	ipd, ok := c.ipamDrivers[n.ipamType]
1285
-	c.Unlock()
1286
-	if !ok {
1287
-		return "", types.NotFoundErrorf("could not find ipam driver %s to get default address space", n.ipamType)
1282
+	local, global, err := n.getController().drvRegistry.IPAMDefaultAddressSpaces(n.ipamType)
1283
+	if err != nil {
1284
+		return "", types.NotFoundErrorf("failed to get default address space: %v", err)
1288 1285
 	}
1289 1286
 	if n.DataScope() == datastore.GlobalScope {
1290
-		return ipd.defaultGlobalAddressSpace, nil
1287
+		return global, nil
1291 1288
 	}
1292
-	return ipd.defaultLocalAddressSpace, nil
1289
+	return local, nil
1293 1290
 }
1294 1291
 
1295 1292
 func (n *network) Info() NetworkInfo {
... ...
@@ -1382,3 +1445,11 @@ func (n *network) Labels() map[string]string {
1382 1382
 
1383 1383
 	return lbls
1384 1384
 }
1385
+
1386
+func (n *network) TableEventRegister(tableName string) error {
1387
+	n.Lock()
1388
+	defer n.Unlock()
1389
+
1390
+	n.driverTables = append(n.driverTables, tableName)
1391
+	return nil
1392
+}
1385 1393
new file mode 100644
... ...
@@ -0,0 +1,127 @@
0
+package networkdb
1
+
2
+import (
3
+	"github.com/hashicorp/memberlist"
4
+	"github.com/hashicorp/serf/serf"
5
+)
6
+
7
+type networkEventType uint8
8
+
9
+const (
10
+	networkJoin networkEventType = 1 + iota
11
+	networkLeave
12
+)
13
+
14
+type networkEventData struct {
15
+	Event     networkEventType
16
+	LTime     serf.LamportTime
17
+	NodeName  string
18
+	NetworkID string
19
+}
20
+
21
+type networkEventMessage struct {
22
+	id   string
23
+	node string
24
+	msg  []byte
25
+}
26
+
27
+func (m *networkEventMessage) Invalidates(other memberlist.Broadcast) bool {
28
+	otherm := other.(*networkEventMessage)
29
+	return m.id == otherm.id && m.node == otherm.node
30
+}
31
+
32
+func (m *networkEventMessage) Message() []byte {
33
+	return m.msg
34
+}
35
+
36
+func (m *networkEventMessage) Finished() {
37
+}
38
+
39
+func (nDB *NetworkDB) sendNetworkEvent(nid string, event networkEventType, ltime serf.LamportTime) error {
40
+	nEvent := networkEventData{
41
+		Event:     event,
42
+		LTime:     ltime,
43
+		NodeName:  nDB.config.NodeName,
44
+		NetworkID: nid,
45
+	}
46
+
47
+	raw, err := encodeMessage(networkEventMsg, &nEvent)
48
+	if err != nil {
49
+		return err
50
+	}
51
+
52
+	nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{
53
+		msg:  raw,
54
+		id:   nid,
55
+		node: nDB.config.NodeName,
56
+	})
57
+	return nil
58
+}
59
+
60
+type tableEventType uint8
61
+
62
+const (
63
+	tableEntryCreate tableEventType = 1 + iota
64
+	tableEntryUpdate
65
+	tableEntryDelete
66
+)
67
+
68
+type tableEventData struct {
69
+	Event     tableEventType
70
+	LTime     serf.LamportTime
71
+	NetworkID string
72
+	TableName string
73
+	NodeName  string
74
+	Value     []byte
75
+	Key       string
76
+}
77
+
78
+type tableEventMessage struct {
79
+	id    string
80
+	tname string
81
+	key   string
82
+	msg   []byte
83
+	node  string
84
+}
85
+
86
+func (m *tableEventMessage) Invalidates(other memberlist.Broadcast) bool {
87
+	otherm := other.(*tableEventMessage)
88
+	return m.id == otherm.id && m.tname == otherm.tname && m.key == otherm.key
89
+}
90
+
91
+func (m *tableEventMessage) Message() []byte {
92
+	return m.msg
93
+}
94
+
95
+func (m *tableEventMessage) Finished() {
96
+}
97
+
98
+func (nDB *NetworkDB) sendTableEvent(event tableEventType, nid string, tname string, key string, entry *entry) error {
99
+	tEvent := tableEventData{
100
+		Event:     event,
101
+		LTime:     entry.ltime,
102
+		NodeName:  nDB.config.NodeName,
103
+		NetworkID: nid,
104
+		TableName: tname,
105
+		Key:       key,
106
+		Value:     entry.value,
107
+	}
108
+
109
+	raw, err := encodeMessage(tableEventMsg, &tEvent)
110
+	if err != nil {
111
+		return err
112
+	}
113
+
114
+	nDB.RLock()
115
+	broadcastQ := nDB.networks[nDB.config.NodeName][nid].tableBroadcasts
116
+	nDB.RUnlock()
117
+
118
+	broadcastQ.QueueBroadcast(&tableEventMessage{
119
+		msg:   raw,
120
+		id:    nid,
121
+		tname: tname,
122
+		key:   key,
123
+		node:  nDB.config.NodeName,
124
+	})
125
+	return nil
126
+}
0 127
new file mode 100644
... ...
@@ -0,0 +1,446 @@
0
+package networkdb
1
+
2
+import (
3
+	"crypto/rand"
4
+	"fmt"
5
+	"math/big"
6
+	rnd "math/rand"
7
+	"strings"
8
+	"time"
9
+
10
+	"github.com/Sirupsen/logrus"
11
+	"github.com/hashicorp/memberlist"
12
+	"github.com/hashicorp/serf/serf"
13
+)
14
+
15
+const reapInterval = 2 * time.Second
16
+
17
+type logWriter struct{}
18
+
19
+func (l *logWriter) Write(p []byte) (int, error) {
20
+	str := string(p)
21
+
22
+	switch {
23
+	case strings.Contains(str, "[WARN]"):
24
+		logrus.Warn(str)
25
+	case strings.Contains(str, "[DEBUG]"):
26
+		logrus.Debug(str)
27
+	case strings.Contains(str, "[INFO]"):
28
+		logrus.Info(str)
29
+	case strings.Contains(str, "[ERR]"):
30
+		logrus.Warn(str)
31
+	}
32
+
33
+	return len(p), nil
34
+}
35
+
36
+func (nDB *NetworkDB) clusterInit() error {
37
+	config := memberlist.DefaultLANConfig()
38
+	config.Name = nDB.config.NodeName
39
+	config.BindAddr = nDB.config.BindAddr
40
+
41
+	if nDB.config.BindPort != 0 {
42
+		config.BindPort = nDB.config.BindPort
43
+	}
44
+
45
+	config.ProtocolVersion = memberlist.ProtocolVersionMax
46
+	config.Delegate = &delegate{nDB: nDB}
47
+	config.Events = &eventDelegate{nDB: nDB}
48
+	config.LogOutput = &logWriter{}
49
+
50
+	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
51
+		NumNodes: func() int {
52
+			return len(nDB.nodes)
53
+		},
54
+		RetransmitMult: config.RetransmitMult,
55
+	}
56
+
57
+	mlist, err := memberlist.Create(config)
58
+	if err != nil {
59
+		return fmt.Errorf("failed to create memberlist: %v", err)
60
+	}
61
+
62
+	nDB.stopCh = make(chan struct{})
63
+	nDB.memberlist = mlist
64
+	nDB.mConfig = config
65
+
66
+	for _, trigger := range []struct {
67
+		interval time.Duration
68
+		fn       func()
69
+	}{
70
+		{reapInterval, nDB.reapState},
71
+		{config.GossipInterval, nDB.gossip},
72
+		{config.PushPullInterval, nDB.bulkSyncTables},
73
+	} {
74
+		t := time.NewTicker(trigger.interval)
75
+		go nDB.triggerFunc(trigger.interval, t.C, nDB.stopCh, trigger.fn)
76
+		nDB.tickers = append(nDB.tickers, t)
77
+	}
78
+
79
+	return nil
80
+}
81
+
82
+func (nDB *NetworkDB) clusterJoin(members []string) error {
83
+	mlist := nDB.memberlist
84
+
85
+	if _, err := mlist.Join(members); err != nil {
86
+		return fmt.Errorf("could not join node to memberlist: %v", err)
87
+	}
88
+
89
+	return nil
90
+}
91
+
92
+func (nDB *NetworkDB) clusterLeave() error {
93
+	mlist := nDB.memberlist
94
+
95
+	if err := mlist.Leave(time.Second); err != nil {
96
+		return err
97
+	}
98
+
99
+	close(nDB.stopCh)
100
+
101
+	for _, t := range nDB.tickers {
102
+		t.Stop()
103
+	}
104
+
105
+	return mlist.Shutdown()
106
+}
107
+
108
+func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, stop <-chan struct{}, f func()) {
109
+	// Use a random stagger to avoid synchronizing
110
+	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger))
111
+	select {
112
+	case <-time.After(randStagger):
113
+	case <-stop:
114
+		return
115
+	}
116
+	for {
117
+		select {
118
+		case <-C:
119
+			f()
120
+		case <-stop:
121
+			return
122
+		}
123
+	}
124
+}
125
+
126
+func (nDB *NetworkDB) reapState() {
127
+	nDB.reapNetworks()
128
+	nDB.reapTableEntries()
129
+}
130
+
131
+func (nDB *NetworkDB) reapNetworks() {
132
+	now := time.Now()
133
+	nDB.Lock()
134
+	for name, nn := range nDB.networks {
135
+		for id, n := range nn {
136
+			if n.leaving && now.Sub(n.leaveTime) > reapInterval {
137
+				delete(nn, id)
138
+				nDB.deleteNetworkNode(id, name)
139
+			}
140
+		}
141
+	}
142
+	nDB.Unlock()
143
+}
144
+
145
+func (nDB *NetworkDB) reapTableEntries() {
146
+	var paths []string
147
+
148
+	now := time.Now()
149
+
150
+	nDB.RLock()
151
+	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
152
+		entry, ok := v.(*entry)
153
+		if !ok {
154
+			return false
155
+		}
156
+
157
+		if !entry.deleting || now.Sub(entry.deleteTime) <= reapInterval {
158
+			return false
159
+		}
160
+
161
+		paths = append(paths, path)
162
+		return false
163
+	})
164
+	nDB.RUnlock()
165
+
166
+	nDB.Lock()
167
+	for _, path := range paths {
168
+		params := strings.Split(path[1:], "/")
169
+		tname := params[0]
170
+		nid := params[1]
171
+		key := params[2]
172
+
173
+		if _, ok := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key)); !ok {
174
+			logrus.Errorf("Could not delete entry in table %s with network id %s and key %s as it does not exist", tname, nid, key)
175
+		}
176
+
177
+		if _, ok := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)); !ok {
178
+			logrus.Errorf("Could not delete entry in network %s with table name %s and key %s as it does not exist", nid, tname, key)
179
+		}
180
+	}
181
+	nDB.Unlock()
182
+}
183
+
184
+func (nDB *NetworkDB) gossip() {
185
+	networkNodes := make(map[string][]string)
186
+	nDB.RLock()
187
+	for nid := range nDB.networks[nDB.config.NodeName] {
188
+		networkNodes[nid] = nDB.networkNodes[nid]
189
+
190
+	}
191
+	nDB.RUnlock()
192
+
193
+	for nid, nodes := range networkNodes {
194
+		mNodes := nDB.mRandomNodes(3, nodes)
195
+		bytesAvail := udpSendBuf - compoundHeaderOverhead
196
+
197
+		nDB.RLock()
198
+		broadcastQ := nDB.networks[nDB.config.NodeName][nid].tableBroadcasts
199
+		nDB.RUnlock()
200
+
201
+		if broadcastQ == nil {
202
+			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
203
+			continue
204
+		}
205
+
206
+		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
207
+		if len(msgs) == 0 {
208
+			continue
209
+		}
210
+
211
+		// Create a compound message
212
+		compound := makeCompoundMessage(msgs)
213
+
214
+		for _, node := range mNodes {
215
+			nDB.RLock()
216
+			mnode := nDB.nodes[node]
217
+			nDB.RUnlock()
218
+
219
+			if mnode == nil {
220
+				break
221
+			}
222
+
223
+			// Send the compound message
224
+			if err := nDB.memberlist.SendToUDP(mnode, compound.Bytes()); err != nil {
225
+				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
226
+			}
227
+		}
228
+	}
229
+}
230
+
231
+type bulkSyncMessage struct {
232
+	LTime       serf.LamportTime
233
+	Unsolicited bool
234
+	NodeName    string
235
+	Networks    []string
236
+	Payload     []byte
237
+}
238
+
239
+func (nDB *NetworkDB) bulkSyncTables() {
240
+	var networks []string
241
+	nDB.RLock()
242
+	for nid := range nDB.networks[nDB.config.NodeName] {
243
+		networks = append(networks, nid)
244
+	}
245
+	nDB.RUnlock()
246
+
247
+	for {
248
+		if len(networks) == 0 {
249
+			break
250
+		}
251
+
252
+		nid := networks[0]
253
+		networks = networks[1:]
254
+
255
+		completed, err := nDB.bulkSync(nid, false)
256
+		if err != nil {
257
+			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
258
+			continue
259
+		}
260
+
261
+		// Remove all the networks for which we have
262
+		// successfully completed bulk sync in this iteration.
263
+		updatedNetworks := make([]string, 0, len(networks))
264
+		for _, nid := range networks {
265
+			for _, completedNid := range completed {
266
+				if nid == completedNid {
267
+					continue
268
+				}
269
+
270
+				updatedNetworks = append(updatedNetworks, nid)
271
+			}
272
+		}
273
+
274
+		networks = updatedNetworks
275
+	}
276
+}
277
+
278
+func (nDB *NetworkDB) bulkSync(nid string, all bool) ([]string, error) {
279
+	nDB.RLock()
280
+	nodes := nDB.networkNodes[nid]
281
+	nDB.RUnlock()
282
+
283
+	if !all {
284
+		// If not all, then just pick one.
285
+		nodes = nDB.mRandomNodes(1, nodes)
286
+	}
287
+
288
+	logrus.Debugf("%s: Initiating bulk sync with nodes %v", nDB.config.NodeName, nodes)
289
+	var err error
290
+	var networks []string
291
+	for _, node := range nodes {
292
+		if node == nDB.config.NodeName {
293
+			continue
294
+		}
295
+
296
+		networks = nDB.findCommonNetworks(node)
297
+		err = nDB.bulkSyncNode(networks, node, true)
298
+		if err != nil {
299
+			err = fmt.Errorf("bulk sync failed on node %s: %v", node, err)
300
+		}
301
+	}
302
+
303
+	if err != nil {
304
+		return nil, err
305
+	}
306
+
307
+	return networks, nil
308
+}
309
+
310
+// Bulk sync all the table entries belonging to a set of networks to a
311
+// single peer node. It can be unsolicited or can be in response to an
312
+// unsolicited bulk sync
313
+func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
314
+	var msgs [][]byte
315
+
316
+	logrus.Debugf("%s: Initiating bulk sync for networks %v with node %s", nDB.config.NodeName, networks, node)
317
+
318
+	nDB.RLock()
319
+	mnode := nDB.nodes[node]
320
+	if mnode == nil {
321
+		nDB.RUnlock()
322
+		return nil
323
+	}
324
+
325
+	for _, nid := range networks {
326
+		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
327
+			entry, ok := v.(*entry)
328
+			if !ok {
329
+				return false
330
+			}
331
+
332
+			params := strings.Split(path[1:], "/")
333
+			tEvent := tableEventData{
334
+				Event:     tableEntryCreate,
335
+				LTime:     entry.ltime,
336
+				NodeName:  entry.node,
337
+				NetworkID: nid,
338
+				TableName: params[1],
339
+				Key:       params[2],
340
+				Value:     entry.value,
341
+			}
342
+
343
+			msg, err := encodeMessage(tableEventMsg, &tEvent)
344
+			if err != nil {
345
+				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
346
+				return false
347
+			}
348
+
349
+			msgs = append(msgs, msg)
350
+			return false
351
+		})
352
+	}
353
+	nDB.RUnlock()
354
+
355
+	// Create a compound message
356
+	compound := makeCompoundMessage(msgs)
357
+
358
+	bsm := bulkSyncMessage{
359
+		LTime:       nDB.tableClock.Time(),
360
+		Unsolicited: unsolicited,
361
+		NodeName:    nDB.config.NodeName,
362
+		Networks:    networks,
363
+		Payload:     compound.Bytes(),
364
+	}
365
+
366
+	buf, err := encodeMessage(bulkSyncMsg, &bsm)
367
+	if err != nil {
368
+		return fmt.Errorf("failed to encode bulk sync message: %v", err)
369
+	}
370
+
371
+	nDB.Lock()
372
+	ch := make(chan struct{})
373
+	nDB.bulkSyncAckTbl[node] = ch
374
+	nDB.Unlock()
375
+
376
+	err = nDB.memberlist.SendToTCP(mnode, buf)
377
+	if err != nil {
378
+		nDB.Lock()
379
+		delete(nDB.bulkSyncAckTbl, node)
380
+		nDB.Unlock()
381
+
382
+		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
383
+	}
384
+
385
+	startTime := time.Now()
386
+	select {
387
+	case <-time.After(30 * time.Second):
388
+		logrus.Errorf("Bulk sync to node %s timed out", node)
389
+	case <-ch:
390
+		nDB.Lock()
391
+		delete(nDB.bulkSyncAckTbl, node)
392
+		nDB.Unlock()
393
+
394
+		logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime))
395
+	}
396
+
397
+	return nil
398
+}
399
+
400
+// Returns a random offset between 0 and n
401
+func randomOffset(n int) int {
402
+	if n == 0 {
403
+		return 0
404
+	}
405
+
406
+	val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
407
+	if err != nil {
408
+		logrus.Errorf("Failed to get a random offset: %v", err)
409
+		return 0
410
+	}
411
+
412
+	return int(val.Int64())
413
+}
414
+
415
+// mRandomNodes is used to select up to m random nodes. It is possible
416
+// that less than m nodes are returned.
417
+func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
418
+	n := len(nodes)
419
+	mNodes := make([]string, 0, m)
420
+OUTER:
421
+	// Probe up to 3*n times, with large n this is not necessary
422
+	// since m << n, but with small n we want search to be
423
+	// exhaustive
424
+	for i := 0; i < 3*n && len(mNodes) < m; i++ {
425
+		// Get random node
426
+		idx := randomOffset(n)
427
+		node := nodes[idx]
428
+
429
+		if node == nDB.config.NodeName {
430
+			continue
431
+		}
432
+
433
+		// Check if we have this node already
434
+		for j := 0; j < len(mNodes); j++ {
435
+			if node == mNodes[j] {
436
+				continue OUTER
437
+			}
438
+		}
439
+
440
+		// Append the node
441
+		mNodes = append(mNodes, node)
442
+	}
443
+
444
+	return mNodes
445
+}
0 446
new file mode 100644
... ...
@@ -0,0 +1,315 @@
0
+package networkdb
1
+
2
+import (
3
+	"fmt"
4
+	"time"
5
+
6
+	"github.com/Sirupsen/logrus"
7
+	"github.com/hashicorp/serf/serf"
8
+)
9
+
10
+type networkData struct {
11
+	LTime    serf.LamportTime
12
+	ID       string
13
+	NodeName string
14
+	Leaving  bool
15
+}
16
+
17
+type networkPushPull struct {
18
+	LTime    serf.LamportTime
19
+	Networks []networkData
20
+}
21
+
22
+type delegate struct {
23
+	nDB *NetworkDB
24
+}
25
+
26
+func (d *delegate) NodeMeta(limit int) []byte {
27
+	return []byte{}
28
+}
29
+
30
+func (nDB *NetworkDB) handleNetworkEvent(nEvent *networkEventData) bool {
31
+	// Update our local clock if the received message has a newer
32
+	// time.
33
+	nDB.networkClock.Witness(nEvent.LTime)
34
+
35
+	nDB.Lock()
36
+	defer nDB.Unlock()
37
+
38
+	nodeNetworks, ok := nDB.networks[nEvent.NodeName]
39
+	if !ok {
40
+		// We haven't heard about this node at all.  Ignore the leave
41
+		if nEvent.Event == networkLeave {
42
+			return false
43
+		}
44
+
45
+		nodeNetworks = make(map[string]*network)
46
+		nDB.networks[nEvent.NodeName] = nodeNetworks
47
+	}
48
+
49
+	if n, ok := nodeNetworks[nEvent.NetworkID]; ok {
50
+		// We have the latest state. Ignore the event
51
+		// since it is stale.
52
+		if n.ltime >= nEvent.LTime {
53
+			return false
54
+		}
55
+
56
+		n.ltime = nEvent.LTime
57
+		n.leaving = nEvent.Event == networkLeave
58
+		if n.leaving {
59
+			n.leaveTime = time.Now()
60
+		}
61
+
62
+		return true
63
+	}
64
+
65
+	if nEvent.Event == networkLeave {
66
+		return false
67
+	}
68
+
69
+	// This remote network join is being seen the first time.
70
+	nodeNetworks[nEvent.NetworkID] = &network{
71
+		id:    nEvent.NetworkID,
72
+		ltime: nEvent.LTime,
73
+	}
74
+
75
+	nDB.networkNodes[nEvent.NetworkID] = append(nDB.networkNodes[nEvent.NetworkID], nEvent.NodeName)
76
+	return true
77
+}
78
+
79
+func (nDB *NetworkDB) handleTableEvent(tEvent *tableEventData) bool {
80
+	// Update our local clock if the received message has a newer
81
+	// time.
82
+	nDB.tableClock.Witness(tEvent.LTime)
83
+
84
+	if entry, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key); err == nil {
85
+		// We have the latest state. Ignore the event
86
+		// since it is stale.
87
+		if entry.ltime >= tEvent.LTime {
88
+			return false
89
+		}
90
+	}
91
+
92
+	entry := &entry{
93
+		ltime:    tEvent.LTime,
94
+		node:     tEvent.NodeName,
95
+		value:    tEvent.Value,
96
+		deleting: tEvent.Event == tableEntryDelete,
97
+	}
98
+
99
+	if entry.deleting {
100
+		entry.deleteTime = time.Now()
101
+	}
102
+
103
+	nDB.Lock()
104
+	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tEvent.TableName, tEvent.NetworkID, tEvent.Key), entry)
105
+	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", tEvent.NetworkID, tEvent.TableName, tEvent.Key), entry)
106
+	nDB.Unlock()
107
+
108
+	var op opType
109
+	switch tEvent.Event {
110
+	case tableEntryCreate:
111
+		op = opCreate
112
+	case tableEntryUpdate:
113
+		op = opUpdate
114
+	case tableEntryDelete:
115
+		op = opDelete
116
+	}
117
+
118
+	nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
119
+	return true
120
+}
121
+
122
+func (nDB *NetworkDB) handleCompound(buf []byte) {
123
+	// Decode the parts
124
+	trunc, parts, err := decodeCompoundMessage(buf[1:])
125
+	if err != nil {
126
+		logrus.Errorf("Failed to decode compound request: %v", err)
127
+		return
128
+	}
129
+
130
+	// Log any truncation
131
+	if trunc > 0 {
132
+		logrus.Warnf("Compound request had %d truncated messages", trunc)
133
+	}
134
+
135
+	// Handle each message
136
+	for _, part := range parts {
137
+		nDB.handleMessage(part)
138
+	}
139
+}
140
+
141
+func (nDB *NetworkDB) handleTableMessage(buf []byte) {
142
+	var tEvent tableEventData
143
+	if err := decodeMessage(buf[1:], &tEvent); err != nil {
144
+		logrus.Errorf("Error decoding table event message: %v", err)
145
+		return
146
+	}
147
+
148
+	if rebroadcast := nDB.handleTableEvent(&tEvent); rebroadcast {
149
+		// Copy the buffer since we cannot rely on the slice not changing
150
+		newBuf := make([]byte, len(buf))
151
+		copy(newBuf, buf)
152
+
153
+		nDB.RLock()
154
+		n, ok := nDB.networks[nDB.config.NodeName][tEvent.NetworkID]
155
+		nDB.RUnlock()
156
+
157
+		if !ok {
158
+			return
159
+		}
160
+
161
+		broadcastQ := n.tableBroadcasts
162
+		broadcastQ.QueueBroadcast(&tableEventMessage{
163
+			msg:   newBuf,
164
+			id:    tEvent.NetworkID,
165
+			tname: tEvent.TableName,
166
+			key:   tEvent.Key,
167
+			node:  nDB.config.NodeName,
168
+		})
169
+	}
170
+}
171
+
172
+func (nDB *NetworkDB) handleNetworkMessage(buf []byte) {
173
+	var nEvent networkEventData
174
+	if err := decodeMessage(buf[1:], &nEvent); err != nil {
175
+		logrus.Errorf("Error decoding network event message: %v", err)
176
+		return
177
+	}
178
+
179
+	if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast {
180
+		// Copy the buffer since we cannot rely on the slice not changing
181
+		newBuf := make([]byte, len(buf))
182
+		copy(newBuf, buf)
183
+
184
+		nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{
185
+			msg:  newBuf,
186
+			id:   nEvent.NetworkID,
187
+			node: nEvent.NodeName,
188
+		})
189
+	}
190
+}
191
+
192
+func (nDB *NetworkDB) handleBulkSync(buf []byte) {
193
+	var bsm bulkSyncMessage
194
+	if err := decodeMessage(buf[1:], &bsm); err != nil {
195
+		logrus.Errorf("Error decoding bulk sync message: %v", err)
196
+		return
197
+	}
198
+
199
+	if bsm.LTime > 0 {
200
+		nDB.tableClock.Witness(bsm.LTime)
201
+	}
202
+
203
+	nDB.handleMessage(bsm.Payload)
204
+
205
+	// Don't respond to a bulk sync which was not unsolicited
206
+	if !bsm.Unsolicited {
207
+		nDB.RLock()
208
+		ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName]
209
+		nDB.RUnlock()
210
+		if ok {
211
+			close(ch)
212
+		}
213
+
214
+		return
215
+	}
216
+
217
+	if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil {
218
+		logrus.Errorf("Error in responding to bulk sync from node %s: %v", nDB.nodes[bsm.NodeName].Addr, err)
219
+	}
220
+}
221
+
222
+func (nDB *NetworkDB) handleMessage(buf []byte) {
223
+	msgType := messageType(buf[0])
224
+
225
+	switch msgType {
226
+	case networkEventMsg:
227
+		nDB.handleNetworkMessage(buf)
228
+	case tableEventMsg:
229
+		nDB.handleTableMessage(buf)
230
+	case compoundMsg:
231
+		nDB.handleCompound(buf)
232
+	case bulkSyncMsg:
233
+		nDB.handleBulkSync(buf)
234
+	default:
235
+		logrus.Errorf("%s: unknown message type %d payload = %v", nDB.config.NodeName, msgType, buf[:8])
236
+	}
237
+}
238
+
239
+func (d *delegate) NotifyMsg(buf []byte) {
240
+	if len(buf) == 0 {
241
+		return
242
+	}
243
+
244
+	d.nDB.handleMessage(buf)
245
+}
246
+
247
+func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
248
+	return d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit)
249
+}
250
+
251
+func (d *delegate) LocalState(join bool) []byte {
252
+	d.nDB.RLock()
253
+	defer d.nDB.RUnlock()
254
+
255
+	pp := networkPushPull{
256
+		LTime: d.nDB.networkClock.Time(),
257
+	}
258
+
259
+	for name, nn := range d.nDB.networks {
260
+		for _, n := range nn {
261
+			pp.Networks = append(pp.Networks, networkData{
262
+				LTime:    n.ltime,
263
+				ID:       n.id,
264
+				NodeName: name,
265
+				Leaving:  n.leaving,
266
+			})
267
+		}
268
+	}
269
+
270
+	buf, err := encodeMessage(networkPushPullMsg, &pp)
271
+	if err != nil {
272
+		logrus.Errorf("Failed to encode local network state: %v", err)
273
+		return nil
274
+	}
275
+
276
+	return buf
277
+}
278
+
279
+func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
280
+	if len(buf) == 0 {
281
+		logrus.Error("zero byte remote network state received")
282
+		return
283
+	}
284
+
285
+	if messageType(buf[0]) != networkPushPullMsg {
286
+		logrus.Errorf("Invalid message type %v received from remote", buf[0])
287
+	}
288
+
289
+	pp := networkPushPull{}
290
+	if err := decodeMessage(buf[1:], &pp); err != nil {
291
+		logrus.Errorf("Failed to decode remote network state: %v", err)
292
+		return
293
+	}
294
+
295
+	if pp.LTime > 0 {
296
+		d.nDB.networkClock.Witness(pp.LTime)
297
+	}
298
+
299
+	for _, n := range pp.Networks {
300
+		nEvent := &networkEventData{
301
+			LTime:     n.LTime,
302
+			NodeName:  n.NodeName,
303
+			NetworkID: n.ID,
304
+			Event:     networkJoin,
305
+		}
306
+
307
+		if n.Leaving {
308
+			nEvent.Event = networkLeave
309
+		}
310
+
311
+		d.nDB.handleNetworkEvent(nEvent)
312
+	}
313
+
314
+}
0 315
new file mode 100644
... ...
@@ -0,0 +1,23 @@
0
+package networkdb
1
+
2
+import "github.com/hashicorp/memberlist"
3
+
4
+type eventDelegate struct {
5
+	nDB *NetworkDB
6
+}
7
+
8
+func (e *eventDelegate) NotifyJoin(n *memberlist.Node) {
9
+	e.nDB.Lock()
10
+	e.nDB.nodes[n.Name] = n
11
+	e.nDB.Unlock()
12
+}
13
+
14
+func (e *eventDelegate) NotifyLeave(n *memberlist.Node) {
15
+	e.nDB.deleteNodeTableEntries(n.Name)
16
+	e.nDB.Lock()
17
+	delete(e.nDB.nodes, n.Name)
18
+	e.nDB.Unlock()
19
+}
20
+
21
+func (e *eventDelegate) NotifyUpdate(n *memberlist.Node) {
22
+}
0 23
new file mode 100644
... ...
@@ -0,0 +1,122 @@
0
+package networkdb
1
+
2
+import (
3
+	"bytes"
4
+	"encoding/binary"
5
+	"fmt"
6
+
7
+	"github.com/hashicorp/go-msgpack/codec"
8
+)
9
+
10
+type messageType uint8
11
+
12
+const (
13
+	// For network join/leave event message
14
+	networkEventMsg messageType = 1 + iota
15
+
16
+	// For pushing/pulling network/node association state
17
+	networkPushPullMsg
18
+
19
+	// For table entry CRUD event message
20
+	tableEventMsg
21
+
22
+	// For building a compound message which packs many different
23
+	// message types together
24
+	compoundMsg
25
+
26
+	// For syncing table entries in bulk b/w nodes.
27
+	bulkSyncMsg
28
+)
29
+
30
+const (
31
+	// Max udp message size chosen to avoid network packet
32
+	// fragmentation.
33
+	udpSendBuf = 1400
34
+
35
+	// Compound message header overhead 1 byte(message type) + 4
36
+	// bytes (num messages)
37
+	compoundHeaderOverhead = 5
38
+
39
+	// Overhead for each embedded message in a compound message 2
40
+	// bytes (len of embedded message)
41
+	compoundOverhead = 2
42
+)
43
+
44
+func decodeMessage(buf []byte, out interface{}) error {
45
+	var handle codec.MsgpackHandle
46
+	return codec.NewDecoder(bytes.NewReader(buf), &handle).Decode(out)
47
+}
48
+
49
+func encodeMessage(t messageType, msg interface{}) ([]byte, error) {
50
+	buf := bytes.NewBuffer(nil)
51
+	buf.WriteByte(uint8(t))
52
+
53
+	handle := codec.MsgpackHandle{}
54
+	encoder := codec.NewEncoder(buf, &handle)
55
+	err := encoder.Encode(msg)
56
+	return buf.Bytes(), err
57
+}
58
+
59
+// makeCompoundMessage takes a list of messages and generates
60
+// a single compound message containing all of them
61
+func makeCompoundMessage(msgs [][]byte) *bytes.Buffer {
62
+	// Create a local buffer
63
+	buf := bytes.NewBuffer(nil)
64
+
65
+	// Write out the type
66
+	buf.WriteByte(uint8(compoundMsg))
67
+
68
+	// Write out the number of message
69
+	binary.Write(buf, binary.BigEndian, uint32(len(msgs)))
70
+
71
+	// Add the message lengths
72
+	for _, m := range msgs {
73
+		binary.Write(buf, binary.BigEndian, uint16(len(m)))
74
+	}
75
+
76
+	// Append the messages
77
+	for _, m := range msgs {
78
+		buf.Write(m)
79
+	}
80
+
81
+	return buf
82
+}
83
+
84
// decodeCompoundMessage splits a compound message into its embedded
// messages. buf must start at the big-endian uint32 message count
// (i.e. the message type byte has already been stripped), followed by
// one big-endian uint16 length per message and then the payloads.
//
// It returns the messages that could be extracted, the number of
// messages that were truncated (missing from the end of buf), and an
// error if the header or the length table itself is incomplete.
func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
	// The count is 4 bytes wide; anything shorter cannot be decoded.
	if len(buf) < 4 {
		return 0, nil, fmt.Errorf("missing compound length byte")
	}
	numParts := binary.BigEndian.Uint32(buf[:4])
	buf = buf[4:]

	// Check we have enough bytes for the length table. The count comes
	// off the wire, so do the arithmetic in 64 bits to keep a huge
	// value from wrapping around and slipping past the check.
	if uint64(numParts)*2 > uint64(len(buf)) {
		return 0, nil, fmt.Errorf("truncated len slice")
	}
	count := int(numParts)

	// Decode the lengths
	lengths := make([]uint16, count)
	for i := range lengths {
		lengths[i] = binary.BigEndian.Uint16(buf[i*2 : i*2+2])
	}
	buf = buf[count*2:]

	// Split each message
	for idx, msgLen := range lengths {
		if len(buf) < int(msgLen) {
			// This message and all the following ones are missing.
			return count - idx, parts, nil
		}

		// Extract the slice, seek past on the buffer
		parts = append(parts, buf[:msgLen])
		buf = buf[msgLen:]
	}

	return 0, parts, nil
}
0 122
new file mode 100644
... ...
@@ -0,0 +1,424 @@
0
+package networkdb
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+	"sync"
6
+	"time"
7
+
8
+	"github.com/Sirupsen/logrus"
9
+	"github.com/armon/go-radix"
10
+	"github.com/docker/go-events"
11
+	"github.com/hashicorp/memberlist"
12
+	"github.com/hashicorp/serf/serf"
13
+)
14
+
15
+const (
16
+	byTable int = 1 + iota
17
+	byNetwork
18
+)
19
+
20
+// NetworkDB instance drives the networkdb cluster and acts as the broker
21
+// for cluster-scoped and network-scoped gossip and watches.
22
+type NetworkDB struct {
23
+	sync.RWMutex
24
+
25
+	// NetworkDB configuration.
26
+	config *Config
27
+
28
+	// local copy of memberlist config that we use to drive
29
+	// network scoped gossip and bulk sync.
30
+	mConfig *memberlist.Config
31
+
32
+	// All the tree index (byTable, byNetwork) that we maintain
33
+	// the db.
34
+	indexes map[int]*radix.Tree
35
+
36
+	// Memberlist we use to drive the cluster.
37
+	memberlist *memberlist.Memberlist
38
+
39
+	// List of all peer nodes in the cluster not-limited to any
40
+	// network.
41
+	nodes map[string]*memberlist.Node
42
+
43
+	// A multi-dimensional map of network/node attachments. The
44
+	// first key is a node name and the second key is a network ID
45
+	// for the network that node is participating in.
46
+	networks map[string]map[string]*network
47
+
48
+	// A map of nodes which are participating in a given
49
+	// network. The key is a network ID.
50
+
51
+	networkNodes map[string][]string
52
+
53
+	// A table of ack channels for every node from which we are
54
+	// waiting for an ack.
55
+	bulkSyncAckTbl map[string]chan struct{}
56
+
57
+	// Global lamport clock for node network attach events.
58
+	networkClock serf.LamportClock
59
+
60
+	// Global lamport clock for table events.
61
+	tableClock serf.LamportClock
62
+
63
+	// Broadcast queue for network event gossip.
64
+	networkBroadcasts *memberlist.TransmitLimitedQueue
65
+
66
+	// A central stop channel to stop all go routines running on
67
+	// behalf of the NetworkDB instance.
68
+	stopCh chan struct{}
69
+
70
+	// A central broadcaster for all local watchers watching table
71
+	// events.
72
+	broadcaster *events.Broadcaster
73
+
74
+	// List of all tickers which needed to be stopped when
75
+	// cleaning up.
76
+	tickers []*time.Ticker
77
+}
78
+
79
+// network describes the node/network attachment.
80
+type network struct {
81
+	// Network ID
82
+	id string
83
+
84
+	// Lamport time for the latest state of the entry.
85
+	ltime serf.LamportTime
86
+
87
+	// Node leave is in progress.
88
+	leaving bool
89
+
90
+	// The time this node knew about the node's network leave.
91
+	leaveTime time.Time
92
+
93
+	// The broadcast queue for table event gossip. This is only
94
+	// initialized for this node's network attachment entries.
95
+	tableBroadcasts *memberlist.TransmitLimitedQueue
96
+}
97
+
98
+// Config represents the configuration of the networkdb instance and
99
+// can be passed by the caller.
100
+type Config struct {
101
+	// NodeName is the cluster wide unique name for this node.
102
+	NodeName string
103
+
104
+	// BindAddr is the local node's IP address that we bind to for
105
+	// cluster communication.
106
+	BindAddr string
107
+
108
+	// BindPort is the local node's port to which we bind to for
109
+	// cluster communication.
110
+	BindPort int
111
+}
112
+
113
+// entry defines a table entry
114
+type entry struct {
115
+	// node from which this entry was learned.
116
+	node string
117
+
118
+	// Lamport time for the most recent update to the entry
119
+	ltime serf.LamportTime
120
+
121
+	// Opaque value stored in the entry
122
+	value []byte
123
+
124
+	// Deleting the entry is in progress. All entries linger in
125
+	// the cluster for certain amount of time after deletion.
126
+	deleting bool
127
+
128
+	// The wall clock time when this node learned about this deletion.
129
+	deleteTime time.Time
130
+}
131
+
132
+// New creates a new instance of NetworkDB using the Config passed by
133
+// the caller.
134
+func New(c *Config) (*NetworkDB, error) {
135
+	nDB := &NetworkDB{
136
+		config:         c,
137
+		indexes:        make(map[int]*radix.Tree),
138
+		networks:       make(map[string]map[string]*network),
139
+		nodes:          make(map[string]*memberlist.Node),
140
+		networkNodes:   make(map[string][]string),
141
+		bulkSyncAckTbl: make(map[string]chan struct{}),
142
+		broadcaster:    events.NewBroadcaster(),
143
+	}
144
+
145
+	nDB.indexes[byTable] = radix.New()
146
+	nDB.indexes[byNetwork] = radix.New()
147
+
148
+	if err := nDB.clusterInit(); err != nil {
149
+		return nil, err
150
+	}
151
+
152
+	return nDB, nil
153
+}
154
+
155
+// Join joins this NetworkDB instance with a list of peer NetworkDB
156
+// instances passed by the caller in the form of addr:port
157
+func (nDB *NetworkDB) Join(members []string) error {
158
+	return nDB.clusterJoin(members)
159
+}
160
+
161
+// Close destroys this NetworkDB instance by leave the cluster,
162
+// stopping timers, canceling goroutines etc.
163
+func (nDB *NetworkDB) Close() {
164
+	if err := nDB.clusterLeave(); err != nil {
165
+		logrus.Errorf("Could not close DB %s: %v", nDB.config.NodeName, err)
166
+	}
167
+}
168
+
169
+// GetEntry retrieves the value of a table entry in a given (network,
170
+// table, key) tuple
171
+func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) {
172
+	entry, err := nDB.getEntry(tname, nid, key)
173
+	if err != nil {
174
+		return nil, err
175
+	}
176
+
177
+	return entry.value, nil
178
+}
179
+
180
+func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) {
181
+	nDB.RLock()
182
+	defer nDB.RUnlock()
183
+
184
+	e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key))
185
+	if !ok {
186
+		return nil, fmt.Errorf("could not get entry in table %s with network id %s and key %s", tname, nid, key)
187
+	}
188
+
189
+	return e.(*entry), nil
190
+}
191
+
192
+// CreateEntry creates a table entry in NetworkDB for given (network,
193
+// table, key) tuple and if the NetworkDB is part of the cluster
194
+// propogates this event to the cluster. It is an error to create an
195
+// entry for the same tuple for which there is already an existing
196
+// entry.
197
+func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error {
198
+	if _, err := nDB.GetEntry(tname, nid, key); err == nil {
199
+		return fmt.Errorf("cannot create entry as the entry in table %s with network id %s and key %s already exists", tname, nid, key)
200
+	}
201
+
202
+	entry := &entry{
203
+		ltime: nDB.tableClock.Increment(),
204
+		node:  nDB.config.NodeName,
205
+		value: value,
206
+	}
207
+
208
+	if err := nDB.sendTableEvent(tableEntryCreate, nid, tname, key, entry); err != nil {
209
+		return fmt.Errorf("cannot send table create event: %v", err)
210
+	}
211
+
212
+	nDB.Lock()
213
+	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
214
+	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
215
+	nDB.Unlock()
216
+
217
+	nDB.broadcaster.Write(makeEvent(opCreate, tname, nid, key, value))
218
+	return nil
219
+}
220
+
221
+// UpdateEntry updates a table entry in NetworkDB for given (network,
222
+// table, key) tuple and if the NetworkDB is part of the cluster
223
+// propogates this event to the cluster. It is an error to update a
224
+// non-existent entry.
225
+func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error {
226
+	if _, err := nDB.GetEntry(tname, nid, key); err != nil {
227
+		return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
228
+	}
229
+
230
+	entry := &entry{
231
+		ltime: nDB.tableClock.Increment(),
232
+		node:  nDB.config.NodeName,
233
+		value: value,
234
+	}
235
+
236
+	if err := nDB.sendTableEvent(tableEntryUpdate, nid, tname, key, entry); err != nil {
237
+		return fmt.Errorf("cannot send table update event: %v", err)
238
+	}
239
+
240
+	nDB.Lock()
241
+	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
242
+	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
243
+	nDB.Unlock()
244
+
245
+	nDB.broadcaster.Write(makeEvent(opUpdate, tname, nid, key, value))
246
+	return nil
247
+}
248
+
249
+// DeleteEntry deletes a table entry in NetworkDB for given (network,
250
+// table, key) tuple and if the NetworkDB is part of the cluster
251
+// propogates this event to the cluster.
252
+func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
253
+	value, err := nDB.GetEntry(tname, nid, key)
254
+	if err != nil {
255
+		return fmt.Errorf("cannot delete entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key)
256
+	}
257
+
258
+	entry := &entry{
259
+		ltime:      nDB.tableClock.Increment(),
260
+		node:       nDB.config.NodeName,
261
+		value:      value,
262
+		deleting:   true,
263
+		deleteTime: time.Now(),
264
+	}
265
+
266
+	if err := nDB.sendTableEvent(tableEntryDelete, nid, tname, key, entry); err != nil {
267
+		return fmt.Errorf("cannot send table delete event: %v", err)
268
+	}
269
+
270
+	nDB.Lock()
271
+	nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
272
+	nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
273
+	nDB.Unlock()
274
+
275
+	nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, value))
276
+	return nil
277
+}
278
+
279
+func (nDB *NetworkDB) deleteNodeTableEntries(node string) {
280
+	nDB.Lock()
281
+	nDB.indexes[byTable].Walk(func(path string, v interface{}) bool {
282
+		oldEntry := v.(*entry)
283
+		if oldEntry.node != node {
284
+			return false
285
+		}
286
+
287
+		params := strings.Split(path[1:], "/")
288
+		tname := params[0]
289
+		nid := params[1]
290
+		key := params[2]
291
+
292
+		entry := &entry{
293
+			ltime:      oldEntry.ltime,
294
+			node:       node,
295
+			value:      oldEntry.value,
296
+			deleting:   true,
297
+			deleteTime: time.Now(),
298
+		}
299
+
300
+		nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry)
301
+		nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry)
302
+		return false
303
+	})
304
+	nDB.Unlock()
305
+}
306
+
307
+// WalkTable walks a single table in NetworkDB and invokes the passed
308
+// function for each entry in the table passing the network, key,
309
+// value. The walk stops if the passed function returns a true.
310
+func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bool) error {
311
+	nDB.RLock()
312
+	values := make(map[string]interface{})
313
+	nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool {
314
+		values[path] = v
315
+		return false
316
+	})
317
+	nDB.RUnlock()
318
+
319
+	for k, v := range values {
320
+		params := strings.Split(k[1:], "/")
321
+		nid := params[1]
322
+		key := params[2]
323
+		if fn(nid, key, v.(*entry).value) {
324
+			return nil
325
+		}
326
+	}
327
+
328
+	return nil
329
+}
330
+
331
+// JoinNetwork joins this node to a given network and propogates this
332
+// event across the cluster. This triggers this node joining the
333
+// sub-cluster of this network and participates in the network-scoped
334
+// gossip and bulk sync for this network.
335
+func (nDB *NetworkDB) JoinNetwork(nid string) error {
336
+	ltime := nDB.networkClock.Increment()
337
+
338
+	nDB.Lock()
339
+	nodeNetworks, ok := nDB.networks[nDB.config.NodeName]
340
+	if !ok {
341
+		nodeNetworks = make(map[string]*network)
342
+		nDB.networks[nDB.config.NodeName] = nodeNetworks
343
+	}
344
+	nodeNetworks[nid] = &network{id: nid, ltime: ltime}
345
+	nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{
346
+		NumNodes: func() int {
347
+			return len(nDB.networkNodes[nid])
348
+		},
349
+		RetransmitMult: 4,
350
+	}
351
+	nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName)
352
+	nDB.Unlock()
353
+
354
+	if err := nDB.sendNetworkEvent(nid, networkJoin, ltime); err != nil {
355
+		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
356
+	}
357
+
358
+	logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid)
359
+	if _, err := nDB.bulkSync(nid, true); err != nil {
360
+		logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err)
361
+	}
362
+
363
+	return nil
364
+}
365
+
366
+// LeaveNetwork leaves this node from a given network and propogates
367
+// this event across the cluster. This triggers this node leaving the
368
+// sub-cluster of this network and as a result will no longer
369
+// participate in the network-scoped gossip and bulk sync for this
370
+// network.
371
+func (nDB *NetworkDB) LeaveNetwork(nid string) error {
372
+	ltime := nDB.networkClock.Increment()
373
+	if err := nDB.sendNetworkEvent(nid, networkLeave, ltime); err != nil {
374
+		return fmt.Errorf("failed to send leave network event for %s: %v", nid, err)
375
+	}
376
+
377
+	nDB.Lock()
378
+	defer nDB.Unlock()
379
+	nodeNetworks, ok := nDB.networks[nDB.config.NodeName]
380
+	if !ok {
381
+		return fmt.Errorf("could not find self node for network %s while trying to leave", nid)
382
+	}
383
+
384
+	n, ok := nodeNetworks[nid]
385
+	if !ok {
386
+		return fmt.Errorf("could not find network %s while trying to leave", nid)
387
+	}
388
+
389
+	n.ltime = ltime
390
+	n.leaving = true
391
+	return nil
392
+}
393
+
394
+// Deletes the node from the list of nodes which participate in the
395
+// passed network. Caller should hold the NetworkDB lock while calling
396
+// this
397
+func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) {
398
+	nodes := nDB.networkNodes[nid]
399
+	for i, name := range nodes {
400
+		if name == nodeName {
401
+			nodes[i] = nodes[len(nodes)-1]
402
+			nodes = nodes[:len(nodes)-1]
403
+			break
404
+		}
405
+	}
406
+	nDB.networkNodes[nid] = nodes
407
+}
408
+
409
+// findCommonnetworks find the networks that both this node and the
410
+// passed node have joined.
411
+func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string {
412
+	nDB.RLock()
413
+	defer nDB.RUnlock()
414
+
415
+	var networks []string
416
+	for nid := range nDB.networks[nDB.config.NodeName] {
417
+		if _, ok := nDB.networks[nodeName][nid]; ok {
418
+			networks = append(networks, nid)
419
+		}
420
+	}
421
+
422
+	return networks
423
+}
0 424
new file mode 100644
... ...
@@ -0,0 +1,98 @@
0
+package networkdb
1
+
2
+import "github.com/docker/go-events"
3
+
4
+type opType uint8
5
+
6
+const (
7
+	opCreate opType = 1 + iota
8
+	opUpdate
9
+	opDelete
10
+)
11
+
12
+type event struct {
13
+	Table     string
14
+	NetworkID string
15
+	Key       string
16
+	Value     []byte
17
+}
18
+
19
+// CreateEvent generates a table entry create event to the watchers
20
+type CreateEvent event
21
+
22
+// UpdateEvent generates a table entry update event to the watchers
23
+type UpdateEvent event
24
+
25
+// DeleteEvent generates a table entry delete event to the watchers
26
+type DeleteEvent event
27
+
28
+// Watch creates a watcher with filters for a particular table or
29
+// network or key or any combination of the tuple. If any of the
30
+// filter is an empty string it acts as a wildcard for that
31
+// field. Watch returns a channel of events, where the events will be
32
+// sent.
33
+func (nDB *NetworkDB) Watch(tname, nid, key string) (chan events.Event, func()) {
34
+	var matcher events.Matcher
35
+
36
+	if tname != "" || nid != "" || key != "" {
37
+		matcher = events.MatcherFunc(func(ev events.Event) bool {
38
+			var evt event
39
+			switch ev := ev.(type) {
40
+			case CreateEvent:
41
+				evt = event(ev)
42
+			case UpdateEvent:
43
+				evt = event(ev)
44
+			case DeleteEvent:
45
+				evt = event(ev)
46
+			}
47
+
48
+			if tname != "" && evt.Table != tname {
49
+				return false
50
+			}
51
+
52
+			if nid != "" && evt.NetworkID != nid {
53
+				return false
54
+			}
55
+
56
+			if key != "" && evt.Key != key {
57
+				return false
58
+			}
59
+
60
+			return true
61
+		})
62
+	}
63
+
64
+	ch := events.NewChannel(0)
65
+	sink := events.Sink(events.NewQueue(ch))
66
+
67
+	if matcher != nil {
68
+		sink = events.NewFilter(sink, matcher)
69
+	}
70
+
71
+	nDB.broadcaster.Add(sink)
72
+	return ch.C, func() {
73
+		nDB.broadcaster.Remove(sink)
74
+		ch.Close()
75
+		sink.Close()
76
+	}
77
+}
78
+
79
+func makeEvent(op opType, tname, nid, key string, value []byte) events.Event {
80
+	ev := event{
81
+		Table:     tname,
82
+		NetworkID: nid,
83
+		Key:       key,
84
+		Value:     value,
85
+	}
86
+
87
+	switch op {
88
+	case opCreate:
89
+		return CreateEvent(ev)
90
+	case opUpdate:
91
+		return UpdateEvent(ev)
92
+	case opDelete:
93
+		return DeleteEvent(ev)
94
+	}
95
+
96
+	return nil
97
+}
0 98
new file mode 100644
... ...
@@ -0,0 +1,4 @@
0
+package osl
1
+
2
+// IfaceOption is a function option type to set interface options
3
+type IfaceOption func()
0 4
new file mode 100644
... ...
@@ -0,0 +1,4 @@
0
+package osl
1
+
2
+// NeighOption is a function option type to set neighbor options
3
+type NeighOption func()
... ...
@@ -142,7 +142,7 @@ func (n *networkNamespace) SetGatewayIPv6(gwv6 net.IP) error {
142 142
 
143 143
 	err := programGateway(n.nsPath(), gwv6, true)
144 144
 	if err == nil {
145
-		n.SetGatewayIPv6(gwv6)
145
+		n.setGatewayIPv6(gwv6)
146 146
 	}
147 147
 
148 148
 	return err
... ...
@@ -10,8 +10,8 @@ import (
10 10
 
11 11
 	"github.com/Sirupsen/logrus"
12 12
 	"github.com/docker/docker/pkg/ioutils"
13
-	"github.com/docker/libnetwork/netutils"
14 13
 	"github.com/docker/libnetwork/resolvconf/dns"
14
+	"github.com/docker/libnetwork/types"
15 15
 )
16 16
 
17 17
 var (
... ...
@@ -122,7 +122,7 @@ func FilterResolvDNS(resolvConf []byte, ipv6Enabled bool) (*File, error) {
122 122
 	}
123 123
 	// if the resulting resolvConf has no more nameservers defined, add appropriate
124 124
 	// default DNS servers for IPv4 and (optionally) IPv6
125
-	if len(GetNameservers(cleanedResolvConf, netutils.IP)) == 0 {
125
+	if len(GetNameservers(cleanedResolvConf, types.IP)) == 0 {
126 126
 		logrus.Infof("No non-localhost DNS nameservers are left in resolv.conf. Using default external servers : %v", defaultIPv4Dns)
127 127
 		dns := defaultIPv4Dns
128 128
 		if ipv6Enabled {
... ...
@@ -158,11 +158,11 @@ func GetNameservers(resolvConf []byte, kind int) []string {
158 158
 	nameservers := []string{}
159 159
 	for _, line := range getLines(resolvConf, []byte("#")) {
160 160
 		var ns [][]byte
161
-		if kind == netutils.IP {
161
+		if kind == types.IP {
162 162
 			ns = nsRegexp.FindSubmatch(line)
163
-		} else if kind == netutils.IPv4 {
163
+		} else if kind == types.IPv4 {
164 164
 			ns = nsIPv4Regexpmatch.FindSubmatch(line)
165
-		} else if kind == netutils.IPv6 {
165
+		} else if kind == types.IPv6 {
166 166
 			ns = nsIPv6Regexpmatch.FindSubmatch(line)
167 167
 		}
168 168
 		if len(ns) > 0 {
... ...
@@ -177,7 +177,7 @@ func GetNameservers(resolvConf []byte, kind int) []string {
177 177
 // This function's output is intended for net.ParseCIDR
178 178
 func GetNameserversAsCIDR(resolvConf []byte) []string {
179 179
 	nameservers := []string{}
180
-	for _, nameserver := range GetNameservers(resolvConf, netutils.IP) {
180
+	for _, nameserver := range GetNameservers(resolvConf, types.IP) {
181 181
 		nameservers = append(nameservers, nameserver+"/32")
182 182
 	}
183 183
 	return nameservers
... ...
@@ -9,8 +9,7 @@ import (
9 9
 	"time"
10 10
 
11 11
 	log "github.com/Sirupsen/logrus"
12
-	"github.com/docker/libnetwork/iptables"
13
-	"github.com/docker/libnetwork/netutils"
12
+	"github.com/docker/libnetwork/types"
14 13
 	"github.com/miekg/dns"
15 14
 )
16 15
 
... ...
@@ -47,7 +46,7 @@ const (
47 47
 	maxExtDNS       = 3 //max number of external servers to try
48 48
 	extIOTimeout    = 4 * time.Second
49 49
 	defaultRespSize = 512
50
-	maxConcurrent   = 50
50
+	maxConcurrent   = 100
51 51
 	logInterval     = 2 * time.Second
52 52
 	maxDNSID        = 65536
53 53
 )
... ...
@@ -105,8 +104,6 @@ func (r *resolver) SetupFunc() func() {
105 105
 			r.err = fmt.Errorf("error in opening name server socket %v", err)
106 106
 			return
107 107
 		}
108
-		laddr := r.conn.LocalAddr()
109
-		_, ipPort, _ := net.SplitHostPort(laddr.String())
110 108
 
111 109
 		// Listen on a TCP as well
112 110
 		tcpaddr := &net.TCPAddr{
... ...
@@ -118,21 +115,6 @@ func (r *resolver) SetupFunc() func() {
118 118
 			r.err = fmt.Errorf("error in opening name TCP server socket %v", err)
119 119
 			return
120 120
 		}
121
-		ltcpaddr := r.tcpListen.Addr()
122
-		_, tcpPort, _ := net.SplitHostPort(ltcpaddr.String())
123
-		rules := [][]string{
124
-			{"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", laddr.String()},
125
-			{"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
126
-			{"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", ltcpaddr.String()},
127
-			{"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
128
-		}
129
-
130
-		for _, rule := range rules {
131
-			r.err = iptables.RawCombinedOutputNative(rule...)
132
-			if r.err != nil {
133
-				return
134
-			}
135
-		}
136 121
 		r.err = nil
137 122
 	})
138 123
 }
... ...
@@ -142,6 +124,11 @@ func (r *resolver) Start() error {
142 142
 	if r.err != nil {
143 143
 		return r.err
144 144
 	}
145
+
146
+	if err := r.setupIPTable(); err != nil {
147
+		return fmt.Errorf("setting up IP table rules failed: %v", err)
148
+	}
149
+
145 150
 	s := &dns.Server{Handler: r, PacketConn: r.conn}
146 151
 	r.server = s
147 152
 	go func() {
... ...
@@ -240,7 +227,7 @@ func (r *resolver) handleIPQuery(name string, query *dns.Msg, ipType int) (*dns.
240 240
 	if len(addr) > 1 {
241 241
 		addr = shuffleAddr(addr)
242 242
 	}
243
-	if ipType == netutils.IPv4 {
243
+	if ipType == types.IPv4 {
244 244
 		for _, ip := range addr {
245 245
 			rr := new(dns.A)
246 246
 			rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL}
... ...
@@ -305,6 +292,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
305 305
 		extConn net.Conn
306 306
 		resp    *dns.Msg
307 307
 		err     error
308
+		writer  dns.ResponseWriter
308 309
 	)
309 310
 
310 311
 	if query == nil || len(query.Question) == 0 {
... ...
@@ -312,9 +300,9 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
312 312
 	}
313 313
 	name := query.Question[0].Name
314 314
 	if query.Question[0].Qtype == dns.TypeA {
315
-		resp, err = r.handleIPQuery(name, query, netutils.IPv4)
315
+		resp, err = r.handleIPQuery(name, query, types.IPv4)
316 316
 	} else if query.Question[0].Qtype == dns.TypeAAAA {
317
-		resp, err = r.handleIPQuery(name, query, netutils.IPv6)
317
+		resp, err = r.handleIPQuery(name, query, types.IPv6)
318 318
 	} else if query.Question[0].Qtype == dns.TypePTR {
319 319
 		resp, err = r.handlePTRQuery(name, query)
320 320
 	}
... ...
@@ -342,7 +330,9 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
342 342
 		if resp.Len() > maxSize {
343 343
 			truncateResp(resp, maxSize, proto == "tcp")
344 344
 		}
345
+		writer = w
345 346
 	} else {
347
+		queryID := query.Id
346 348
 		for i := 0; i < maxExtDNS; i++ {
347 349
 			extDNS := &r.extDNSList[i]
348 350
 			if extDNS.ipStr == "" {
... ...
@@ -388,11 +378,11 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
388 388
 
389 389
 			// forwardQueryStart stores required context to mux multiple client queries over
390 390
 			// one connection; and limits the number of outstanding concurrent queries.
391
-			if r.forwardQueryStart(w, query) == false {
391
+			if r.forwardQueryStart(w, query, queryID) == false {
392 392
 				old := r.tStamp
393 393
 				r.tStamp = time.Now()
394 394
 				if r.tStamp.Sub(old) > logInterval {
395
-					log.Errorf("More than %v concurrent queries from %s", maxConcurrent, w.LocalAddr().String())
395
+					log.Errorf("More than %v concurrent queries from %s", maxConcurrent, extConn.LocalAddr().String())
396 396
 				}
397 397
 				continue
398 398
 			}
... ...
@@ -418,32 +408,33 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) {
418 418
 
419 419
 			// Retrieves the context for the forwarded query and returns the client connection
420 420
 			// to send the reply to
421
-			w = r.forwardQueryEnd(w, resp)
422
-			if w == nil {
421
+			writer = r.forwardQueryEnd(w, resp)
422
+			if writer == nil {
423 423
 				continue
424 424
 			}
425 425
 
426 426
 			resp.Compress = true
427 427
 			break
428 428
 		}
429
-
430
-		if resp == nil || w == nil {
429
+		if resp == nil || writer == nil {
431 430
 			return
432 431
 		}
433 432
 	}
434 433
 
435
-	err = w.WriteMsg(resp)
436
-	if err != nil {
434
+	if writer == nil {
435
+		return
436
+	}
437
+	if err = writer.WriteMsg(resp); err != nil {
437 438
 		log.Errorf("error writing resolver resp, %s", err)
438 439
 	}
439 440
 }
440 441
 
441
-func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg) bool {
442
+func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg, queryID uint16) bool {
442 443
 	proto := w.LocalAddr().Network()
443 444
 	dnsID := uint16(rand.Intn(maxDNSID))
444 445
 
445 446
 	cc := clientConn{
446
-		dnsID:      msg.Id,
447
+		dnsID:      queryID,
447 448
 		respWriter: w,
448 449
 	}
449 450
 
... ...
@@ -462,7 +453,7 @@ func (r *resolver) forwardQueryStart(w dns.ResponseWriter, msg *dns.Msg) bool {
462 462
 		for ok := true; ok == true; dnsID = uint16(rand.Intn(maxDNSID)) {
463 463
 			_, ok = r.client[dnsID]
464 464
 		}
465
-		log.Debugf("client dns id %v, changed id %v", msg.Id, dnsID)
465
+		log.Debugf("client dns id %v, changed id %v", queryID, dnsID)
466 466
 		r.client[dnsID] = cc
467 467
 		msg.Id = dnsID
468 468
 	default:
... ...
@@ -497,6 +488,7 @@ func (r *resolver) forwardQueryEnd(w dns.ResponseWriter, msg *dns.Msg) dns.Respo
497 497
 			log.Debugf("Can't retrieve client context for dns id %v", msg.Id)
498 498
 			return nil
499 499
 		}
500
+		log.Debugf("dns msg id %v, client id %v", msg.Id, cc.dnsID)
500 501
 		delete(r.client, msg.Id)
501 502
 		msg.Id = cc.dnsID
502 503
 		w = cc.respWriter
503 504
new file mode 100644
... ...
@@ -0,0 +1,77 @@
0
+// +build !windows
1
+
2
+package libnetwork
3
+
4
+import (
5
+	"fmt"
6
+	"net"
7
+	"os"
8
+	"os/exec"
9
+	"runtime"
10
+
11
+	log "github.com/Sirupsen/logrus"
12
+	"github.com/docker/docker/pkg/reexec"
13
+	"github.com/docker/libnetwork/iptables"
14
+	"github.com/vishvananda/netns"
15
+)
16
+
17
+func init() {
18
+	reexec.Register("setup-resolver", reexecSetupResolver)
19
+}
20
+
21
+func reexecSetupResolver() {
22
+	runtime.LockOSThread()
23
+	defer runtime.UnlockOSThread()
24
+
25
+	if len(os.Args) < 4 {
26
+		log.Error("invalid number of arguments..")
27
+		os.Exit(1)
28
+	}
29
+
30
+	_, ipPort, _ := net.SplitHostPort(os.Args[2])
31
+	_, tcpPort, _ := net.SplitHostPort(os.Args[3])
32
+	rules := [][]string{
33
+		{"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "udp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[2]},
34
+		{"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "udp", "--sport", ipPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
35
+		{"-t", "nat", "-A", "OUTPUT", "-d", resolverIP, "-p", "tcp", "--dport", dnsPort, "-j", "DNAT", "--to-destination", os.Args[3]},
36
+		{"-t", "nat", "-A", "POSTROUTING", "-s", resolverIP, "-p", "tcp", "--sport", tcpPort, "-j", "SNAT", "--to-source", ":" + dnsPort},
37
+	}
38
+
39
+	f, err := os.OpenFile(os.Args[1], os.O_RDONLY, 0)
40
+	if err != nil {
41
+		log.Errorf("failed get network namespace %q: %v", os.Args[1], err)
42
+		os.Exit(2)
43
+	}
44
+	defer f.Close()
45
+
46
+	nsFD := f.Fd()
47
+	if err = netns.Set(netns.NsHandle(nsFD)); err != nil {
48
+		log.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
49
+		os.Exit(3)
50
+	}
51
+
52
+	for _, rule := range rules {
53
+		if iptables.RawCombinedOutputNative(rule...) != nil {
54
+			log.Errorf("setting up rule failed, %v", rule)
55
+		}
56
+	}
57
+}
58
+
59
+func (r *resolver) setupIPTable() error {
60
+	if r.err != nil {
61
+		return r.err
62
+	}
63
+	laddr := r.conn.LocalAddr().String()
64
+	ltcpaddr := r.tcpListen.Addr().String()
65
+
66
+	cmd := &exec.Cmd{
67
+		Path:   reexec.Self(),
68
+		Args:   append([]string{"setup-resolver"}, r.sb.Key(), laddr, ltcpaddr),
69
+		Stdout: os.Stdout,
70
+		Stderr: os.Stderr,
71
+	}
72
+	if err := cmd.Run(); err != nil {
73
+		return fmt.Errorf("reexec failed: %v", err)
74
+	}
75
+	return nil
76
+}
0 77
new file mode 100644
... ...
@@ -0,0 +1,7 @@
0
+// +build windows
1
+
2
+package libnetwork
3
+
4
+func (r *resolver) setupIPTable() error {
5
+	return nil
6
+}
... ...
@@ -12,7 +12,6 @@ import (
12 12
 	log "github.com/Sirupsen/logrus"
13 13
 	"github.com/docker/libnetwork/etchosts"
14 14
 	"github.com/docker/libnetwork/netlabel"
15
-	"github.com/docker/libnetwork/netutils"
16 15
 	"github.com/docker/libnetwork/osl"
17 16
 	"github.com/docker/libnetwork/types"
18 17
 )
... ...
@@ -406,7 +405,7 @@ func (sb *sandbox) ResolveIP(ip string) string {
406 406
 	for _, ep := range sb.getConnectedEndpoints() {
407 407
 		n := ep.getNetwork()
408 408
 
409
-		sr, ok := n.getController().svcDb[n.ID()]
409
+		sr, ok := n.getController().svcRecords[n.ID()]
410 410
 		if !ok {
411 411
 			continue
412 412
 		}
... ...
@@ -436,6 +435,7 @@ func (sb *sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
436 436
 	// {a.b in network c.d},
437 437
 	// {a in network b.c.d},
438 438
 
439
+	log.Debugf("Name To resolve: %v", name)
439 440
 	name = strings.TrimSuffix(name, ".")
440 441
 	reqName := []string{name}
441 442
 	networkName := []string{""}
... ...
@@ -456,7 +456,6 @@ func (sb *sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
456 456
 
457 457
 	epList := sb.getConnectedEndpoints()
458 458
 	for i := 0; i < len(reqName); i++ {
459
-		log.Debugf("To resolve: %v in %v", reqName[i], networkName[i])
460 459
 
461 460
 		// First check for local container alias
462 461
 		ip, ipv6Miss := sb.resolveName(reqName[i], networkName[i], epList, true, ipType)
... ...
@@ -513,7 +512,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin
513 513
 			ep.Unlock()
514 514
 		}
515 515
 
516
-		sr, ok := n.getController().svcDb[n.ID()]
516
+		sr, ok := n.getController().svcRecords[n.ID()]
517 517
 		if !ok {
518 518
 			continue
519 519
 		}
... ...
@@ -522,7 +521,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin
522 522
 		n.Lock()
523 523
 		ip, ok = sr.svcMap[name]
524 524
 
525
-		if ipType == netutils.IPv6 {
525
+		if ipType == types.IPv6 {
526 526
 			// If the name resolved to v4 address then it's a valid name in
527 527
 			// the docker network domain. If the network is not v6 enabled
528 528
 			// set ipv6Miss to filter the DNS query from going to external
... ...
@@ -972,6 +971,14 @@ func (eh epHeap) Less(i, j int) bool {
972 972
 		return true
973 973
 	}
974 974
 
975
+	if epi.getNetwork().Internal() {
976
+		return false
977
+	}
978
+
979
+	if epj.getNetwork().Internal() {
980
+		return true
981
+	}
982
+
975 983
 	if ci != nil {
976 984
 		cip, ok = ci.epPriority[eh[i].ID()]
977 985
 		if !ok {
... ...
@@ -11,7 +11,6 @@ import (
11 11
 
12 12
 	log "github.com/Sirupsen/logrus"
13 13
 	"github.com/docker/libnetwork/etchosts"
14
-	"github.com/docker/libnetwork/netutils"
15 14
 	"github.com/docker/libnetwork/resolvconf"
16 15
 	"github.com/docker/libnetwork/types"
17 16
 )
... ...
@@ -91,6 +90,10 @@ func (sb *sandbox) buildHostsFile() error {
91 91
 func (sb *sandbox) updateHostsFile(ifaceIP string) error {
92 92
 	var mhost string
93 93
 
94
+	if ifaceIP == "" {
95
+		return nil
96
+	}
97
+
94 98
 	if sb.config.originHostsPath != "" {
95 99
 		return nil
96 100
 	}
... ...
@@ -166,7 +169,7 @@ func (sb *sandbox) setupDNS() error {
166 166
 	if len(sb.config.dnsList) > 0 || len(sb.config.dnsSearchList) > 0 || len(sb.config.dnsOptionsList) > 0 {
167 167
 		var (
168 168
 			err            error
169
-			dnsList        = resolvconf.GetNameservers(currRC.Content, netutils.IP)
169
+			dnsList        = resolvconf.GetNameservers(currRC.Content, types.IP)
170 170
 			dnsSearchList  = resolvconf.GetSearchDomains(currRC.Content)
171 171
 			dnsOptionsList = resolvconf.GetOptions(currRC.Content)
172 172
 		)
... ...
@@ -275,7 +278,7 @@ func (sb *sandbox) rebuildDNS() error {
275 275
 
276 276
 	// localhost entries have already been filtered out from the list
277 277
 	// retain only the v4 servers in sb for forwarding the DNS queries
278
-	sb.extDNS = resolvconf.GetNameservers(currRC.Content, netutils.IPv4)
278
+	sb.extDNS = resolvconf.GetNameservers(currRC.Content, types.IPv4)
279 279
 
280 280
 	var (
281 281
 		dnsList        = []string{sb.resolver.NameServer()}
... ...
@@ -284,7 +287,7 @@ func (sb *sandbox) rebuildDNS() error {
284 284
 	)
285 285
 
286 286
 	// external v6 DNS servers have to be listed in resolv.conf
287
-	dnsList = append(dnsList, resolvconf.GetNameservers(currRC.Content, netutils.IPv6)...)
287
+	dnsList = append(dnsList, resolvconf.GetNameservers(currRC.Content, types.IPv6)...)
288 288
 
289 289
 	// Resolver returns the options in the format resolv.conf expects
290 290
 	dnsOptionsList = append(dnsOptionsList, sb.resolver.ResolverOptions()...)
291 291
new file mode 100644
... ...
@@ -0,0 +1,45 @@
0
+// +build solaris
1
+
2
+package libnetwork
3
+
4
+import (
5
+	"io"
6
+	"net"
7
+
8
+	"github.com/docker/libnetwork/types"
9
+)
10
+
11
+// processSetKeyReexec is a private function that must be called only on a reexec path
12
+// It expects 3 args { [0] = "libnetwork-setkey", [1] = <container-id>, [2] = <controller-id> }
13
+// It also expects libcontainer.State as a json string in <stdin>
14
+// Refer to https://github.com/opencontainers/runc/pull/160/ for more information
15
+func processSetKeyReexec() {
16
+}
17
+
18
+// SetExternalKey provides a convenient way to set an External key to a sandbox
19
+func SetExternalKey(controllerID string, containerID string, key string) error {
20
+	return types.NotImplementedErrorf("SetExternalKey isn't supported on non linux systems")
21
+}
22
+
23
+func sendKey(c net.Conn, data setKeyData) error {
24
+	return types.NotImplementedErrorf("sendKey isn't supported on non linux systems")
25
+}
26
+
27
+func processReturn(r io.Reader) error {
28
+	return types.NotImplementedErrorf("processReturn isn't supported on non linux systems")
29
+}
30
+
31
+// no-op on non linux systems
32
+func (c *controller) startExternalKeyListener() error {
33
+	return nil
34
+}
35
+
36
+func (c *controller) acceptClientConnections(sock string, l net.Listener) {
37
+}
38
+
39
+func (c *controller) processExternalKey(conn net.Conn) error {
40
+	return types.NotImplementedErrorf("processExternalKey isn't supported on non linux systems")
41
+}
42
+
43
+func (c *controller) stopExternalKeyListener() {
44
+}
... ...
@@ -213,7 +213,7 @@ func (c *controller) sandboxCleanup() {
213 213
 			var ep *endpoint
214 214
 			if err != nil {
215 215
 				logrus.Errorf("getNetworkFromStore for nid %s failed while trying to build sandbox for cleanup: %v", eps.Nid, err)
216
-				n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}}
216
+				n = &network{id: eps.Nid, ctrlr: c, drvOnce: &sync.Once{}, persist: true}
217 217
 				ep = &endpoint{id: eps.Eid, network: n, sandboxID: sbs.ID}
218 218
 			} else {
219 219
 				ep, err = n.getEndpointFromStore(eps.Eid)
220 220
new file mode 100644
... ...
@@ -0,0 +1,80 @@
0
+package libnetwork
1
+
2
+import "net"
3
+
4
+type service struct {
5
+	name     string
6
+	id       string
7
+	backEnds map[string]map[string]net.IP
8
+}
9
+
10
+func newService(name string, id string) *service {
11
+	return &service{
12
+		name:     name,
13
+		id:       id,
14
+		backEnds: make(map[string]map[string]net.IP),
15
+	}
16
+}
17
+
18
+func (c *controller) addServiceBinding(name, sid, nid, eid string, ip net.IP) error {
19
+	var s *service
20
+
21
+	n, err := c.NetworkByID(nid)
22
+	if err != nil {
23
+		return err
24
+	}
25
+
26
+	c.Lock()
27
+	s, ok := c.serviceBindings[sid]
28
+	if !ok {
29
+		s = newService(name, sid)
30
+	}
31
+
32
+	netBackEnds, ok := s.backEnds[nid]
33
+	if !ok {
34
+		netBackEnds = make(map[string]net.IP)
35
+		s.backEnds[nid] = netBackEnds
36
+	}
37
+
38
+	netBackEnds[eid] = ip
39
+	c.serviceBindings[sid] = s
40
+	c.Unlock()
41
+
42
+	n.(*network).addSvcRecords(name, ip, nil, false)
43
+	return nil
44
+}
45
+
46
+func (c *controller) rmServiceBinding(name, sid, nid, eid string, ip net.IP) error {
47
+	n, err := c.NetworkByID(nid)
48
+	if err != nil {
49
+		return err
50
+	}
51
+
52
+	c.Lock()
53
+	s, ok := c.serviceBindings[sid]
54
+	if !ok {
55
+		c.Unlock()
56
+		return nil
57
+	}
58
+
59
+	netBackEnds, ok := s.backEnds[nid]
60
+	if !ok {
61
+		c.Unlock()
62
+		return nil
63
+	}
64
+
65
+	delete(netBackEnds, eid)
66
+
67
+	if len(netBackEnds) == 0 {
68
+		delete(s.backEnds, nid)
69
+	}
70
+
71
+	if len(s.backEnds) == 0 {
72
+		delete(c.serviceBindings, sid)
73
+	}
74
+	c.Unlock()
75
+
76
+	n.(*network).deleteSvcRecords(name, ip, nil, false)
77
+
78
+	return err
79
+}
... ...
@@ -4,9 +4,20 @@ import (
4 4
 	"fmt"
5 5
 
6 6
 	log "github.com/Sirupsen/logrus"
7
+	"github.com/docker/libkv/store/boltdb"
8
+	"github.com/docker/libkv/store/consul"
9
+	"github.com/docker/libkv/store/etcd"
10
+	"github.com/docker/libkv/store/zookeeper"
7 11
 	"github.com/docker/libnetwork/datastore"
8 12
 )
9 13
 
14
+func registerKVStores() {
15
+	consul.Register()
16
+	zookeeper.Register()
17
+	etcd.Register()
18
+	boltdb.Register()
19
+}
20
+
10 21
 func (c *controller) initScopedStore(scope string, scfg *datastore.ScopeCfg) error {
11 22
 	store, err := datastore.NewDataStore(scope, scfg)
12 23
 	if err != nil {
... ...
@@ -20,6 +31,8 @@ func (c *controller) initScopedStore(scope string, scfg *datastore.ScopeCfg) err
20 20
 }
21 21
 
22 22
 func (c *controller) initStores() error {
23
+	registerKVStores()
24
+
23 25
 	c.Lock()
24 26
 	if c.cfg == nil {
25 27
 		c.Unlock()
... ...
@@ -208,8 +221,7 @@ func (n *network) getEndpointsFromStore() ([]*endpoint, error) {
208 208
 func (c *controller) updateToStore(kvObject datastore.KVObject) error {
209 209
 	cs := c.getStore(kvObject.DataScope())
210 210
 	if cs == nil {
211
-		log.Warnf("datastore for scope %s not initialized. kv object %s is not added to the store", kvObject.DataScope(), datastore.Key(kvObject.Key()...))
212
-		return nil
211
+		return fmt.Errorf("datastore for scope %q is not initialized ", kvObject.DataScope())
213 212
 	}
214 213
 
215 214
 	if err := cs.PutObjectAtomic(kvObject); err != nil {
... ...
@@ -225,8 +237,7 @@ func (c *controller) updateToStore(kvObject datastore.KVObject) error {
225 225
 func (c *controller) deleteFromStore(kvObject datastore.KVObject) error {
226 226
 	cs := c.getStore(kvObject.DataScope())
227 227
 	if cs == nil {
228
-		log.Debugf("datastore for scope %s not initialized. kv object %s is not deleted from datastore", kvObject.DataScope(), datastore.Key(kvObject.Key()...))
229
-		return nil
228
+		return fmt.Errorf("datastore for scope %q is not initialized ", kvObject.DataScope())
230 229
 	}
231 230
 
232 231
 retry:
... ...
@@ -407,7 +418,7 @@ func (c *controller) processEndpointDelete(nmap map[string]*netWatch, ep *endpoi
407 407
 
408 408
 			// This is the last container going away for the network. Destroy
409 409
 			// this network's svc db entry
410
-			delete(c.svcDb, ep.getNetwork().ID())
410
+			delete(c.svcRecords, ep.getNetwork().ID())
411 411
 
412 412
 			delete(nmap, ep.getNetwork().ID())
413 413
 		}
... ...
@@ -9,6 +9,13 @@ import (
9 9
 	"strings"
10 10
 )
11 11
 
12
+// constants for the IP address type
13
+const (
14
+	IP = iota // IPv4 and IPv6
15
+	IPv4
16
+	IPv6
17
+)
18
+
12 19
 // UUID represents a globally unique ID of various resources like network and endpoint
13 20
 type UUID string
14 21
 
... ...
@@ -323,6 +330,12 @@ func GetMinimalIPNet(nw *net.IPNet) *net.IPNet {
323 323
 	return nw
324 324
 }
325 325
 
326
+// IsIPNetValid returns true if the ipnet is a valid network/mask
327
+// combination. Otherwise returns false.
328
+func IsIPNetValid(nw *net.IPNet) bool {
329
+	return nw.String() != "0.0.0.0/0"
330
+}
331
+
326 332
 var v4inV6MaskPrefix = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
327 333
 
328 334
 // compareIPMask checks if the passed ip and mask are semantically compatible.