Browse code

Vendoring Libnetwork caf22bd9a6a53dfe91b0266274155bc69235e8ed

* fixes https://github.com/docker/docker/issues/23622
* fixes a memory leak issue with bulk sync
* fixes external DNS resolution issue after live restore

Signed-off-by: Madhu Venugopal <madhu@docker.com>

Madhu Venugopal authored on 2016/06/17 06:32:12
Showing 9 changed files
... ...
@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837
65 65
 clone git github.com/imdario/mergo 0.2.1
66 66
 
67 67
 #get libnetwork packages
68
-clone git github.com/docker/libnetwork 96d45528599c32354230480a1ebc0657cd4d077f
68
+clone git github.com/docker/libnetwork caf22bd9a6a53dfe91b0266274155bc69235e8ed
69 69
 clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d
70 70
 clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
71 71
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
... ...
@@ -810,12 +810,13 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (s
810 810
 	// Create sandbox and process options first. Key generation depends on an option
811 811
 	if sb == nil {
812 812
 		sb = &sandbox{
813
-			id:          stringid.GenerateRandomID(),
814
-			containerID: containerID,
815
-			endpoints:   epHeap{},
816
-			epPriority:  map[string]int{},
817
-			config:      containerConfig{},
818
-			controller:  c,
813
+			id:                 stringid.GenerateRandomID(),
814
+			containerID:        containerID,
815
+			endpoints:          epHeap{},
816
+			epPriority:         map[string]int{},
817
+			populatedEndpoints: map[string]struct{}{},
818
+			config:             containerConfig{},
819
+			controller:         c,
819 820
 		}
820 821
 	}
821 822
 	sBox = sb
... ...
@@ -588,13 +588,26 @@ func (d *driver) createNetwork(config *networkConfiguration) error {
588 588
 	defer osl.InitOSContext()()
589 589
 
590 590
 	networkList := d.getNetworks()
591
-	for _, nw := range networkList {
591
+	for i, nw := range networkList {
592 592
 		nw.Lock()
593 593
 		nwConfig := nw.config
594 594
 		nw.Unlock()
595 595
 		if err := nwConfig.Conflicts(config); err != nil {
596
-			return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s",
597
-				config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error())
596
+			if config.DefaultBridge {
597
+				// We encountered and identified a stale default network
598
+				// We must delete it as libnetwork is the source of truth
599
+				// The default network being created must be the only one
600
+				// This can happen only from docker 1.12 onward
601
+				logrus.Infof("Removing stale default bridge network %s (%s)", nwConfig.ID, nwConfig.BridgeName)
602
+				if err := d.DeleteNetwork(nwConfig.ID); err != nil {
603
+					logrus.Warnf("Failed to remove stale default network: %s (%s): %v. Will remove from store.", nwConfig.ID, nwConfig.BridgeName, err)
604
+					d.storeDelete(nwConfig)
605
+				}
606
+				networkList = append(networkList[:i], networkList[i+1:]...)
607
+			} else {
608
+				return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s",
609
+					config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error())
610
+			}
598 611
 		}
599 612
 	}
600 613
 
... ...
@@ -762,12 +775,6 @@ func (d *driver) DeleteNetwork(nid string) error {
762 762
 		return err
763 763
 	}
764 764
 
765
-	// Cannot remove network if endpoints are still present
766
-	if len(n.endpoints) != 0 {
767
-		err = ActiveEndpointsError(n.id)
768
-		return err
769
-	}
770
-
771 765
 	// We only delete the bridge when it's not the default bridge. This is to keep the backward-compatible behavior.
772 766
 	if !config.DefaultBridge {
773 767
 		if err := d.nlh.LinkDel(n.bridge.Link); err != nil {
... ...
@@ -330,11 +330,15 @@ func (nDB *NetworkDB) bulkSyncTables() {
330 330
 		// successfully completed bulk sync in this iteration.
331 331
 		updatedNetworks := make([]string, 0, len(networks))
332 332
 		for _, nid := range networks {
333
+			var found bool
333 334
 			for _, completedNid := range completed {
334 335
 				if nid == completedNid {
335
-					continue
336
+					found = true
337
+					break
336 338
 				}
339
+			}
337 340
 
341
+			if !found {
338 342
 				updatedNetworks = append(updatedNetworks, nid)
339 343
 			}
340 344
 		}
... ...
@@ -449,8 +453,9 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b
449 449
 	// Wait on a response only if it is unsolicited.
450 450
 	if unsolicited {
451 451
 		startTime := time.Now()
452
+		t := time.NewTimer(30 * time.Second)
452 453
 		select {
453
-		case <-time.After(30 * time.Second):
454
+		case <-t.C:
454 455
 			logrus.Errorf("Bulk sync to node %s timed out", node)
455 456
 		case <-ch:
456 457
 			nDB.Lock()
... ...
@@ -459,6 +464,7 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b
459 459
 
460 460
 			logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime))
461 461
 		}
462
+		t.Stop()
462 463
 	}
463 464
 
464 465
 	return nil
... ...
@@ -68,23 +68,24 @@ func (sb *sandbox) processOptions(options ...SandboxOption) {
68 68
 type epHeap []*endpoint
69 69
 
70 70
 type sandbox struct {
71
-	id            string
72
-	containerID   string
73
-	config        containerConfig
74
-	extDNS        []string
75
-	osSbox        osl.Sandbox
76
-	controller    *controller
77
-	resolver      Resolver
78
-	resolverOnce  sync.Once
79
-	refCnt        int
80
-	endpoints     epHeap
81
-	epPriority    map[string]int
82
-	joinLeaveDone chan struct{}
83
-	dbIndex       uint64
84
-	dbExists      bool
85
-	isStub        bool
86
-	inDelete      bool
87
-	ingress       bool
71
+	id                 string
72
+	containerID        string
73
+	config             containerConfig
74
+	extDNS             []string
75
+	osSbox             osl.Sandbox
76
+	controller         *controller
77
+	resolver           Resolver
78
+	resolverOnce       sync.Once
79
+	refCnt             int
80
+	endpoints          epHeap
81
+	epPriority         map[string]int
82
+	populatedEndpoints map[string]struct{}
83
+	joinLeaveDone      chan struct{}
84
+	dbIndex            uint64
85
+	dbExists           bool
86
+	isStub             bool
87
+	inDelete           bool
88
+	ingress            bool
88 89
 	sync.Mutex
89 90
 }
90 91
 
... ...
@@ -728,7 +729,7 @@ func (sb *sandbox) restoreOslSandbox() error {
728 728
 			}
729 729
 		}
730 730
 		if ep.needResolver() {
731
-			sb.startResolver()
731
+			sb.startResolver(true)
732 732
 		}
733 733
 	}
734 734
 
... ...
@@ -761,7 +762,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
761 761
 	ep.Unlock()
762 762
 
763 763
 	if ep.needResolver() {
764
-		sb.startResolver()
764
+		sb.startResolver(false)
765 765
 	}
766 766
 
767 767
 	if i != nil && i.srcName != "" {
... ...
@@ -798,6 +799,12 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
798 798
 		}
799 799
 	}
800 800
 
801
+	// Make sure to add the endpoint to the populated endpoint set
802
+	// before populating loadbalancers.
803
+	sb.Lock()
804
+	sb.populatedEndpoints[ep.ID()] = struct{}{}
805
+	sb.Unlock()
806
+
801 807
 	// Populate load balancer only after updating all the other
802 808
 	// information including gateway and other routes so that
803 809
 	// loadbalancers are populated all the network state is in
... ...
@@ -830,6 +837,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
830 830
 		releaseOSSboxResources(osSbox, ep)
831 831
 	}
832 832
 
833
+	delete(sb.populatedEndpoints, ep.ID())
833 834
 	sb.Lock()
834 835
 	if len(sb.endpoints) == 0 {
835 836
 		// sb.endpoints should never be empty and this is unexpected error condition
... ...
@@ -879,6 +887,13 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
879 879
 	return nil
880 880
 }
881 881
 
882
+func (sb *sandbox) isEndpointPopulated(ep *endpoint) bool {
883
+	sb.Lock()
884
+	_, ok := sb.populatedEndpoints[ep.ID()]
885
+	sb.Unlock()
886
+	return ok
887
+}
888
+
882 889
 // joinLeaveStart waits to ensure there are no joins or leaves in progress and
883 890
 // marks this join/leave in progress without race
884 891
 func (sb *sandbox) joinLeaveStart() {
... ...
@@ -21,7 +21,7 @@ const (
21 21
 	filePerm      = 0644
22 22
 )
23 23
 
24
-func (sb *sandbox) startResolver() {
24
+func (sb *sandbox) startResolver(restore bool) {
25 25
 	sb.resolverOnce.Do(func() {
26 26
 		var err error
27 27
 		sb.resolver = NewResolver(sb)
... ...
@@ -31,10 +31,16 @@ func (sb *sandbox) startResolver() {
31 31
 			}
32 32
 		}()
33 33
 
34
-		err = sb.rebuildDNS()
35
-		if err != nil {
36
-			log.Errorf("Updating resolv.conf failed for container %s, %q", sb.ContainerID(), err)
37
-			return
34
+		// In the case of live restore, the container is already running with the
35
+		// right resolv.conf contents created before. Just update the
36
+		// external DNS servers from the restored sandbox for embedded
37
+		// server to use.
38
+		if !restore {
39
+			err = sb.rebuildDNS()
40
+			if err != nil {
41
+				log.Errorf("Updating resolv.conf failed for container %s, %q", sb.ContainerID(), err)
42
+				return
43
+			}
38 44
 		}
39 45
 		sb.resolver.SetExtServers(sb.extDNS)
40 46
 
... ...
@@ -8,7 +8,7 @@ import (
8 8
 
9 9
 // Stub implementations for DNS related functions
10 10
 
11
-func (sb *sandbox) startResolver() {
11
+func (sb *sandbox) startResolver(bool) {
12 12
 }
13 13
 
14 14
 func (sb *sandbox) setupResolutionFiles() error {
... ...
@@ -27,6 +27,7 @@ type sbState struct {
27 27
 	dbExists   bool
28 28
 	Eps        []epState
29 29
 	EpPriority map[string]int
30
+	ExtDNS     []string
30 31
 }
31 32
 
32 33
 func (sbs *sbState) Key() []string {
... ...
@@ -113,6 +114,10 @@ func (sbs *sbState) CopyTo(o datastore.KVObject) error {
113 113
 		dstSbs.Eps = append(dstSbs.Eps, eps)
114 114
 	}
115 115
 
116
+	for _, dns := range sbs.ExtDNS {
117
+		dstSbs.ExtDNS = append(dstSbs.ExtDNS, dns)
118
+	}
119
+
116 120
 	return nil
117 121
 }
118 122
 
... ...
@@ -126,6 +131,7 @@ func (sb *sandbox) storeUpdate() error {
126 126
 		ID:         sb.id,
127 127
 		Cid:        sb.containerID,
128 128
 		EpPriority: sb.epPriority,
129
+		ExtDNS:     sb.extDNS,
129 130
 	}
130 131
 
131 132
 retry:
... ...
@@ -191,13 +197,15 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) {
191 191
 		sbs := kvo.(*sbState)
192 192
 
193 193
 		sb := &sandbox{
194
-			id:          sbs.ID,
195
-			controller:  sbs.c,
196
-			containerID: sbs.Cid,
197
-			endpoints:   epHeap{},
198
-			dbIndex:     sbs.dbIndex,
199
-			isStub:      true,
200
-			dbExists:    true,
194
+			id:                 sbs.ID,
195
+			controller:         sbs.c,
196
+			containerID:        sbs.Cid,
197
+			endpoints:          epHeap{},
198
+			populatedEndpoints: map[string]struct{}{},
199
+			dbIndex:            sbs.dbIndex,
200
+			isStub:             true,
201
+			dbExists:           true,
202
+			extDNS:             sbs.ExtDNS,
201 203
 		}
202 204
 
203 205
 		msg := " for cleanup"
... ...
@@ -184,14 +184,20 @@ func (c *controller) rmServiceBinding(name, sid, nid, eid string, vip net.IP, in
184 184
 func (n *network) connectedLoadbalancers() []*loadBalancer {
185 185
 	c := n.getController()
186 186
 
187
+	serviceBindings := make([]*service, 0, len(c.serviceBindings))
187 188
 	c.Lock()
188
-	defer c.Unlock()
189
+	for _, s := range c.serviceBindings {
190
+		serviceBindings = append(serviceBindings, s)
191
+	}
192
+	c.Unlock()
189 193
 
190 194
 	var lbs []*loadBalancer
191
-	for _, s := range c.serviceBindings {
195
+	for _, s := range serviceBindings {
196
+		s.Lock()
192 197
 		if lb, ok := s.loadBalancers[n.ID()]; ok {
193 198
 			lbs = append(lbs, lb)
194 199
 		}
200
+		s.Unlock()
195 201
 	}
196 202
 
197 203
 	return lbs
... ...
@@ -229,12 +235,14 @@ func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
229 229
 			continue
230 230
 		}
231 231
 
232
+		lb.service.Lock()
232 233
 		addService := true
233 234
 		for _, ip := range lb.backEnds {
234 235
 			sb.addLBBackend(ip, lb.vip, lb.fwMark, lb.service.ingressPorts,
235 236
 				eIP, gwIP, addService)
236 237
 			addService = false
237 238
 		}
239
+		lb.service.Unlock()
238 240
 	}
239 241
 }
240 242
 
... ...
@@ -245,6 +253,10 @@ func (n *network) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po
245 245
 	n.WalkEndpoints(func(e Endpoint) bool {
246 246
 		ep := e.(*endpoint)
247 247
 		if sb, ok := ep.getSandbox(); ok {
248
+			if !sb.isEndpointPopulated(ep) {
249
+				return false
250
+			}
251
+
248 252
 			var gwIP net.IP
249 253
 			if ep := sb.getGatewayEndpoint(); ep != nil {
250 254
 				gwIP = ep.Iface().Address().IP
... ...
@@ -264,6 +276,10 @@ func (n *network) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Por
264 264
 	n.WalkEndpoints(func(e Endpoint) bool {
265 265
 		ep := e.(*endpoint)
266 266
 		if sb, ok := ep.getSandbox(); ok {
267
+			if !sb.isEndpointPopulated(ep) {
268
+				return false
269
+			}
270
+
267 271
 			var gwIP net.IP
268 272
 			if ep := sb.getGatewayEndpoint(); ep != nil {
269 273
 				gwIP = ep.Iface().Address().IP
... ...
@@ -356,15 +372,13 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po
356 356
 	}
357 357
 
358 358
 	if err := i.DelDestination(s, d); err != nil {
359
-		logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d: %v", ip, vip, fwMark, err)
360
-		return
359
+		logrus.Infof("Failed to delete real server %s for vip %s fwmark %d: %v", ip, vip, fwMark, err)
361 360
 	}
362 361
 
363 362
 	if rmService {
364 363
 		s.SchedName = ipvs.RoundRobin
365 364
 		if err := i.DelService(s); err != nil {
366
-			logrus.Errorf("Failed to create a new service for vip %s fwmark %d: %v", vip, fwMark, err)
367
-			return
365
+			logrus.Errorf("Failed to delete a new service for vip %s fwmark %d: %v", vip, fwMark, err)
368 366
 		}
369 367
 
370 368
 		var iPorts []*PortConfig
... ...
@@ -372,13 +386,11 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po
372 372
 			iPorts = ingressPorts
373 373
 			if err := programIngress(gwIP, iPorts, true); err != nil {
374 374
 				logrus.Errorf("Failed to delete ingress: %v", err)
375
-				return
376 375
 			}
377 376
 		}
378 377
 
379 378
 		if err := invokeFWMarker(sb.Key(), vip, fwMark, iPorts, eIP, true); err != nil {
380 379
 			logrus.Errorf("Failed to add firewall mark rule in sbox %s: %v", sb.Key(), err)
381
-			return
382 380
 		}
383 381
 	}
384 382
 }
... ...
@@ -454,12 +466,17 @@ func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) erro
454 454
 			rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
455 455
 				addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
456 456
 			if err := iptables.RawCombinedOutput(rule...); err != nil {
457
-				return fmt.Errorf("setting up rule failed, %v: %v", rule, err)
457
+				errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
458
+				if !isDelete {
459
+					return fmt.Errorf("%s", errStr)
460
+				}
461
+
462
+				logrus.Infof("%s", errStr)
458 463
 			}
459 464
 		}
460 465
 
461 466
 		if err := plumbProxy(iPort, isDelete); err != nil {
462
-			return fmt.Errorf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
467
+			logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
463 468
 		}
464 469
 	}
465 470