* fixes https://github.com/docker/docker/issues/23622
* fixes a memory leak issue with bulk sync
* fixes external DNS resolution issue after live restore
Signed-off-by: Madhu Venugopal <madhu@docker.com>
| ... | ... |
@@ -65,7 +65,7 @@ clone git github.com/RackSec/srslog 259aed10dfa74ea2961eddd1d9847619f6e98837 |
| 65 | 65 |
clone git github.com/imdario/mergo 0.2.1 |
| 66 | 66 |
|
| 67 | 67 |
#get libnetwork packages |
| 68 |
-clone git github.com/docker/libnetwork 96d45528599c32354230480a1ebc0657cd4d077f |
|
| 68 |
+clone git github.com/docker/libnetwork caf22bd9a6a53dfe91b0266274155bc69235e8ed |
|
| 69 | 69 |
clone git github.com/docker/go-events 39718a26497694185f8fb58a7d6f31947f3dc42d |
| 70 | 70 |
clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 |
| 71 | 71 |
clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec |
| ... | ... |
@@ -810,12 +810,13 @@ func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (s |
| 810 | 810 |
// Create sandbox and process options first. Key generation depends on an option |
| 811 | 811 |
if sb == nil {
|
| 812 | 812 |
sb = &sandbox{
|
| 813 |
- id: stringid.GenerateRandomID(), |
|
| 814 |
- containerID: containerID, |
|
| 815 |
- endpoints: epHeap{},
|
|
| 816 |
- epPriority: map[string]int{},
|
|
| 817 |
- config: containerConfig{},
|
|
| 818 |
- controller: c, |
|
| 813 |
+ id: stringid.GenerateRandomID(), |
|
| 814 |
+ containerID: containerID, |
|
| 815 |
+ endpoints: epHeap{},
|
|
| 816 |
+ epPriority: map[string]int{},
|
|
| 817 |
+ populatedEndpoints: map[string]struct{}{},
|
|
| 818 |
+ config: containerConfig{},
|
|
| 819 |
+ controller: c, |
|
| 819 | 820 |
} |
| 820 | 821 |
} |
| 821 | 822 |
sBox = sb |
| ... | ... |
@@ -588,13 +588,26 @@ func (d *driver) createNetwork(config *networkConfiguration) error {
|
| 588 | 588 |
defer osl.InitOSContext()() |
| 589 | 589 |
|
| 590 | 590 |
networkList := d.getNetworks() |
| 591 |
- for _, nw := range networkList {
|
|
| 591 |
+ for i, nw := range networkList {
|
|
| 592 | 592 |
nw.Lock() |
| 593 | 593 |
nwConfig := nw.config |
| 594 | 594 |
nw.Unlock() |
| 595 | 595 |
if err := nwConfig.Conflicts(config); err != nil {
|
| 596 |
- return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s",
|
|
| 597 |
- config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error()) |
|
| 596 |
+ if config.DefaultBridge {
|
|
| 597 |
+ // We encountered and identified a stale default network |
|
| 598 |
+ // We must delete it as libnetwork is the source of truth |
|
| 599 |
+ // The default network being created must be the only one |
|
| 600 |
+ // This can happen only from docker 1.12 onward |
|
| 601 |
+ logrus.Infof("Removing stale default bridge network %s (%s)", nwConfig.ID, nwConfig.BridgeName)
|
|
| 602 |
+ if err := d.DeleteNetwork(nwConfig.ID); err != nil {
|
|
| 603 |
+ logrus.Warnf("Failed to remove stale default network: %s (%s): %v. Will remove from store.", nwConfig.ID, nwConfig.BridgeName, err)
|
|
| 604 |
+ d.storeDelete(nwConfig) |
|
| 605 |
+ } |
|
| 606 |
+ networkList = append(networkList[:i], networkList[i+1:]...) |
|
| 607 |
+ } else {
|
|
| 608 |
+ return types.ForbiddenErrorf("cannot create network %s (%s): conflicts with network %s (%s): %s",
|
|
| 609 |
+ config.ID, config.BridgeName, nwConfig.ID, nwConfig.BridgeName, err.Error()) |
|
| 610 |
+ } |
|
| 598 | 611 |
} |
| 599 | 612 |
} |
| 600 | 613 |
|
| ... | ... |
@@ -762,12 +775,6 @@ func (d *driver) DeleteNetwork(nid string) error {
|
| 762 | 762 |
return err |
| 763 | 763 |
} |
| 764 | 764 |
|
| 765 |
- // Cannot remove network if endpoints are still present |
|
| 766 |
- if len(n.endpoints) != 0 {
|
|
| 767 |
- err = ActiveEndpointsError(n.id) |
|
| 768 |
- return err |
|
| 769 |
- } |
|
| 770 |
- |
|
| 771 | 765 |
// We only delete the bridge when it's not the default bridge. This is to keep the backward compatible behavior. |
| 772 | 766 |
if !config.DefaultBridge {
|
| 773 | 767 |
if err := d.nlh.LinkDel(n.bridge.Link); err != nil {
|
| ... | ... |
@@ -330,11 +330,15 @@ func (nDB *NetworkDB) bulkSyncTables() {
|
| 330 | 330 |
// successfully completed bulk sync in this iteration. |
| 331 | 331 |
updatedNetworks := make([]string, 0, len(networks)) |
| 332 | 332 |
for _, nid := range networks {
|
| 333 |
+ var found bool |
|
| 333 | 334 |
for _, completedNid := range completed {
|
| 334 | 335 |
if nid == completedNid {
|
| 335 |
- continue |
|
| 336 |
+ found = true |
|
| 337 |
+ break |
|
| 336 | 338 |
} |
| 339 |
+ } |
|
| 337 | 340 |
|
| 341 |
+ if !found {
|
|
| 338 | 342 |
updatedNetworks = append(updatedNetworks, nid) |
| 339 | 343 |
} |
| 340 | 344 |
} |
| ... | ... |
@@ -449,8 +453,9 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b |
| 449 | 449 |
// Wait on a response only if it is unsolicited. |
| 450 | 450 |
if unsolicited {
|
| 451 | 451 |
startTime := time.Now() |
| 452 |
+ t := time.NewTimer(30 * time.Second) |
|
| 452 | 453 |
select {
|
| 453 |
- case <-time.After(30 * time.Second): |
|
| 454 |
+ case <-t.C: |
|
| 454 | 455 |
logrus.Errorf("Bulk sync to node %s timed out", node)
|
| 455 | 456 |
case <-ch: |
| 456 | 457 |
nDB.Lock() |
| ... | ... |
@@ -459,6 +464,7 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b |
| 459 | 459 |
|
| 460 | 460 |
logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Now().Sub(startTime))
|
| 461 | 461 |
} |
| 462 |
+ t.Stop() |
|
| 462 | 463 |
} |
| 463 | 464 |
|
| 464 | 465 |
return nil |
| ... | ... |
@@ -68,23 +68,24 @@ func (sb *sandbox) processOptions(options ...SandboxOption) {
|
| 68 | 68 |
type epHeap []*endpoint |
| 69 | 69 |
|
| 70 | 70 |
type sandbox struct {
|
| 71 |
- id string |
|
| 72 |
- containerID string |
|
| 73 |
- config containerConfig |
|
| 74 |
- extDNS []string |
|
| 75 |
- osSbox osl.Sandbox |
|
| 76 |
- controller *controller |
|
| 77 |
- resolver Resolver |
|
| 78 |
- resolverOnce sync.Once |
|
| 79 |
- refCnt int |
|
| 80 |
- endpoints epHeap |
|
| 81 |
- epPriority map[string]int |
|
| 82 |
- joinLeaveDone chan struct{}
|
|
| 83 |
- dbIndex uint64 |
|
| 84 |
- dbExists bool |
|
| 85 |
- isStub bool |
|
| 86 |
- inDelete bool |
|
| 87 |
- ingress bool |
|
| 71 |
+ id string |
|
| 72 |
+ containerID string |
|
| 73 |
+ config containerConfig |
|
| 74 |
+ extDNS []string |
|
| 75 |
+ osSbox osl.Sandbox |
|
| 76 |
+ controller *controller |
|
| 77 |
+ resolver Resolver |
|
| 78 |
+ resolverOnce sync.Once |
|
| 79 |
+ refCnt int |
|
| 80 |
+ endpoints epHeap |
|
| 81 |
+ epPriority map[string]int |
|
| 82 |
+ populatedEndpoints map[string]struct{}
|
|
| 83 |
+ joinLeaveDone chan struct{}
|
|
| 84 |
+ dbIndex uint64 |
|
| 85 |
+ dbExists bool |
|
| 86 |
+ isStub bool |
|
| 87 |
+ inDelete bool |
|
| 88 |
+ ingress bool |
|
| 88 | 89 |
sync.Mutex |
| 89 | 90 |
} |
| 90 | 91 |
|
| ... | ... |
@@ -728,7 +729,7 @@ func (sb *sandbox) restoreOslSandbox() error {
|
| 728 | 728 |
} |
| 729 | 729 |
} |
| 730 | 730 |
if ep.needResolver() {
|
| 731 |
- sb.startResolver() |
|
| 731 |
+ sb.startResolver(true) |
|
| 732 | 732 |
} |
| 733 | 733 |
} |
| 734 | 734 |
|
| ... | ... |
@@ -761,7 +762,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
|
| 761 | 761 |
ep.Unlock() |
| 762 | 762 |
|
| 763 | 763 |
if ep.needResolver() {
|
| 764 |
- sb.startResolver() |
|
| 764 |
+ sb.startResolver(false) |
|
| 765 | 765 |
} |
| 766 | 766 |
|
| 767 | 767 |
if i != nil && i.srcName != "" {
|
| ... | ... |
@@ -798,6 +799,12 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
|
| 798 | 798 |
} |
| 799 | 799 |
} |
| 800 | 800 |
|
| 801 |
+ // Make sure to add the endpoint to the populated endpoint set |
|
| 802 |
+ // before populating loadbalancers. |
|
| 803 |
+ sb.Lock() |
|
| 804 |
+ sb.populatedEndpoints[ep.ID()] = struct{}{}
|
|
| 805 |
+ sb.Unlock() |
|
| 806 |
+ |
|
| 801 | 807 |
// Populate load balancer only after updating all the other |
| 802 | 808 |
// information including gateway and other routes so that |
| 803 | 809 |
// loadbalancers are populated all the network state is in |
| ... | ... |
@@ -830,6 +837,7 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
|
| 830 | 830 |
releaseOSSboxResources(osSbox, ep) |
| 831 | 831 |
} |
| 832 | 832 |
|
| 833 |
+ delete(sb.populatedEndpoints, ep.ID()) |
|
| 833 | 834 |
sb.Lock() |
| 834 | 835 |
if len(sb.endpoints) == 0 {
|
| 835 | 836 |
// sb.endpoints should never be empty and this is unexpected error condition |
| ... | ... |
@@ -879,6 +887,13 @@ func (sb *sandbox) clearNetworkResources(origEp *endpoint) error {
|
| 879 | 879 |
return nil |
| 880 | 880 |
} |
| 881 | 881 |
|
| 882 |
+func (sb *sandbox) isEndpointPopulated(ep *endpoint) bool {
|
|
| 883 |
+ sb.Lock() |
|
| 884 |
+ _, ok := sb.populatedEndpoints[ep.ID()] |
|
| 885 |
+ sb.Unlock() |
|
| 886 |
+ return ok |
|
| 887 |
+} |
|
| 888 |
+ |
|
| 882 | 889 |
// joinLeaveStart waits to ensure there are no joins or leaves in progress and |
| 883 | 890 |
// marks this join/leave in progress without race |
| 884 | 891 |
func (sb *sandbox) joinLeaveStart() {
|
| ... | ... |
@@ -21,7 +21,7 @@ const ( |
| 21 | 21 |
filePerm = 0644 |
| 22 | 22 |
) |
| 23 | 23 |
|
| 24 |
-func (sb *sandbox) startResolver() {
|
|
| 24 |
+func (sb *sandbox) startResolver(restore bool) {
|
|
| 25 | 25 |
sb.resolverOnce.Do(func() {
|
| 26 | 26 |
var err error |
| 27 | 27 |
sb.resolver = NewResolver(sb) |
| ... | ... |
@@ -31,10 +31,16 @@ func (sb *sandbox) startResolver() {
|
| 31 | 31 |
} |
| 32 | 32 |
}() |
| 33 | 33 |
|
| 34 |
- err = sb.rebuildDNS() |
|
| 35 |
- if err != nil {
|
|
| 36 |
- log.Errorf("Updating resolv.conf failed for container %s, %q", sb.ContainerID(), err)
|
|
| 37 |
- return |
|
| 34 |
+ // In the case of live restore, the container is already running with the |
|
| 35 |
+ // right resolv.conf contents created before. Just update the |
|
| 36 |
+ // external DNS servers from the restored sandbox for the embedded |
|
| 37 |
+ // server to use. |
|
| 38 |
+ if !restore {
|
|
| 39 |
+ err = sb.rebuildDNS() |
|
| 40 |
+ if err != nil {
|
|
| 41 |
+ log.Errorf("Updating resolv.conf failed for container %s, %q", sb.ContainerID(), err)
|
|
| 42 |
+ return |
|
| 43 |
+ } |
|
| 38 | 44 |
} |
| 39 | 45 |
sb.resolver.SetExtServers(sb.extDNS) |
| 40 | 46 |
|
| ... | ... |
@@ -27,6 +27,7 @@ type sbState struct {
|
| 27 | 27 |
dbExists bool |
| 28 | 28 |
Eps []epState |
| 29 | 29 |
EpPriority map[string]int |
| 30 |
+ ExtDNS []string |
|
| 30 | 31 |
} |
| 31 | 32 |
|
| 32 | 33 |
func (sbs *sbState) Key() []string {
|
| ... | ... |
@@ -113,6 +114,10 @@ func (sbs *sbState) CopyTo(o datastore.KVObject) error {
|
| 113 | 113 |
dstSbs.Eps = append(dstSbs.Eps, eps) |
| 114 | 114 |
} |
| 115 | 115 |
|
| 116 |
+ for _, dns := range sbs.ExtDNS {
|
|
| 117 |
+ dstSbs.ExtDNS = append(dstSbs.ExtDNS, dns) |
|
| 118 |
+ } |
|
| 119 |
+ |
|
| 116 | 120 |
return nil |
| 117 | 121 |
} |
| 118 | 122 |
|
| ... | ... |
@@ -126,6 +131,7 @@ func (sb *sandbox) storeUpdate() error {
|
| 126 | 126 |
ID: sb.id, |
| 127 | 127 |
Cid: sb.containerID, |
| 128 | 128 |
EpPriority: sb.epPriority, |
| 129 |
+ ExtDNS: sb.extDNS, |
|
| 129 | 130 |
} |
| 130 | 131 |
|
| 131 | 132 |
retry: |
| ... | ... |
@@ -191,13 +197,15 @@ func (c *controller) sandboxCleanup(activeSandboxes map[string]interface{}) {
|
| 191 | 191 |
sbs := kvo.(*sbState) |
| 192 | 192 |
|
| 193 | 193 |
sb := &sandbox{
|
| 194 |
- id: sbs.ID, |
|
| 195 |
- controller: sbs.c, |
|
| 196 |
- containerID: sbs.Cid, |
|
| 197 |
- endpoints: epHeap{},
|
|
| 198 |
- dbIndex: sbs.dbIndex, |
|
| 199 |
- isStub: true, |
|
| 200 |
- dbExists: true, |
|
| 194 |
+ id: sbs.ID, |
|
| 195 |
+ controller: sbs.c, |
|
| 196 |
+ containerID: sbs.Cid, |
|
| 197 |
+ endpoints: epHeap{},
|
|
| 198 |
+ populatedEndpoints: map[string]struct{}{},
|
|
| 199 |
+ dbIndex: sbs.dbIndex, |
|
| 200 |
+ isStub: true, |
|
| 201 |
+ dbExists: true, |
|
| 202 |
+ extDNS: sbs.ExtDNS, |
|
| 201 | 203 |
} |
| 202 | 204 |
|
| 203 | 205 |
msg := " for cleanup" |
| ... | ... |
@@ -184,14 +184,20 @@ func (c *controller) rmServiceBinding(name, sid, nid, eid string, vip net.IP, in |
| 184 | 184 |
func (n *network) connectedLoadbalancers() []*loadBalancer {
|
| 185 | 185 |
c := n.getController() |
| 186 | 186 |
|
| 187 |
+ serviceBindings := make([]*service, 0, len(c.serviceBindings)) |
|
| 187 | 188 |
c.Lock() |
| 188 |
- defer c.Unlock() |
|
| 189 |
+ for _, s := range c.serviceBindings {
|
|
| 190 |
+ serviceBindings = append(serviceBindings, s) |
|
| 191 |
+ } |
|
| 192 |
+ c.Unlock() |
|
| 189 | 193 |
|
| 190 | 194 |
var lbs []*loadBalancer |
| 191 |
- for _, s := range c.serviceBindings {
|
|
| 195 |
+ for _, s := range serviceBindings {
|
|
| 196 |
+ s.Lock() |
|
| 192 | 197 |
if lb, ok := s.loadBalancers[n.ID()]; ok {
|
| 193 | 198 |
lbs = append(lbs, lb) |
| 194 | 199 |
} |
| 200 |
+ s.Unlock() |
|
| 195 | 201 |
} |
| 196 | 202 |
|
| 197 | 203 |
return lbs |
| ... | ... |
@@ -229,12 +235,14 @@ func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
|
| 229 | 229 |
continue |
| 230 | 230 |
} |
| 231 | 231 |
|
| 232 |
+ lb.service.Lock() |
|
| 232 | 233 |
addService := true |
| 233 | 234 |
for _, ip := range lb.backEnds {
|
| 234 | 235 |
sb.addLBBackend(ip, lb.vip, lb.fwMark, lb.service.ingressPorts, |
| 235 | 236 |
eIP, gwIP, addService) |
| 236 | 237 |
addService = false |
| 237 | 238 |
} |
| 239 |
+ lb.service.Unlock() |
|
| 238 | 240 |
} |
| 239 | 241 |
} |
| 240 | 242 |
|
| ... | ... |
@@ -245,6 +253,10 @@ func (n *network) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po |
| 245 | 245 |
n.WalkEndpoints(func(e Endpoint) bool {
|
| 246 | 246 |
ep := e.(*endpoint) |
| 247 | 247 |
if sb, ok := ep.getSandbox(); ok {
|
| 248 |
+ if !sb.isEndpointPopulated(ep) {
|
|
| 249 |
+ return false |
|
| 250 |
+ } |
|
| 251 |
+ |
|
| 248 | 252 |
var gwIP net.IP |
| 249 | 253 |
if ep := sb.getGatewayEndpoint(); ep != nil {
|
| 250 | 254 |
gwIP = ep.Iface().Address().IP |
| ... | ... |
@@ -264,6 +276,10 @@ func (n *network) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Por |
| 264 | 264 |
n.WalkEndpoints(func(e Endpoint) bool {
|
| 265 | 265 |
ep := e.(*endpoint) |
| 266 | 266 |
if sb, ok := ep.getSandbox(); ok {
|
| 267 |
+ if !sb.isEndpointPopulated(ep) {
|
|
| 268 |
+ return false |
|
| 269 |
+ } |
|
| 270 |
+ |
|
| 267 | 271 |
var gwIP net.IP |
| 268 | 272 |
if ep := sb.getGatewayEndpoint(); ep != nil {
|
| 269 | 273 |
gwIP = ep.Iface().Address().IP |
| ... | ... |
@@ -356,15 +372,13 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po |
| 356 | 356 |
} |
| 357 | 357 |
|
| 358 | 358 |
if err := i.DelDestination(s, d); err != nil {
|
| 359 |
- logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d: %v", ip, vip, fwMark, err)
|
|
| 360 |
- return |
|
| 359 |
+ logrus.Infof("Failed to delete real server %s for vip %s fwmark %d: %v", ip, vip, fwMark, err)
|
|
| 361 | 360 |
} |
| 362 | 361 |
|
| 363 | 362 |
if rmService {
|
| 364 | 363 |
s.SchedName = ipvs.RoundRobin |
| 365 | 364 |
if err := i.DelService(s); err != nil {
|
| 366 |
- logrus.Errorf("Failed to create a new service for vip %s fwmark %d: %v", vip, fwMark, err)
|
|
| 367 |
- return |
|
| 365 |
+ logrus.Errorf("Failed to delete a new service for vip %s fwmark %d: %v", vip, fwMark, err)
|
|
| 368 | 366 |
} |
| 369 | 367 |
|
| 370 | 368 |
var iPorts []*PortConfig |
| ... | ... |
@@ -372,13 +386,11 @@ func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*Po |
| 372 | 372 |
iPorts = ingressPorts |
| 373 | 373 |
if err := programIngress(gwIP, iPorts, true); err != nil {
|
| 374 | 374 |
logrus.Errorf("Failed to delete ingress: %v", err)
|
| 375 |
- return |
|
| 376 | 375 |
} |
| 377 | 376 |
} |
| 378 | 377 |
|
| 379 | 378 |
if err := invokeFWMarker(sb.Key(), vip, fwMark, iPorts, eIP, true); err != nil {
|
| 380 | 379 |
logrus.Errorf("Failed to add firewall mark rule in sbox %s: %v", sb.Key(), err)
|
| 381 |
- return |
|
| 382 | 380 |
} |
| 383 | 381 |
} |
| 384 | 382 |
} |
| ... | ... |
@@ -454,12 +466,17 @@ func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) erro |
| 454 | 454 |
rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
|
| 455 | 455 |
addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort)) |
| 456 | 456 |
if err := iptables.RawCombinedOutput(rule...); err != nil {
|
| 457 |
- return fmt.Errorf("setting up rule failed, %v: %v", rule, err)
|
|
| 457 |
+ errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
|
|
| 458 |
+ if !isDelete {
|
|
| 459 |
+ return fmt.Errorf("%s", errStr)
|
|
| 460 |
+ } |
|
| 461 |
+ |
|
| 462 |
+ logrus.Infof("%s", errStr)
|
|
| 458 | 463 |
} |
| 459 | 464 |
} |
| 460 | 465 |
|
| 461 | 466 |
if err := plumbProxy(iPort, isDelete); err != nil {
|
| 462 |
- return fmt.Errorf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
|
|
| 467 |
+ logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
|
|
| 463 | 468 |
} |
| 464 | 469 |
} |
| 465 | 470 |
|