Browse code

Release the network attachment on allocation failure

- otherwise the attachment task will stay in store and
consume IP addresses and there is no way to remove it.

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2017/02/16 12:53:51
Showing 2 changed files
... ...
@@ -1697,18 +1697,31 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
1697 1697
 	close(attachCompleteCh)
1698 1698
 	c.Unlock()
1699 1699
 
1700
-	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
1700
+	logrus.Debugf("Successfully attached to network %s with task id %s", target, taskID)
1701
+
1702
+	release := func() {
1703
+		ctx, cancel := c.getRequestContext()
1704
+		defer cancel()
1705
+		if err := agent.ResourceAllocator().DetachNetwork(ctx, taskID); err != nil {
1706
+			logrus.Errorf("Failed remove network attachment %s to network %s on allocation failure: %v",
1707
+				taskID, target, err)
1708
+		}
1709
+	}
1701 1710
 
1702 1711
 	var config *network.NetworkingConfig
1703 1712
 	select {
1704 1713
 	case config = <-attachWaitCh:
1705 1714
 	case <-ctx.Done():
1715
+		release()
1706 1716
 		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
1707 1717
 	}
1708 1718
 
1709 1719
 	c.Lock()
1710 1720
 	c.attachers[aKey].config = config
1711 1721
 	c.Unlock()
1722
+
1723
+	logrus.Debugf("Successfully allocated resources on network %s for task id %s", target, taskID)
1724
+
1712 1725
 	return config, nil
1713 1726
 }
1714 1727
 
... ...
@@ -420,6 +420,30 @@ func (s *DockerSwarmSuite) TestOverlayAttachableOnSwarmLeave(c *check.C) {
420 420
 	c.Assert(out, checker.Not(checker.Contains), nwName)
421 421
 }
422 422
 
423
+func (s *DockerSwarmSuite) TestOverlayAttachableReleaseResourcesOnFailure(c *check.C) {
424
+	d := s.AddDaemon(c, true, true)
425
+
426
+	// Create attachable network
427
+	out, err := d.Cmd("network", "create", "-d", "overlay", "--attachable", "--subnet", "10.10.9.0/24", "ovnet")
428
+	c.Assert(err, checker.IsNil, check.Commentf(out))
429
+
430
+	// Attach a container with specific IP
431
+	out, err = d.Cmd("run", "-d", "--network", "ovnet", "--name", "c1", "--ip", "10.10.9.33", "busybox", "top")
432
+	c.Assert(err, checker.IsNil, check.Commentf(out))
433
+
434
+	// Attempt to attach another contianer with same IP, must fail
435
+	_, err = d.Cmd("run", "-d", "--network", "ovnet", "--name", "c2", "--ip", "10.10.9.33", "busybox", "top")
436
+	c.Assert(err, checker.NotNil)
437
+
438
+	// Remove first container
439
+	out, err = d.Cmd("rm", "-f", "c1")
440
+	c.Assert(err, checker.IsNil, check.Commentf(out))
441
+
442
+	// Verify the network can be removed, no phantom network attachment task left over
443
+	out, err = d.Cmd("network", "rm", "ovnet")
444
+	c.Assert(err, checker.IsNil, check.Commentf(out))
445
+}
446
+
423 447
 func (s *DockerSwarmSuite) TestSwarmRemoveInternalNetwork(c *check.C) {
424 448
 	d := s.AddDaemon(c, true, true)
425 449