Browse code

Merge pull request #345 from thaJeztah/19.03_backport_swarm_flaky

[19.03 backport] integration-cli: fix swarm tests flakiness

Sebastiaan van Stijn authored on 2019/09/16 23:00:57
Showing 3 changed files
... ...
@@ -27,6 +27,7 @@ import (
27 27
 	"github.com/docker/docker/internal/test/request"
28 28
 	"github.com/docker/swarmkit/ca"
29 29
 	"github.com/go-check/check"
30
+	"github.com/pkg/errors"
30 31
 	"gotest.tools/assert"
31 32
 	is "gotest.tools/assert/cmp"
32 33
 )
... ...
@@ -313,13 +314,24 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
313 313
 		leader    *daemon.Daemon   // keep track of leader
314 314
 		followers []*daemon.Daemon // keep track of followers
315 315
 	)
316
+	var lastErr error
316 317
 	checkLeader := func(nodes ...*daemon.Daemon) checkF {
317 318
 		return func(c *check.C) (interface{}, check.CommentInterface) {
318 319
 			// clear these out before each run
319 320
 			leader = nil
320 321
 			followers = nil
321 322
 			for _, d := range nodes {
322
-				if d.GetNode(c, d.NodeID()).ManagerStatus.Leader {
323
+				n := d.GetNode(c, d.NodeID(), func(err error) bool {
324
+					if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
325
+						lastErr = err
326
+						return true
327
+					}
328
+					return false
329
+				})
330
+				if n == nil {
331
+					return false, check.Commentf("failed to get node: %v", lastErr)
332
+				}
333
+				if n.ManagerStatus.Leader {
323 334
 					leader = d
324 335
 				} else {
325 336
 					followers = append(followers, d)
... ...
@@ -391,7 +403,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
391 391
 	defer cli.Close()
392 392
 
393 393
 	// d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
394
-	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
394
+	waitAndAssert(c, defaultReconciliationTimeout*2, func(c *check.C) (interface{}, check.CommentInterface) {
395 395
 		_, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
396 396
 		return err.Error(), nil
397 397
 	}, checker.Contains, "Make sure more than half of the managers are online.")
... ...
@@ -1303,9 +1303,21 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) {
1303 1303
 
1304 1304
 		c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
1305 1305
 
1306
-		outs, err = d.Cmd("node", "ls")
1307
-		assert.NilError(c, err)
1308
-		c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
1306
+		retry := 0
1307
+		for {
1308
+			// an issue sometimes prevents the leader from being available right away
1309
+			outs, err = d.Cmd("node", "ls")
1310
+			if err != nil && retry < 5 {
1311
+				if strings.Contains(outs, "swarm does not have a leader") {
1312
+					retry++
1313
+					time.Sleep(3 * time.Second)
1314
+					continue
1315
+				}
1316
+			}
1317
+			assert.NilError(c, err)
1318
+			c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
1319
+			break
1320
+		}
1309 1321
 
1310 1322
 		unlockKey = newUnlockKey
1311 1323
 	}
... ...
@@ -1383,9 +1395,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) {
1383 1383
 
1384 1384
 			c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive)
1385 1385
 
1386
-			outs, err = d.Cmd("node", "ls")
1387
-			c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
1388
-			c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
1386
+			retry := 0
1387
+			for {
1388
+				// an issue sometimes prevents the leader from being available right away
1389
+				outs, err = d.Cmd("node", "ls")
1390
+				if err != nil && retry < 5 {
1391
+					if strings.Contains(outs, "swarm does not have a leader") {
1392
+						retry++
1393
+						time.Sleep(3 * time.Second)
1394
+						continue
1395
+					}
1396
+				}
1397
+				c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
1398
+				c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked")
1399
+				break
1400
+			}
1389 1401
 		}
1390 1402
 
1391 1403
 		unlockKey = newUnlockKey
... ...
@@ -15,7 +15,7 @@ import (
15 15
 type NodeConstructor func(*swarm.Node)
16 16
 
17 17
 // GetNode returns a swarm node identified by the specified id
18
-func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
18
+func (d *Daemon) GetNode(t assert.TestingT, id string, errCheck ...func(error) bool) *swarm.Node {
19 19
 	if ht, ok := t.(test.HelperT); ok {
20 20
 		ht.Helper()
21 21
 	}
... ...
@@ -23,6 +23,13 @@ func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
23 23
 	defer cli.Close()
24 24
 
25 25
 	node, _, err := cli.NodeInspectWithRaw(context.Background(), id)
26
+	if err != nil {
27
+		for _, f := range errCheck {
28
+			if f(err) {
29
+				return nil
30
+			}
31
+		}
32
+	}
26 33
 	assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id)
27 34
 	assert.Check(t, node.ID == id)
28 35
 	return &node