Browse code

integration-cli: allow temporary errors on leader switch

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
(cherry picked from commit 3df1095bbdc331d4effa5452d8aafd5aaead5789)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>

Tonis Tiigi authored on 2019/07/16 09:23:55
Showing 2 changed files
... ...
@@ -27,6 +27,7 @@ import (
27 27
 	"github.com/docker/docker/internal/test/request"
28 28
 	"github.com/docker/swarmkit/ca"
29 29
 	"github.com/go-check/check"
30
+	"github.com/pkg/errors"
30 31
 	"gotest.tools/assert"
31 32
 	is "gotest.tools/assert/cmp"
32 33
 )
... ...
@@ -313,13 +314,24 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
313 313
 		leader    *daemon.Daemon   // keep track of leader
314 314
 		followers []*daemon.Daemon // keep track of followers
315 315
 	)
316
+	var lastErr error
316 317
 	checkLeader := func(nodes ...*daemon.Daemon) checkF {
317 318
 		return func(c *check.C) (interface{}, check.CommentInterface) {
318 319
 			// clear these out before each run
319 320
 			leader = nil
320 321
 			followers = nil
321 322
 			for _, d := range nodes {
322
-				if d.GetNode(c, d.NodeID()).ManagerStatus.Leader {
323
+				n := d.GetNode(c, d.NodeID(), func(err error) bool {
324
+					if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
325
+						lastErr = err
326
+						return true
327
+					}
328
+					return false
329
+				})
330
+				if n == nil {
331
+					return false, check.Commentf("failed to get node: %v", lastErr)
332
+				}
333
+				if n.ManagerStatus.Leader {
323 334
 					leader = d
324 335
 				} else {
325 336
 					followers = append(followers, d)
... ...
@@ -391,7 +403,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
391 391
 	defer cli.Close()
392 392
 
393 393
 	// d1 will eventually step down from leader because there is no longer an active quorum; wait for that to happen
394
-	waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
394
+	waitAndAssert(c, defaultReconciliationTimeout*2, func(c *check.C) (interface{}, check.CommentInterface) {
395 395
 		_, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
396 396
 		return err.Error(), nil
397 397
 	}, checker.Contains, "Make sure more than half of the managers are online.")
... ...
@@ -15,7 +15,7 @@ import (
15 15
 type NodeConstructor func(*swarm.Node)
16 16
 
17 17
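 // GetNode returns a swarm node identified by the specified id, or nil if the inspect call fails with an error that one of the optional errCheck callbacks accepts (returns true for); any other error fails the test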
18
-func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
18
+func (d *Daemon) GetNode(t assert.TestingT, id string, errCheck ...func(error) bool) *swarm.Node {
19 19
 	if ht, ok := t.(test.HelperT); ok {
20 20
 		ht.Helper()
21 21
 	}
... ...
@@ -23,6 +23,13 @@ func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
23 23
 	defer cli.Close()
24 24
 
25 25
 	node, _, err := cli.NodeInspectWithRaw(context.Background(), id)
26
+	if err != nil {
27
+		for _, f := range errCheck {
28
+			if f(err) {
29
+				return nil
30
+			}
31
+		}
32
+	}
26 33
 	assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id)
27 34
 	assert.Check(t, node.ID == id)
28 35
 	return &node