[19.03 backport] integration-cli: fix swarm tests flakiness
| ... | ... |
@@ -27,6 +27,7 @@ import ( |
| 27 | 27 |
"github.com/docker/docker/internal/test/request" |
| 28 | 28 |
"github.com/docker/swarmkit/ca" |
| 29 | 29 |
"github.com/go-check/check" |
| 30 |
+ "github.com/pkg/errors" |
|
| 30 | 31 |
"gotest.tools/assert" |
| 31 | 32 |
is "gotest.tools/assert/cmp" |
| 32 | 33 |
) |
| ... | ... |
@@ -313,13 +314,24 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
|
| 313 | 313 |
leader *daemon.Daemon // keep track of leader |
| 314 | 314 |
followers []*daemon.Daemon // keep track of followers |
| 315 | 315 |
) |
| 316 |
+ var lastErr error |
|
| 316 | 317 |
checkLeader := func(nodes ...*daemon.Daemon) checkF {
|
| 317 | 318 |
return func(c *check.C) (interface{}, check.CommentInterface) {
|
| 318 | 319 |
// clear these out before each run |
| 319 | 320 |
leader = nil |
| 320 | 321 |
followers = nil |
| 321 | 322 |
for _, d := range nodes {
|
| 322 |
- if d.GetNode(c, d.NodeID()).ManagerStatus.Leader {
|
|
| 323 |
+ n := d.GetNode(c, d.NodeID(), func(err error) bool {
|
|
| 324 |
+ if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
|
|
| 325 |
+ lastErr = err |
|
| 326 |
+ return true |
|
| 327 |
+ } |
|
| 328 |
+ return false |
|
| 329 |
+ }) |
|
| 330 |
+ if n == nil {
|
|
| 331 |
+ return false, check.Commentf("failed to get node: %v", lastErr)
|
|
| 332 |
+ } |
|
| 333 |
+ if n.ManagerStatus.Leader {
|
|
| 323 | 334 |
leader = d |
| 324 | 335 |
} else {
|
| 325 | 336 |
followers = append(followers, d) |
| ... | ... |
@@ -391,7 +403,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
|
| 391 | 391 |
defer cli.Close() |
| 392 | 392 |
|
| 393 | 393 |
// d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen |
| 394 |
- waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
|
|
| 394 |
+ waitAndAssert(c, defaultReconciliationTimeout*2, func(c *check.C) (interface{}, check.CommentInterface) {
|
|
| 395 | 395 |
_, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
|
| 396 | 396 |
return err.Error(), nil |
| 397 | 397 |
}, checker.Contains, "Make sure more than half of the managers are online.") |
| ... | ... |
@@ -1303,9 +1303,21 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) {
|
| 1303 | 1303 |
|
| 1304 | 1304 |
c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) |
| 1305 | 1305 |
|
| 1306 |
- outs, err = d.Cmd("node", "ls")
|
|
| 1307 |
- assert.NilError(c, err) |
|
| 1308 |
- c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") |
|
| 1306 |
+ retry := 0 |
|
| 1307 |
+ for {
|
|
| 1308 |
// an issue sometimes prevents the leader from being available right away |
|
| 1309 |
+ outs, err = d.Cmd("node", "ls")
|
|
| 1310 |
+ if err != nil && retry < 5 {
|
|
| 1311 |
+ if strings.Contains(outs, "swarm does not have a leader") {
|
|
| 1312 |
+ retry++ |
|
| 1313 |
+ time.Sleep(3 * time.Second) |
|
| 1314 |
+ continue |
|
| 1315 |
+ } |
|
| 1316 |
+ } |
|
| 1317 |
+ assert.NilError(c, err) |
|
| 1318 |
+ c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") |
|
| 1319 |
+ break |
|
| 1320 |
+ } |
|
| 1309 | 1321 |
|
| 1310 | 1322 |
unlockKey = newUnlockKey |
| 1311 | 1323 |
} |
| ... | ... |
@@ -1383,9 +1395,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) {
|
| 1383 | 1383 |
|
| 1384 | 1384 |
c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) |
| 1385 | 1385 |
|
| 1386 |
- outs, err = d.Cmd("node", "ls")
|
|
| 1387 |
- c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
|
|
| 1388 |
- c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") |
|
| 1386 |
+ retry := 0 |
|
| 1387 |
+ for {
|
|
| 1388 |
// an issue sometimes prevents the leader from being available right away |
|
| 1389 |
+ outs, err = d.Cmd("node", "ls")
|
|
| 1390 |
+ if err != nil && retry < 5 {
|
|
| 1391 |
+ if strings.Contains(outs, "swarm does not have a leader") {
|
|
| 1392 |
+ retry++ |
|
| 1393 |
+ time.Sleep(3 * time.Second) |
|
| 1394 |
+ continue |
|
| 1395 |
+ } |
|
| 1396 |
+ } |
|
| 1397 |
+ c.Assert(err, checker.IsNil, check.Commentf("%s", outs))
|
|
| 1398 |
+ c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") |
|
| 1399 |
+ break |
|
| 1400 |
+ } |
|
| 1389 | 1401 |
} |
| 1390 | 1402 |
|
| 1391 | 1403 |
unlockKey = newUnlockKey |
| ... | ... |
@@ -15,7 +15,7 @@ import ( |
| 15 | 15 |
type NodeConstructor func(*swarm.Node) |
| 16 | 16 |
|
| 17 | 17 |
// GetNode returns a swarm node identified by the specified id |
| 18 |
-func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
|
|
| 18 |
+func (d *Daemon) GetNode(t assert.TestingT, id string, errCheck ...func(error) bool) *swarm.Node {
|
|
| 19 | 19 |
if ht, ok := t.(test.HelperT); ok {
|
| 20 | 20 |
ht.Helper() |
| 21 | 21 |
} |
| ... | ... |
@@ -23,6 +23,13 @@ func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node {
|
| 23 | 23 |
defer cli.Close() |
| 24 | 24 |
|
| 25 | 25 |
node, _, err := cli.NodeInspectWithRaw(context.Background(), id) |
| 26 |
+ if err != nil {
|
|
| 27 |
+ for _, f := range errCheck {
|
|
| 28 |
+ if f(err) {
|
|
| 29 |
+ return nil |
|
| 30 |
+ } |
|
| 31 |
+ } |
|
| 32 |
+ } |
|
| 26 | 33 |
assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id) |
| 27 | 34 |
assert.Check(t, node.ID == id) |
| 28 | 35 |
return &node |