`TestApiSwarmRestartCluster` calls `checkClusterHealth`.
`checkClusterHealth` calls `d.info()`, which returns an error if
there is no cluster leader. The problem is that `checkClusterHealth` asserts
a nil error without giving a leader any time to be elected.
This moves the `d.info()` call into a `waitAndAssert` using the default
reconciliation timeout.
It also moves the per-node state (ready) and availability (active) checks
into a `waitAndAssert` to give the cluster enough time to come back up.
Signed-off-by: Brian Goff <cpuguy83@gmail.com>
| ... | ... |
@@ -823,19 +823,49 @@ func setGlobalMode(s *swarm.Service) {
|
| 823 | 823 |
|
| 824 | 824 |
func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
|
| 825 | 825 |
var totalMCount, totalWCount int |
| 826 |
+ |
|
| 826 | 827 |
for _, d := range cl {
|
| 827 |
- info, err := d.info() |
|
| 828 |
- c.Assert(err, check.IsNil) |
|
| 828 |
+ var ( |
|
| 829 |
+ info swarm.Info |
|
| 830 |
+ err error |
|
| 831 |
+ ) |
|
| 832 |
+ |
|
| 833 |
+ // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error |
|
| 834 |
+ checkInfo := func(c *check.C) (interface{}, check.CommentInterface) {
|
|
| 835 |
+ info, err = d.info() |
|
| 836 |
+ return err, check.Commentf("cluster not ready in time")
|
|
| 837 |
+ } |
|
| 838 |
+ waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil) |
|
| 829 | 839 |
if !info.ControlAvailable {
|
| 830 | 840 |
totalWCount++ |
| 831 | 841 |
continue |
| 832 | 842 |
} |
| 843 |
+ |
|
| 833 | 844 |
var leaderFound bool |
| 834 | 845 |
totalMCount++ |
| 835 | 846 |
var mCount, wCount int |
| 847 |
+ |
|
| 836 | 848 |
for _, n := range d.listNodes(c) {
|
| 837 |
- c.Assert(n.Status.State, checker.Equals, swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID))
|
|
| 838 |
- c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID))
|
|
| 849 |
+ waitReady := func(c *check.C) (interface{}, check.CommentInterface) {
|
|
| 850 |
+ if n.Status.State == swarm.NodeStateReady {
|
|
| 851 |
+ return true, nil |
|
| 852 |
+ } |
|
| 853 |
+ nn := d.getNode(c, n.ID) |
|
| 854 |
+ n = *nn |
|
| 855 |
+ return n.Status.State == swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID)
|
|
| 856 |
+ } |
|
| 857 |
+ waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True) |
|
| 858 |
+ |
|
| 859 |
+ waitActive := func(c *check.C) (interface{}, check.CommentInterface) {
|
|
| 860 |
+ if n.Spec.Availability == swarm.NodeAvailabilityActive {
|
|
| 861 |
+ return true, nil |
|
| 862 |
+ } |
|
| 863 |
+ nn := d.getNode(c, n.ID) |
|
| 864 |
+ n = *nn |
|
| 865 |
+ return n.Spec.Availability == swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID)
|
|
| 866 |
+ } |
|
| 867 |
+ waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True) |
|
| 868 |
+ |
|
| 839 | 869 |
if n.Spec.Role == swarm.NodeRoleManager {
|
| 840 | 870 |
c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
|
| 841 | 871 |
if n.ManagerStatus.Leader {
|