integration-cli: add test for restarting entire swarm cluster
@@ -5,8 +5,11 @@ package main
 import (
 	"fmt"
 	"net/http"
+	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
+	"sync"
 	"syscall"
 	"time"
 
@@ -767,3 +770,108 @@ func setGlobalMode(s *swarm.Service) {
 		Global: &swarm.GlobalService{},
 	}
 }
+
+func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
+	var totalMCount, totalWCount int
+	for _, d := range cl {
+		info, err := d.info()
+		c.Assert(err, check.IsNil)
+		if !info.ControlAvailable {
+			totalWCount++
+			continue
+		}
+		var leaderFound bool
+		totalMCount++
+		var mCount, wCount int
+		for _, n := range d.listNodes(c) {
+			c.Assert(n.Status.State, checker.Equals, swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID))
+			c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID))
+			c.Assert(n.Spec.Membership, checker.Equals, swarm.NodeMembershipAccepted, check.Commentf("membership of node %s, reported by %s", n.ID, d.Info.NodeID))
+			if n.Spec.Role == swarm.NodeRoleManager {
+				c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
+				if n.ManagerStatus.Leader {
+					leaderFound = true
+				}
+				mCount++
+			} else {
+				c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
+				wCount++
+			}
+		}
+		c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
+		c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
+		c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
+	}
+	c.Assert(totalMCount, checker.Equals, managerCount)
+	c.Assert(totalWCount, checker.Equals, workerCount)
+}
+
+func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) {
+	mCount, wCount := 5, 1
+
+	var nodes []*SwarmDaemon
+	for i := 0; i < mCount; i++ {
+		manager := s.AddDaemon(c, true, true)
+		info, err := manager.info()
+		c.Assert(err, checker.IsNil)
+		c.Assert(info.ControlAvailable, checker.True)
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
+		nodes = append(nodes, manager)
+	}
+
+	for i := 0; i < wCount; i++ {
+		worker := s.AddDaemon(c, true, false)
+		info, err := worker.info()
+		c.Assert(err, checker.IsNil)
+		c.Assert(info.ControlAvailable, checker.False)
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
+		nodes = append(nodes, worker)
+	}
+
+	// stop whole cluster
+	{
+		var wg sync.WaitGroup
+		wg.Add(len(nodes))
+		errs := make(chan error, len(nodes))
+
+		for _, d := range nodes {
+			go func(daemon *SwarmDaemon) {
+				defer wg.Done()
+				if err := daemon.Stop(); err != nil {
+					errs <- err
+				}
+				if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
+					daemon.root = filepath.Dir(daemon.root)
+				}
+			}(d)
+		}
+		wg.Wait()
+		close(errs)
+		for err := range errs {
+			c.Assert(err, check.IsNil)
+		}
+	}
+
+	// start whole cluster
+	{
+		var wg sync.WaitGroup
+		wg.Add(len(nodes))
+		errs := make(chan error, len(nodes))
+
+		for _, d := range nodes {
+			go func(daemon *SwarmDaemon) {
+				defer wg.Done()
+				if err := daemon.Start("--iptables=false"); err != nil {
+					errs <- err
+				}
+			}(d)
+		}
+		wg.Wait()
+		close(errs)
+		for err := range errs {
+			c.Assert(err, check.IsNil)
+		}
+	}
+
+	checkClusterHealth(c, nodes, mCount, wCount)
+}