Browse code

Swarm integration tests

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
Signed-off-by: Victor Vieux <vieux@docker.com>

Tonis Tiigi authored on 2016/06/14 11:54:20
Showing 8 changed files
... ...
@@ -184,6 +184,61 @@ func (s *DockerDaemonSuite) TearDownTest(c *check.C) {
184 184
 	s.ds.TearDownTest(c)
185 185
 }
186 186
 
187
+const defaultSwarmPort = 2477
188
+
189
+func init() {
190
+	check.Suite(&DockerSwarmSuite{
191
+		ds: &DockerSuite{},
192
+	})
193
+}
194
+
195
+type DockerSwarmSuite struct {
196
+	ds        *DockerSuite
197
+	daemons   []*SwarmDaemon
198
+	portIndex int
199
+}
200
+
201
+func (s *DockerSwarmSuite) SetUpTest(c *check.C) {
202
+	testRequires(c, DaemonIsLinux)
203
+}
204
+
205
+func (s *DockerSwarmSuite) AddDaemon(c *check.C, joinSwarm, manager bool) *SwarmDaemon {
206
+	d := &SwarmDaemon{
207
+		Daemon: NewDaemon(c),
208
+		port:   defaultSwarmPort + s.portIndex,
209
+	}
210
+	d.listenAddr = fmt.Sprintf("0.0.0.0:%d", d.port)
211
+	err := d.StartWithBusybox()
212
+	c.Assert(err, check.IsNil)
213
+
214
+	if joinSwarm == true {
215
+		if len(s.daemons) > 0 {
216
+			c.Assert(d.Join(s.daemons[0].listenAddr, "", "", manager), check.IsNil)
217
+		} else {
218
+			aa := make(map[string]bool)
219
+			aa["worker"] = true
220
+			aa["manager"] = true
221
+			c.Assert(d.Init(aa, ""), check.IsNil)
222
+		}
223
+	}
224
+
225
+	s.portIndex++
226
+	s.daemons = append(s.daemons, d)
227
+
228
+	return d
229
+}
230
+
231
+func (s *DockerSwarmSuite) TearDownTest(c *check.C) {
232
+	testRequires(c, DaemonIsLinux)
233
+	for _, d := range s.daemons {
234
+		d.Stop()
235
+	}
236
+	s.daemons = nil
237
+	s.portIndex = 0
238
+
239
+	s.ds.TearDownTest(c)
240
+}
241
+
187 242
 func init() {
188 243
 	check.Suite(&DockerTrustSuite{
189 244
 		ds: &DockerSuite{},
... ...
@@ -1,6 +1,7 @@
1 1
 package main
2 2
 
3 3
 import (
4
+	"bytes"
4 5
 	"encoding/json"
5 6
 	"errors"
6 7
 	"fmt"
... ...
@@ -292,9 +293,9 @@ out1:
292 292
 		select {
293 293
 		case err := <-d.wait:
294 294
 			return err
295
-		case <-time.After(15 * time.Second):
295
+		case <-time.After(20 * time.Second):
296 296
 			// time for stopping jobs and run onShutdown hooks
297
-			d.c.Log("timeout")
297
+			d.c.Logf("timeout: %v", d.id)
298 298
 			break out1
299 299
 		}
300 300
 	}
... ...
@@ -306,7 +307,7 @@ out2:
306 306
 			return err
307 307
 		case <-tick:
308 308
 			i++
309
-			if i > 4 {
309
+			if i > 5 {
310 310
 				d.c.Logf("tried to interrupt daemon for %d times, now try to kill it", i)
311 311
 				break out2
312 312
 			}
... ...
@@ -452,6 +453,27 @@ func (d *Daemon) CmdWithArgs(daemonArgs []string, name string, arg ...string) (s
452 452
 	return string(b), err
453 453
 }
454 454
 
455
+// SockRequest executes a socket request on a daemon and returns statuscode and output.
456
+func (d *Daemon) SockRequest(method, endpoint string, data interface{}) (int, []byte, error) {
457
+	jsonData := bytes.NewBuffer(nil)
458
+	if err := json.NewEncoder(jsonData).Encode(data); err != nil {
459
+		return -1, nil, err
460
+	}
461
+
462
+	res, body, err := d.SockRequestRaw(method, endpoint, jsonData, "application/json")
463
+	if err != nil {
464
+		return -1, nil, err
465
+	}
466
+	b, err := readBody(body)
467
+	return res.StatusCode, b, err
468
+}
469
+
470
+// SockRequestRaw executes a socket request on a daemon and returns a http
471
+// response and a reader for the output data.
472
+func (d *Daemon) SockRequestRaw(method, endpoint string, data io.Reader, ct string) (*http.Response, io.ReadCloser, error) {
473
+	return sockRequestRawToDaemon(method, endpoint, data, ct, d.sock())
474
+}
475
+
455 476
 // LogFileName returns the path to the daemon's log file
456 477
 func (d *Daemon) LogFileName() string {
457 478
 	return d.logFile.Name()
... ...
@@ -461,6 +483,16 @@ func (d *Daemon) getIDByName(name string) (string, error) {
461 461
 	return d.inspectFieldWithError(name, "Id")
462 462
 }
463 463
 
464
+func (d *Daemon) activeContainers() (ids []string) {
465
+	out, _ := d.Cmd("ps", "-q")
466
+	for _, id := range strings.Split(out, "\n") {
467
+		if id = strings.TrimSpace(id); id != "" {
468
+			ids = append(ids, id)
469
+		}
470
+	}
471
+	return
472
+}
473
+
464 474
 func (d *Daemon) inspectFilter(name, filter string) (string, error) {
465 475
 	format := fmt.Sprintf("{{%s}}", filter)
466 476
 	out, err := d.Cmd("inspect", "-f", format, name)
... ...
@@ -486,3 +518,12 @@ func (d *Daemon) buildImageWithOut(name, dockerfile string, useCache bool, build
486 486
 	buildCmd := buildImageCmdWithHost(name, dockerfile, d.sock(), useCache, buildFlags...)
487 487
 	return runCommandWithOutput(buildCmd)
488 488
 }
489
+
490
+func (d *Daemon) checkActiveContainerCount(c *check.C) (interface{}, check.CommentInterface) {
491
+	out, err := d.Cmd("ps", "-q")
492
+	c.Assert(err, checker.IsNil)
493
+	if len(strings.TrimSpace(out)) == 0 {
494
+		return 0, nil
495
+	}
496
+	return len(strings.Split(strings.TrimSpace(out), "\n")), check.Commentf("output: %q", string(out))
497
+}
489 498
new file mode 100644
... ...
@@ -0,0 +1,178 @@
0
+package main
1
+
2
+import (
3
+	"encoding/json"
4
+	"fmt"
5
+	"net/http"
6
+	"strings"
7
+
8
+	"github.com/docker/docker/pkg/integration/checker"
9
+	"github.com/docker/engine-api/types"
10
+	"github.com/docker/engine-api/types/swarm"
11
+	"github.com/go-check/check"
12
+)
13
+
14
+// SwarmDaemon is a test daemon with helpers for participating in a swarm.
15
+type SwarmDaemon struct {
16
+	*Daemon
17
+	swarm.Info
18
+	port       int
19
+	listenAddr string
20
+}
21
+
22
+// Init initializes a new swarm cluster.
23
+func (d *SwarmDaemon) Init(autoAccept map[string]bool, secret string) error {
24
+	req := swarm.InitRequest{
25
+		ListenAddr: d.listenAddr,
26
+	}
27
+	for _, role := range []swarm.NodeRole{swarm.NodeRoleManager, swarm.NodeRoleWorker} {
28
+		req.Spec.AcceptancePolicy.Policies = append(req.Spec.AcceptancePolicy.Policies, swarm.Policy{
29
+			Role:       role,
30
+			Autoaccept: autoAccept[strings.ToLower(string(role))],
31
+			Secret:     secret,
32
+		})
33
+	}
34
+	status, out, err := d.SockRequest("POST", "/swarm/init", req)
35
+	if status != http.StatusOK {
36
+		return fmt.Errorf("initializing swarm: invalid statuscode %v, %q", status, out)
37
+	}
38
+	if err != nil {
39
+		return fmt.Errorf("initializing swarm: %v", err)
40
+	}
41
+	info, err := d.info()
42
+	if err != nil {
43
+		return err
44
+	}
45
+	d.Info = info
46
+	return nil
47
+}
48
+
49
+// Join joins a current daemon with existing cluster.
50
+func (d *SwarmDaemon) Join(remoteAddr, secret, cahash string, manager bool) error {
51
+	status, out, err := d.SockRequest("POST", "/swarm/join", swarm.JoinRequest{
52
+		ListenAddr:  d.listenAddr,
53
+		RemoteAddrs: []string{remoteAddr},
54
+		Manager:     manager,
55
+		Secret:      secret,
56
+		CACertHash:  cahash,
57
+	})
58
+	if status != http.StatusOK {
59
+		return fmt.Errorf("joining swarm: invalid statuscode %v, %q", status, out)
60
+	}
61
+	if err != nil {
62
+		return fmt.Errorf("joining swarm: %v", err)
63
+	}
64
+	info, err := d.info()
65
+	if err != nil {
66
+		return err
67
+	}
68
+	d.Info = info
69
+	return nil
70
+}
71
+
72
+// Leave forces daemon to leave current cluster.
73
+func (d *SwarmDaemon) Leave(force bool) error {
74
+	url := "/swarm/leave"
75
+	if force {
76
+		url += "?force=1"
77
+	}
78
+	status, out, err := d.SockRequest("POST", url, nil)
79
+	if status != http.StatusOK {
80
+		return fmt.Errorf("leaving swarm: invalid statuscode %v, %q", status, out)
81
+	}
82
+	if err != nil {
83
+		err = fmt.Errorf("leaving swarm: %v", err)
84
+	}
85
+	return err
86
+}
87
+
88
+func (d *SwarmDaemon) info() (swarm.Info, error) {
89
+	var info struct {
90
+		Swarm swarm.Info
91
+	}
92
+	status, dt, err := d.SockRequest("GET", "/info", nil)
93
+	if status != http.StatusOK {
94
+		return info.Swarm, fmt.Errorf("get swarm info: invalid statuscode %v", status)
95
+	}
96
+	if err != nil {
97
+		return info.Swarm, fmt.Errorf("get swarm info: %v", err)
98
+	}
99
+	if err := json.Unmarshal(dt, &info); err != nil {
100
+		return info.Swarm, err
101
+	}
102
+	return info.Swarm, nil
103
+}
104
+
105
+type serviceConstructor func(*swarm.Service)
106
+type nodeConstructor func(*swarm.Node)
107
+
108
+func (d *SwarmDaemon) createService(c *check.C, f ...serviceConstructor) string {
109
+	var service swarm.Service
110
+	for _, fn := range f {
111
+		fn(&service)
112
+	}
113
+	status, out, err := d.SockRequest("POST", "/services/create", service.Spec)
114
+
115
+	c.Assert(err, checker.IsNil)
116
+	c.Assert(status, checker.Equals, http.StatusCreated, check.Commentf("output: %q", string(out)))
117
+
118
+	var scr types.ServiceCreateResponse
119
+	c.Assert(json.Unmarshal(out, &scr), checker.IsNil)
120
+	return scr.ID
121
+}
122
+
123
+func (d *SwarmDaemon) getService(c *check.C, id string) *swarm.Service {
124
+	var service swarm.Service
125
+	status, out, err := d.SockRequest("GET", "/services/"+id, nil)
126
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
127
+	c.Assert(err, checker.IsNil)
128
+	c.Assert(json.Unmarshal(out, &service), checker.IsNil)
129
+	c.Assert(service.ID, checker.Equals, id)
130
+	return &service
131
+}
132
+
133
+func (d *SwarmDaemon) updateService(c *check.C, service *swarm.Service, f ...serviceConstructor) {
134
+	for _, fn := range f {
135
+		fn(service)
136
+	}
137
+	url := fmt.Sprintf("/services/%s/update?version=%d", service.ID, service.Version.Index)
138
+	status, out, err := d.SockRequest("POST", url, service.Spec)
139
+	c.Assert(err, checker.IsNil)
140
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
141
+}
142
+
143
+func (d *SwarmDaemon) removeService(c *check.C, id string) {
144
+	status, out, err := d.SockRequest("DELETE", "/services/"+id, nil)
145
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
146
+	c.Assert(err, checker.IsNil)
147
+}
148
+
149
+func (d *SwarmDaemon) getNode(c *check.C, id string) *swarm.Node {
150
+	var node swarm.Node
151
+	status, out, err := d.SockRequest("GET", "/nodes/"+id, nil)
152
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
153
+	c.Assert(err, checker.IsNil)
154
+	c.Assert(json.Unmarshal(out, &node), checker.IsNil)
155
+	c.Assert(node.ID, checker.Equals, id)
156
+	return &node
157
+}
158
+
159
+func (d *SwarmDaemon) updateNode(c *check.C, node *swarm.Node, f ...nodeConstructor) {
160
+	for _, fn := range f {
161
+		fn(node)
162
+	}
163
+	url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
164
+	status, out, err := d.SockRequest("POST", url, node.Spec)
165
+	c.Assert(err, checker.IsNil)
166
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
167
+}
168
+
169
+func (d *SwarmDaemon) listNodes(c *check.C) []swarm.Node {
170
+	status, out, err := d.SockRequest("GET", "/nodes", nil)
171
+	c.Assert(err, checker.IsNil)
172
+	c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
173
+
174
+	nodes := []swarm.Node{}
175
+	c.Assert(json.Unmarshal(out, &nodes), checker.IsNil)
176
+	return nodes
177
+}
... ...
@@ -17,7 +17,7 @@ func (s *DockerSuite) TestGetContainersAttachWebsocket(c *check.C) {
17 17
 	testRequires(c, DaemonIsLinux)
18 18
 	out, _ := dockerCmd(c, "run", "-dit", "busybox", "cat")
19 19
 
20
-	rwc, err := sockConn(time.Duration(10 * time.Second))
20
+	rwc, err := sockConn(time.Duration(10*time.Second), "")
21 21
 	c.Assert(err, checker.IsNil)
22 22
 
23 23
 	cleanedContainerID := strings.TrimSpace(out)
... ...
@@ -67,7 +67,7 @@ func (s *DockerSuite) TestGetContainersAttachWebsocket(c *check.C) {
67 67
 
68 68
 // regression gh14320
69 69
 func (s *DockerSuite) TestPostContainersAttachContainerNotFound(c *check.C) {
70
-	req, client, err := newRequestClient("POST", "/containers/doesnotexist/attach", nil, "")
70
+	req, client, err := newRequestClient("POST", "/containers/doesnotexist/attach", nil, "", "")
71 71
 	c.Assert(err, checker.IsNil)
72 72
 
73 73
 	resp, err := client.Do(req)
... ...
@@ -1076,7 +1076,7 @@ func (s *DockerSuite) TestContainerApiChunkedEncoding(c *check.C) {
1076 1076
 	// TODO Windows CI: This can be ported
1077 1077
 	testRequires(c, DaemonIsLinux)
1078 1078
 
1079
-	conn, err := sockConn(time.Duration(10 * time.Second))
1079
+	conn, err := sockConn(time.Duration(10*time.Second), "")
1080 1080
 	c.Assert(err, checker.IsNil)
1081 1081
 	client := httputil.NewClientConn(conn, nil)
1082 1082
 	defer client.Close()
1083 1083
new file mode 100644
... ...
@@ -0,0 +1,573 @@
0
+// +build !windows
1
+
2
+package main
3
+
4
+import (
5
+	"net/http"
6
+	"strconv"
7
+	"strings"
8
+	"syscall"
9
+	"time"
10
+
11
+	"github.com/docker/docker/pkg/integration/checker"
12
+	"github.com/docker/engine-api/types/swarm"
13
+	"github.com/go-check/check"
14
+)
15
+
16
+var defaultReconciliationTimeout = 30 * time.Second
17
+
18
+func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) {
19
+	// todo: should find a better way to verify that components are running than /info
20
+	d1 := s.AddDaemon(c, true, true)
21
+	info, err := d1.info()
22
+	c.Assert(err, checker.IsNil)
23
+	c.Assert(info.ControlAvailable, checker.Equals, true)
24
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
25
+
26
+	d2 := s.AddDaemon(c, true, false)
27
+	info, err = d2.info()
28
+	c.Assert(err, checker.IsNil)
29
+	c.Assert(info.ControlAvailable, checker.Equals, false)
30
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
31
+
32
+	// Leaving cluster
33
+	c.Assert(d2.Leave(false), checker.IsNil)
34
+
35
+	info, err = d2.info()
36
+	c.Assert(err, checker.IsNil)
37
+	c.Assert(info.ControlAvailable, checker.Equals, false)
38
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
39
+
40
+	c.Assert(d2.Join(d1.listenAddr, "", "", false), checker.IsNil)
41
+
42
+	info, err = d2.info()
43
+	c.Assert(err, checker.IsNil)
44
+	c.Assert(info.ControlAvailable, checker.Equals, false)
45
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
46
+
47
+	// Current state restoring after restarts
48
+	err = d1.Stop()
49
+	c.Assert(err, checker.IsNil)
50
+	err = d2.Stop()
51
+	c.Assert(err, checker.IsNil)
52
+
53
+	err = d1.Start()
54
+	c.Assert(err, checker.IsNil)
55
+	err = d2.Start()
56
+	c.Assert(err, checker.IsNil)
57
+
58
+	info, err = d1.info()
59
+	c.Assert(err, checker.IsNil)
60
+	c.Assert(info.ControlAvailable, checker.Equals, true)
61
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
62
+
63
+	info, err = d2.info()
64
+	c.Assert(err, checker.IsNil)
65
+	c.Assert(info.ControlAvailable, checker.Equals, false)
66
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
67
+}
68
+
69
+func (s *DockerSwarmSuite) TestApiSwarmManualAcceptance(c *check.C) {
70
+	s.testAPISwarmManualAcceptance(c, "")
71
+}
72
+func (s *DockerSwarmSuite) TestApiSwarmManualAcceptanceSecret(c *check.C) {
73
+	s.testAPISwarmManualAcceptance(c, "foobaz")
74
+}
75
+
76
+func (s *DockerSwarmSuite) testAPISwarmManualAcceptance(c *check.C, secret string) {
77
+	d1 := s.AddDaemon(c, false, false)
78
+	c.Assert(d1.Init(map[string]bool{}, secret), checker.IsNil)
79
+
80
+	d2 := s.AddDaemon(c, false, false)
81
+	err := d2.Join(d1.listenAddr, "", "", false)
82
+	c.Assert(err, checker.NotNil)
83
+	if secret == "" {
84
+		c.Assert(err.Error(), checker.Contains, "Timeout reached")
85
+		info, err := d2.info()
86
+		c.Assert(err, checker.IsNil)
87
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
88
+		c.Assert(d2.Leave(false), checker.IsNil)
89
+		info, err = d2.info()
90
+		c.Assert(err, checker.IsNil)
91
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
92
+	} else {
93
+		c.Assert(err.Error(), checker.Contains, "valid secret token is necessary")
94
+		info, err := d2.info()
95
+		c.Assert(err, checker.IsNil)
96
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
97
+	}
98
+	d3 := s.AddDaemon(c, false, false)
99
+	go func() {
100
+		for i := 0; ; i++ {
101
+			info, err := d3.info()
102
+			c.Assert(err, checker.IsNil)
103
+			if info.NodeID != "" {
104
+				d1.updateNode(c, d1.getNode(c, info.NodeID), func(n *swarm.Node) {
105
+					n.Spec.Membership = swarm.NodeMembershipAccepted
106
+				})
107
+				return
108
+			}
109
+			if i >= 10 {
110
+				c.Errorf("could not find nodeID")
111
+			}
112
+			time.Sleep(300 * time.Millisecond)
113
+		}
114
+	}()
115
+	c.Assert(d3.Join(d1.listenAddr, secret, "", false), checker.IsNil)
116
+}
117
+
118
+func (s *DockerSwarmSuite) TestApiSwarmSecretAcceptance(c *check.C) {
119
+	d1 := s.AddDaemon(c, false, false)
120
+	aa := make(map[string]bool)
121
+	aa["worker"] = true
122
+	c.Assert(d1.Init(aa, "foobar"), checker.IsNil)
123
+
124
+	d2 := s.AddDaemon(c, false, false)
125
+	err := d2.Join(d1.listenAddr, "", "", false)
126
+	c.Assert(err, checker.NotNil)
127
+	c.Assert(err.Error(), checker.Contains, "secret token is necessary")
128
+	info, err := d2.info()
129
+	c.Assert(err, checker.IsNil)
130
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
131
+
132
+	err = d2.Join(d1.listenAddr, "foobaz", "", false)
133
+	c.Assert(err, checker.NotNil)
134
+	c.Assert(err.Error(), checker.Contains, "secret token is necessary")
135
+	info, err = d2.info()
136
+	c.Assert(err, checker.IsNil)
137
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
138
+
139
+	c.Assert(d2.Join(d1.listenAddr, "foobar", "", false), checker.IsNil)
140
+	info, err = d2.info()
141
+	c.Assert(err, checker.IsNil)
142
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
143
+	c.Assert(d2.Leave(false), checker.IsNil)
144
+	info, err = d2.info()
145
+	c.Assert(err, checker.IsNil)
146
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
147
+}
148
+
149
+func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) {
150
+	d1 := s.AddDaemon(c, true, true)
151
+	d2 := s.AddDaemon(c, false, false)
152
+	err := d2.Join(d1.listenAddr, "", "foobar", false)
153
+	c.Assert(err, checker.NotNil)
154
+	c.Assert(err.Error(), checker.Contains, "invalid checksum digest format")
155
+
156
+	c.Assert(len(d1.CACertHash), checker.GreaterThan, 0)
157
+	c.Assert(d2.Join(d1.listenAddr, "", d1.CACertHash, false), checker.IsNil)
158
+}
159
+
160
+func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) {
161
+	d1 := s.AddDaemon(c, false, false)
162
+	c.Assert(d1.Init(map[string]bool{"worker": true}, ""), checker.IsNil)
163
+	d2 := s.AddDaemon(c, true, false)
164
+
165
+	info, err := d2.info()
166
+	c.Assert(err, checker.IsNil)
167
+	c.Assert(info.ControlAvailable, checker.Equals, false)
168
+	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
169
+
170
+	d1.updateNode(c, d1.getNode(c, d2.NodeID), func(n *swarm.Node) {
171
+		n.Spec.Role = swarm.NodeRoleManager
172
+	})
173
+
174
+	for i := 0; ; i++ {
175
+		info, err := d2.info()
176
+		c.Assert(err, checker.IsNil)
177
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
178
+		if info.ControlAvailable {
179
+			break
180
+		}
181
+		if i > 10 {
182
+			c.Errorf("node did not turn into manager")
183
+		} else {
184
+			break
185
+		}
186
+		time.Sleep(100 * time.Millisecond)
187
+	}
188
+
189
+	d1.updateNode(c, d1.getNode(c, d2.NodeID), func(n *swarm.Node) {
190
+		n.Spec.Role = swarm.NodeRoleWorker
191
+	})
192
+
193
+	for i := 0; ; i++ {
194
+		info, err := d2.info()
195
+		c.Assert(err, checker.IsNil)
196
+		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
197
+		if !info.ControlAvailable {
198
+			break
199
+		}
200
+		if i > 10 {
201
+			c.Errorf("node did not turn into manager")
202
+		} else {
203
+			break
204
+		}
205
+		time.Sleep(100 * time.Millisecond)
206
+	}
207
+
208
+	// todo: test raft qourum stability
209
+}
210
+
211
+func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) {
212
+	d := s.AddDaemon(c, true, true)
213
+
214
+	instances := 2
215
+	id := d.createService(c, simpleTestService, setInstances(instances))
216
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
217
+
218
+	service := d.getService(c, id)
219
+	instances = 5
220
+	d.updateService(c, service, setInstances(instances))
221
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
222
+
223
+	d.removeService(c, service.ID)
224
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
225
+}
226
+
227
+func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) {
228
+	d1 := s.AddDaemon(c, true, true)
229
+	d2 := s.AddDaemon(c, true, false)
230
+	d3 := s.AddDaemon(c, true, false)
231
+
232
+	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
233
+
234
+	instances := 9
235
+	id := d1.createService(c, simpleTestService, setInstances(instances))
236
+
237
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
238
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
239
+	waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
240
+
241
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
242
+
243
+	// reconciliation on d2 node down
244
+	c.Assert(d2.Stop(), checker.IsNil)
245
+
246
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
247
+
248
+	// test downscaling
249
+	instances = 5
250
+	d1.updateService(c, d1.getService(c, id), setInstances(instances))
251
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
252
+
253
+}
254
+
255
+func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) {
256
+	d1 := s.AddDaemon(c, true, true)
257
+	d2 := s.AddDaemon(c, true, false)
258
+	d3 := s.AddDaemon(c, true, false)
259
+
260
+	d1.createService(c, simpleTestService, setGlobalMode)
261
+
262
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
263
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
264
+	waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
265
+
266
+	d4 := s.AddDaemon(c, true, false)
267
+	d5 := s.AddDaemon(c, true, false)
268
+
269
+	waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
270
+	waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
271
+}
272
+
273
+func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) {
274
+	testRequires(c, SameHostDaemon)
275
+	testRequires(c, DaemonIsLinux)
276
+
277
+	d1 := s.AddDaemon(c, true, true)
278
+	d2 := s.AddDaemon(c, true, true)
279
+	d3 := s.AddDaemon(c, true, false)
280
+
281
+	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
282
+
283
+	instances := 9
284
+	d1.createService(c, simpleTestService, setInstances(instances))
285
+
286
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
287
+
288
+	getContainers := func() map[string]*SwarmDaemon {
289
+		m := make(map[string]*SwarmDaemon)
290
+		for _, d := range []*SwarmDaemon{d1, d2, d3} {
291
+			for _, id := range d.activeContainers() {
292
+				m[id] = d
293
+			}
294
+		}
295
+		return m
296
+	}
297
+
298
+	containers := getContainers()
299
+	c.Assert(containers, checker.HasLen, instances)
300
+	var toRemove string
301
+	for i := range containers {
302
+		toRemove = i
303
+	}
304
+
305
+	_, err := containers[toRemove].Cmd("stop", toRemove)
306
+	c.Assert(err, checker.IsNil)
307
+
308
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
309
+
310
+	containers2 := getContainers()
311
+	c.Assert(containers2, checker.HasLen, instances)
312
+	for i := range containers {
313
+		if i == toRemove {
314
+			c.Assert(containers2[i], checker.IsNil)
315
+		} else {
316
+			c.Assert(containers2[i], checker.NotNil)
317
+		}
318
+	}
319
+
320
+	containers = containers2
321
+	for i := range containers {
322
+		toRemove = i
323
+	}
324
+
325
+	// try with killing process outside of docker
326
+	pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
327
+	c.Assert(err, checker.IsNil)
328
+	pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
329
+	c.Assert(err, checker.IsNil)
330
+	c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
331
+
332
+	time.Sleep(time.Second) // give some time to handle the signal
333
+
334
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
335
+
336
+	containers2 = getContainers()
337
+	c.Assert(containers2, checker.HasLen, instances)
338
+	for i := range containers {
339
+		if i == toRemove {
340
+			c.Assert(containers2[i], checker.IsNil)
341
+		} else {
342
+			c.Assert(containers2[i], checker.NotNil)
343
+		}
344
+	}
345
+}
346
+
347
+func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) {
348
+	d1 := s.AddDaemon(c, true, true)
349
+	d2 := s.AddDaemon(c, true, true)
350
+	d3 := s.AddDaemon(c, true, true)
351
+
352
+	d1.createService(c, simpleTestService)
353
+
354
+	c.Assert(d2.Stop(), checker.IsNil)
355
+
356
+	d1.createService(c, simpleTestService, func(s *swarm.Service) {
357
+		s.Spec.Name = "top1"
358
+	})
359
+
360
+	c.Assert(d3.Stop(), checker.IsNil)
361
+
362
+	var service swarm.Service
363
+	simpleTestService(&service)
364
+	service.Spec.Name = "top2"
365
+	status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
366
+	c.Assert(err, checker.IsNil)
367
+	c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
368
+
369
+	c.Assert(d2.Start(), checker.IsNil)
370
+
371
+	d1.createService(c, simpleTestService, func(s *swarm.Service) {
372
+		s.Spec.Name = "top3"
373
+	})
374
+}
375
+
376
+func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) {
377
+	d1 := s.AddDaemon(c, true, true)
378
+	d2 := s.AddDaemon(c, true, false)
379
+	d3 := s.AddDaemon(c, true, false)
380
+
381
+	nodes := d1.listNodes(c)
382
+	c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
383
+
384
+loop0:
385
+	for _, n := range nodes {
386
+		for _, d := range []*SwarmDaemon{d1, d2, d3} {
387
+			if n.ID == d.NodeID {
388
+				continue loop0
389
+			}
390
+		}
391
+		c.Errorf("unknown nodeID %v", n.ID)
392
+	}
393
+}
394
+
395
+func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) {
396
+	d := s.AddDaemon(c, true, true)
397
+
398
+	nodes := d.listNodes(c)
399
+
400
+	d.updateNode(c, d.getNode(c, nodes[0].ID), func(n *swarm.Node) {
401
+		n.Spec.Availability = swarm.NodeAvailabilityPause
402
+	})
403
+
404
+	n := d.getNode(c, nodes[0].ID)
405
+	c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
406
+}
407
+
408
+func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) {
409
+	d1 := s.AddDaemon(c, true, true)
410
+	d2 := s.AddDaemon(c, true, false)
411
+
412
+	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
413
+
414
+	// start a service, expect balanced distribution
415
+	instances := 8
416
+	id := d1.createService(c, simpleTestService, setInstances(instances))
417
+
418
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
419
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
420
+	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
421
+
422
+	// drain d2, all containers should move to d1
423
+	d1.updateNode(c, d1.getNode(c, d2.NodeID), func(n *swarm.Node) {
424
+		n.Spec.Availability = swarm.NodeAvailabilityDrain
425
+	})
426
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
427
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
428
+
429
+	// set d2 back to active
430
+	d1.updateNode(c, d1.getNode(c, d2.NodeID), func(n *swarm.Node) {
431
+		n.Spec.Availability = swarm.NodeAvailabilityActive
432
+	})
433
+
434
+	// change environment variable, resulting balanced rescheduling
435
+	d1.updateService(c, d1.getService(c, id), func(s *swarm.Service) {
436
+		s.Spec.TaskTemplate.ContainerSpec.Env = []string{"FOO=BAR"}
437
+		s.Spec.UpdateConfig = &swarm.UpdateConfig{
438
+			Parallelism: 2,
439
+			Delay:       250 * time.Millisecond,
440
+		}
441
+	})
442
+
443
+	// drained node first so we don't get any old containers
444
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
445
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
446
+	waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
447
+
448
+	d2ContainerCount := len(d2.activeContainers())
449
+
450
+	// set d2 to paused, scale service up, only d1 gets new tasks
451
+	d1.updateNode(c, d1.getNode(c, d2.NodeID), func(n *swarm.Node) {
452
+		n.Spec.Availability = swarm.NodeAvailabilityPause
453
+	})
454
+
455
+	instances = 14
456
+	d1.updateService(c, d1.getService(c, id), setInstances(instances))
457
+
458
+	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
459
+	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
460
+
461
+}
462
+
463
+func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) {
464
+	d := s.AddDaemon(c, true, true)
465
+
466
+	instances := 2
467
+	d.createService(c, simpleTestService, setInstances(instances))
468
+
469
+	id, err := d.Cmd("run", "-d", "busybox", "top")
470
+	c.Assert(err, checker.IsNil)
471
+	id = strings.TrimSpace(id)
472
+
473
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
474
+
475
+	c.Assert(d.Leave(false), checker.NotNil)
476
+	c.Assert(d.Leave(true), checker.IsNil)
477
+
478
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
479
+
480
+	id2, err := d.Cmd("ps", "-q")
481
+	c.Assert(err, checker.IsNil)
482
+	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
483
+}
484
+
485
+func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
486
+	d1 := s.AddDaemon(c, true, true)
487
+
488
+	instances := 2
489
+	id := d1.createService(c, simpleTestService, setInstances(instances))
490
+
491
+	d1.getService(c, id)
492
+	d1.Stop()
493
+	d1.Start()
494
+	d1.getService(c, id)
495
+
496
+	d2 := s.AddDaemon(c, true, true)
497
+	d2.getService(c, id)
498
+	d2.Stop()
499
+	d2.Start()
500
+	d2.getService(c, id)
501
+
502
+	d3 := s.AddDaemon(c, true, true)
503
+	d3.getService(c, id)
504
+	d3.Stop()
505
+	d3.Start()
506
+	d3.getService(c, id)
507
+
508
+	d3.Kill()
509
+	time.Sleep(1 * time.Second) // time to handle signal
510
+	d3.Start()
511
+	d3.getService(c, id)
512
+}
513
+
514
+func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) {
515
+	d := s.AddDaemon(c, true, true)
516
+
517
+	instances := 2
518
+	id := d.createService(c, simpleTestService, setInstances(instances))
519
+
520
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
521
+	containers := d.activeContainers()
522
+	instances = 4
523
+	d.updateService(c, d.getService(c, id), setInstances(instances))
524
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
525
+	containers2 := d.activeContainers()
526
+
527
+loop0:
528
+	for _, c1 := range containers {
529
+		for _, c2 := range containers2 {
530
+			if c1 == c2 {
531
+				continue loop0
532
+			}
533
+		}
534
+		c.Errorf("container %v not found in new set %#v", c1, containers2)
535
+	}
536
+}
537
+
538
+func simpleTestService(s *swarm.Service) {
539
+	var ureplicas uint64
540
+	ureplicas = 1
541
+	s.Spec = swarm.ServiceSpec{
542
+		TaskTemplate: swarm.TaskSpec{
543
+			ContainerSpec: swarm.ContainerSpec{
544
+				Image:   "busybox:latest",
545
+				Command: []string{"/bin/top"},
546
+			},
547
+		},
548
+		Mode: swarm.ServiceMode{
549
+			Replicated: &swarm.ReplicatedService{
550
+				Replicas: &ureplicas,
551
+			},
552
+		},
553
+	}
554
+	s.Spec.Name = "top"
555
+}
556
+
557
+func setInstances(replicas int) serviceConstructor {
558
+	ureplicas := uint64(replicas)
559
+	return func(s *swarm.Service) {
560
+		s.Spec.Mode = swarm.ServiceMode{
561
+			Replicated: &swarm.ReplicatedService{
562
+				Replicas: &ureplicas,
563
+			},
564
+		}
565
+	}
566
+}
567
+
568
+func setGlobalMode(s *swarm.Service) {
569
+	s.Spec.Mode = swarm.ServiceMode{
570
+		Global: &swarm.GlobalService{},
571
+	}
572
+}
... ...
@@ -34,7 +34,7 @@ func (s *DockerSuite) TestApiGetEnabledCors(c *check.C) {
34 34
 }
35 35
 
36 36
 func (s *DockerSuite) TestApiVersionStatusCode(c *check.C) {
37
-	conn, err := sockConn(time.Duration(10 * time.Second))
37
+	conn, err := sockConn(time.Duration(10*time.Second), "")
38 38
 	c.Assert(err, checker.IsNil)
39 39
 
40 40
 	client := httputil.NewClientConn(conn, nil)
... ...
@@ -124,8 +124,10 @@ func getTLSConfig() (*tls.Config, error) {
124 124
 	return tlsConfig, nil
125 125
 }
126 126
 
127
-func sockConn(timeout time.Duration) (net.Conn, error) {
128
-	daemon := daemonHost()
127
+func sockConn(timeout time.Duration, daemon string) (net.Conn, error) {
128
+	if daemon == "" {
129
+		daemon = daemonHost()
130
+	}
129 131
 	daemonURL, err := url.Parse(daemon)
130 132
 	if err != nil {
131 133
 		return nil, fmt.Errorf("could not parse url %q: %v", daemon, err)
... ...
@@ -168,7 +170,11 @@ func sockRequest(method, endpoint string, data interface{}) (int, []byte, error)
168 168
 }
169 169
 
170 170
 func sockRequestRaw(method, endpoint string, data io.Reader, ct string) (*http.Response, io.ReadCloser, error) {
171
-	req, client, err := newRequestClient(method, endpoint, data, ct)
171
+	return sockRequestRawToDaemon(method, endpoint, data, ct, "")
172
+}
173
+
174
+func sockRequestRawToDaemon(method, endpoint string, data io.Reader, ct, daemon string) (*http.Response, io.ReadCloser, error) {
175
+	req, client, err := newRequestClient(method, endpoint, data, ct, daemon)
172 176
 	if err != nil {
173 177
 		return nil, nil, err
174 178
 	}
... ...
@@ -187,7 +193,7 @@ func sockRequestRaw(method, endpoint string, data io.Reader, ct string) (*http.R
187 187
 }
188 188
 
189 189
 func sockRequestHijack(method, endpoint string, data io.Reader, ct string) (net.Conn, *bufio.Reader, error) {
190
-	req, client, err := newRequestClient(method, endpoint, data, ct)
190
+	req, client, err := newRequestClient(method, endpoint, data, ct, "")
191 191
 	if err != nil {
192 192
 		return nil, nil, err
193 193
 	}
... ...
@@ -197,8 +203,8 @@ func sockRequestHijack(method, endpoint string, data io.Reader, ct string) (net.
197 197
 	return conn, br, nil
198 198
 }
199 199
 
200
-func newRequestClient(method, endpoint string, data io.Reader, ct string) (*http.Request, *httputil.ClientConn, error) {
201
-	c, err := sockConn(time.Duration(10 * time.Second))
200
+func newRequestClient(method, endpoint string, data io.Reader, ct, daemon string) (*http.Request, *httputil.ClientConn, error) {
201
+	c, err := sockConn(time.Duration(10*time.Second), daemon)
202 202
 	if err != nil {
203 203
 		return nil, nil, fmt.Errorf("could not dial docker daemon: %v", err)
204 204
 	}
... ...
@@ -1514,3 +1520,50 @@ func getErrorMessage(c *check.C, body []byte) string {
1514 1514
 	c.Assert(json.Unmarshal(body, &resp), check.IsNil)
1515 1515
 	return strings.TrimSpace(resp.Message)
1516 1516
 }
1517
+
1518
+func waitAndAssert(c *check.C, timeout time.Duration, f checkF, checker check.Checker, args ...interface{}) {
1519
+	after := time.After(timeout)
1520
+	for {
1521
+		v, comment := f(c)
1522
+		assert, _ := checker.Check(append([]interface{}{v}, args...), checker.Info().Params)
1523
+		select {
1524
+		case <-after:
1525
+			assert = true
1526
+		default:
1527
+		}
1528
+		if assert {
1529
+			if comment != nil {
1530
+				args = append(args, comment)
1531
+			}
1532
+			c.Assert(v, checker, args...)
1533
+			return
1534
+		}
1535
+		time.Sleep(100 * time.Millisecond)
1536
+	}
1537
+}
1538
+
1539
+type checkF func(*check.C) (interface{}, check.CommentInterface)
1540
+type reducer func(...interface{}) interface{}
1541
+
1542
+func reducedCheck(r reducer, funcs ...checkF) checkF {
1543
+	return func(c *check.C) (interface{}, check.CommentInterface) {
1544
+		var values []interface{}
1545
+		var comments []string
1546
+		for _, f := range funcs {
1547
+			v, comment := f(c)
1548
+			values = append(values, v)
1549
+			if comment != nil {
1550
+				comments = append(comments, comment.CheckCommentString())
1551
+			}
1552
+		}
1553
+		return r(values...), check.Commentf("%v", strings.Join(comments, ", "))
1554
+	}
1555
+}
1556
+
1557
// sumAsIntegers adds up its arguments and returns the total as an int wrapped
// in an interface{}. Every argument must hold an int; any other dynamic type
// makes the type assertion panic. With no arguments the result is 0.
func sumAsIntegers(vals ...interface{}) interface{} {
	total := 0
	for i := range vals {
		total += vals[i].(int)
	}
	return total
}