Browse code

Add support for sending down service Running and Desired task counts

Adds a new ServiceStatus field to the Service object, which includes the
running and desired task counts. This new field is gated behind a
"status" query parameter.

Signed-off-by: Drew Erny <drew.erny@docker.com>

Drew Erny authored on 2019/05/17 07:43:48
Showing 8 changed files
... ...
@@ -167,7 +167,19 @@ func (sr *swarmRouter) getServices(ctx context.Context, w http.ResponseWriter, r
167 167
 		return errdefs.InvalidParameter(err)
168 168
 	}
169 169
 
170
-	services, err := sr.backend.GetServices(basictypes.ServiceListOptions{Filters: filter})
170
+	// the status query parameter is only supported in API versions >= 1.41. If
171
+	// the client is using a lesser version, ignore the parameter.
172
+	cliVersion := httputils.VersionFromContext(ctx)
173
+	var status bool
174
+	if value := r.URL.Query().Get("status"); value != "" && !versions.LessThan(cliVersion, "1.41") {
175
+		var err error
176
+		status, err = strconv.ParseBool(value)
177
+		if err != nil {
178
+			return errors.Wrapf(errdefs.InvalidParameter(err), "invalid value for status: %s", value)
179
+		}
180
+	}
181
+
182
+	services, err := sr.backend.GetServices(basictypes.ServiceListOptions{Filters: filter, Status: status})
171 183
 	if err != nil {
172 184
 		logrus.Errorf("Error getting services: %v", err)
173 185
 		return err
... ...
@@ -178,15 +190,21 @@ func (sr *swarmRouter) getServices(ctx context.Context, w http.ResponseWriter, r
178 178
 
179 179
 func (sr *swarmRouter) getService(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
180 180
 	var insertDefaults bool
181
+
181 182
 	if value := r.URL.Query().Get("insertDefaults"); value != "" {
182 183
 		var err error
183 184
 		insertDefaults, err = strconv.ParseBool(value)
184 185
 		if err != nil {
185
-			err := fmt.Errorf("invalid value for insertDefaults: %s", value)
186 186
 			return errors.Wrapf(errdefs.InvalidParameter(err), "invalid value for insertDefaults: %s", value)
187 187
 		}
188 188
 	}
189 189
 
190
+	// you may note that there is no code here to handle the "status" query
191
+	// parameter, as in getServices. the Status field is not supported when
192
+	// retrieving an individual service because the Backend API changes
193
+	// required to accommodate it would be too disruptive, and because that
194
+	// field is so rarely needed as part of an individual service inspection.
195
+
190 196
 	service, err := sr.backend.GetService(vars["id"], insertDefaults)
191 197
 	if err != nil {
192 198
 		logrus.Errorf("Error getting service %s: %v", vars["id"], err)
... ...
@@ -3369,6 +3369,27 @@ definitions:
3369 3369
             format: "dateTime"
3370 3370
           Message:
3371 3371
             type: "string"
3372
+      ServiceStatus:
3373
+        description: |
3374
+          The status of the service's tasks. Provided only when requested as
3375
+          part of a ServiceList operation.
3376
+        type: "object"
3377
+        properties:
3378
+          RunningTasks:
3379
+            description: "The number of tasks for the service currently in the Running state"
3380
+            type: "integer"
3381
+            format: "uint64"
3382
+            example: 7
3383
+          DesiredTasks:
3384
+            description: |
3385
+              The number of tasks for the service desired to be running.
3386
+              For replicated services, this is the replica count from the
3387
+              service spec. For global services, this is computed by taking
3388
+              the count of all tasks for the service with a Desired State other
3389
+              than Shutdown.
3390
+            type: "integer"
3391
+            format: "uint64"
3392
+            example: 10
3372 3393
     example:
3373 3394
       ID: "9mnpnzenvg8p8tdbtq4wvbkcz"
3374 3395
       Version:
... ...
@@ -9316,6 +9337,10 @@ paths:
9316 9316
             - `label=<service label>`
9317 9317
             - `mode=["replicated"|"global"]`
9318 9318
             - `name=<service name>`
9319
+        - name: "status"
9320
+          in: "query"
9321
+          type: "boolean"
9322
+          description: "Include service status, with count of running and desired tasks"
9319 9323
       tags: ["Service"]
9320 9324
   /services/create:
9321 9325
     post:
... ...
@@ -363,6 +363,10 @@ type ServiceUpdateOptions struct {
363 363
 // ServiceListOptions holds parameters to list services with.
364 364
 type ServiceListOptions struct {
365 365
 	Filters filters.Args
366
+
367
+	// Status indicates whether the server should include the service task
368
+	// count of running and desired tasks.
369
+	Status bool
366 370
 }
367 371
 
368 372
 // ServiceInspectOptions holds parameters related to the "service inspect"
... ...
@@ -10,6 +10,13 @@ type Service struct {
10 10
 	PreviousSpec *ServiceSpec  `json:",omitempty"`
11 11
 	Endpoint     Endpoint      `json:",omitempty"`
12 12
 	UpdateStatus *UpdateStatus `json:",omitempty"`
13
+
14
+	// ServiceStatus is an optional, extra field indicating the number of
15
+	// desired and running tasks. It is provided primarily as a shortcut to
16
+	// calculating these values client-side, which otherwise would require
17
+	// listing all tasks for a service, an operation that could be
18
+	// computation and network expensive.
19
+	ServiceStatus *ServiceStatus `json:",omitempty"`
13 20
 }
14 21
 
15 22
 // ServiceSpec represents the spec of a service.
... ...
@@ -122,3 +129,17 @@ type UpdateConfig struct {
122 122
 	// started, or the new task is started before the old task is shut down.
123 123
 	Order string
124 124
 }
125
+
126
+// ServiceStatus represents the number of running tasks in a service and the
127
+// number of tasks desired to be running.
128
+type ServiceStatus struct {
129
+	// RunningTasks is the number of tasks for the service actually in the
130
+	// Running state
131
+	RunningTasks uint64
132
+
133
+	// DesiredTasks is the number of tasks desired to be running by the
134
+	// service. For replicated services, this is the replica count. For global
135
+	// services, this is computed by taking the number of tasks with desired
136
+	// state of not-Shutdown.
137
+	DesiredTasks uint64
138
+}
... ...
@@ -23,6 +23,10 @@ func (cli *Client) ServiceList(ctx context.Context, options types.ServiceListOpt
23 23
 		query.Set("filters", filterJSON)
24 24
 	}
25 25
 
26
+	if options.Status {
27
+		query.Set("status", "true")
28
+	}
29
+
26 30
 	resp, err := cli.get(ctx, "/services", query, nil)
27 31
 	defer ensureReaderClosed(resp)
28 32
 	if err != nil {
... ...
@@ -77,6 +77,12 @@ func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Serv
77 77
 
78 78
 	services := make([]types.Service, 0, len(r.Services))
79 79
 
80
+	// if the user requests the service statuses, we'll store the IDs needed
81
+	// in this slice
82
+	var serviceIDs []string
83
+	if options.Status {
84
+		serviceIDs = make([]string, 0, len(r.Services))
85
+	}
80 86
 	for _, service := range r.Services {
81 87
 		if options.Filters.Contains("mode") {
82 88
 			var mode string
... ...
@@ -91,6 +97,9 @@ func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Serv
91 91
 				continue
92 92
 			}
93 93
 		}
94
+		if options.Status {
95
+			serviceIDs = append(serviceIDs, service.ID)
96
+		}
94 97
 		svcs, err := convert.ServiceFromGRPC(*service)
95 98
 		if err != nil {
96 99
 			return nil, err
... ...
@@ -98,6 +107,49 @@ func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Serv
98 98
 		services = append(services, svcs)
99 99
 	}
100 100
 
101
+	if options.Status {
102
+		// Listing service statuses is a separate call because, while it is the
103
+		// most common UI operation, it is still just a UI operation, and it
104
+		// would be improper to include this data in swarm's Service object.
105
+		// We pay the cost with some complexity here, but this is still way
106
+		// more efficient than marshalling and unmarshalling all the JSON
107
+		// needed to list tasks and get this data otherwise client-side
108
+		resp, err := state.controlClient.ListServiceStatuses(
109
+			ctx,
110
+			&swarmapi.ListServiceStatusesRequest{Services: serviceIDs},
111
+			grpc.MaxCallRecvMsgSize(defaultRecvSizeForListResponse),
112
+		)
113
+		if err != nil {
114
+			return nil, err
115
+		}
116
+
117
+		// we'll need to match up statuses in the response with the services in
118
+		// the list operation. if we did this by operating on two lists, the
119
+		// result would be quadratic. instead, make a mapping of service IDs to
120
+		// service statuses so that this is roughly linear. additionally,
121
+		// convert the status response to an engine api service status here.
122
+		serviceMap := map[string]*types.ServiceStatus{}
123
+		for _, status := range resp.Statuses {
124
+			serviceMap[status.ServiceID] = &types.ServiceStatus{
125
+				RunningTasks: status.RunningTasks,
126
+				DesiredTasks: status.DesiredTasks,
127
+			}
128
+		}
129
+
130
+		// because this is a list of values and not pointers, make sure we
131
+		// actually alter the value when iterating.
132
+		for i, service := range services {
133
+			// the return value of the ListServiceStatuses operation is
134
+			// guaranteed to contain a value in the response for every argument
135
+			// in the request, so we can safely do this assignment. and even if
136
+			// it wasn't, and the service ID was for some reason absent from
137
+			// this map, the resulting value of service.ServiceStatus would just be
138
+			// nil -- the same thing it was before
139
+			service.ServiceStatus = serviceMap[service.ID]
140
+			services[i] = service
141
+		}
142
+	}
143
+
101 144
 	return services, nil
102 145
 }
103 146
 
... ...
@@ -31,6 +31,9 @@ keywords: "API, Docker, rcli, REST, documentation"
31 31
 * `GET /info` now returns an `OSVersion` field, containing the operating system's
32 32
   version. This change is not versioned, and affects all API versions if the daemon
33 33
   has this patch.
34
+* `GET /services` now accepts query parameter `status`. When set to `true`,
35
+  services returned will include `ServiceStatus`, which provides Desired and
36
+  Running task counts for the service.
34 37
 
35 38
 ## v1.40 API changes
36 39
 
37 40
new file mode 100644
... ...
@@ -0,0 +1,108 @@
0
+package service // import "github.com/docker/docker/integration/service"
1
+
2
+import (
3
+	"context"
4
+	"fmt"
5
+	"testing"
6
+
7
+	"github.com/docker/docker/api/types"
8
+	"github.com/docker/docker/api/types/filters"
9
+	swarmtypes "github.com/docker/docker/api/types/swarm"
10
+	"github.com/docker/docker/api/types/versions"
11
+	"github.com/docker/docker/integration/internal/swarm"
12
+	"gotest.tools/assert"
13
+	is "gotest.tools/assert/cmp"
14
+	"gotest.tools/poll"
15
+	"gotest.tools/skip"
16
+)
17
+
18
+// TestServiceListWithStatuses tests that performing a ServiceList operation
19
+// correctly uses the Status parameter, and that the resulting response
20
+// contains correct service statuses.
21
+//
22
+// NOTE(dperny): because it's a pain to elicit the behavior of an unconverged
23
+// service reliably, I'm not testing that an unconverged service returns X
24
+// running and Y desired tasks. Instead, I'm just going to trust that I can
25
+// successfully assign a value to another value without screwing it up. The
26
+// logic for computing service statuses is in swarmkit anyway, not in the
27
+// engine, and is well-tested there, so this test just needs to make sure that
28
+// statuses get correctly associated with the right services.
29
+func TestServiceListWithStatuses(t *testing.T) {
30
+	skip.If(t, testEnv.IsRemoteDaemon)
31
+	skip.If(t, testEnv.DaemonInfo.OSType == "windows")
32
+	// statuses were added in API version 1.41
33
+	skip.If(t, versions.LessThan(testEnv.DaemonInfo.ServerVersion, "1.41"))
34
+	defer setupTest(t)()
35
+	d := swarm.NewSwarm(t, testEnv)
36
+	defer d.Stop(t)
37
+	client := d.NewClientT(t)
38
+	defer client.Close()
39
+
40
+	ctx := context.Background()
41
+
42
+	serviceCount := 3
43
+	// create some services.
44
+	for i := 0; i < serviceCount; i++ {
45
+		spec := fullSwarmServiceSpec(fmt.Sprintf("test-list-%d", i), uint64(i+1))
46
+		// for whatever reason, the args "-u root", when included, cause these
47
+		// tasks to fail and exit. instead, we'll just pass no args, which
48
+		// works.
49
+		spec.TaskTemplate.ContainerSpec.Args = []string{}
50
+		resp, err := client.ServiceCreate(ctx, spec, types.ServiceCreateOptions{
51
+			QueryRegistry: false,
52
+		})
53
+		assert.NilError(t, err)
54
+		id := resp.ID
55
+		// we need to wait specifically for the tasks to be running, which the
56
+		// serviceContainerCount function does not do. instead, we'll use a
57
+		// bespoke closure right here.
58
+		poll.WaitOn(t, func(log poll.LogT) poll.Result {
59
+			filter := filters.NewArgs()
60
+			filter.Add("service", id)
61
+			tasks, err := client.TaskList(context.Background(), types.TaskListOptions{
62
+				Filters: filter,
63
+			})
64
+
65
+			running := 0
66
+			for _, task := range tasks {
67
+				if task.Status.State == swarmtypes.TaskStateRunning {
68
+					running++
69
+				}
70
+			}
71
+
72
+			switch {
73
+			case err != nil:
74
+				return poll.Error(err)
75
+			case running == i+1:
76
+				return poll.Success()
77
+			default:
78
+				return poll.Continue(
79
+					"running task count %d (%d total), waiting for %d",
80
+					running, len(tasks), i+1,
81
+				)
82
+			}
83
+		})
84
+	}
85
+
86
+	// now, let's do the list operation with no status arg set.
87
+	resp, err := client.ServiceList(ctx, types.ServiceListOptions{})
88
+	assert.NilError(t, err)
89
+	assert.Check(t, is.Len(resp, serviceCount))
90
+	for _, service := range resp {
91
+		assert.Check(t, is.Nil(service.ServiceStatus))
92
+	}
93
+
94
+	// now try again, but with Status: true. This time, we should have statuses
95
+	resp, err = client.ServiceList(ctx, types.ServiceListOptions{Status: true})
96
+	assert.NilError(t, err)
97
+	assert.Check(t, is.Len(resp, serviceCount))
98
+	for _, service := range resp {
99
+		replicas := *service.Spec.Mode.Replicated.Replicas
100
+
101
+		assert.Assert(t, service.ServiceStatus != nil)
102
+		// Use assert.Check to not fail out of the test if this fails
103
+		assert.Check(t, is.Equal(service.ServiceStatus.DesiredTasks, replicas))
104
+		assert.Check(t, is.Equal(service.ServiceStatus.RunningTasks, replicas))
105
+	}
106
+
107
+}