Browse code

Service update failure thresholds and rollback

This adds support for two enhancements to swarm service rolling updates:

- Failure thresholds: In Docker 1.12, a service update could be set up
to either pause or continue after a single failure occurs. This adds
an --update-max-failure-ratio flag that controls what fraction of tasks
may fail to update before the update as a whole is considered a failure.
A counterpart flag, --update-monitor, controls how long to monitor each
task for a failure after starting it during the update.

- Rollback flag: service update --rollback reverts the service to its
previous version. If a service update encounters task failures, or
fails to function properly for some other reason, the user can roll back
the update.

SwarmKit also has the ability to roll back updates automatically after
hitting the failure thresholds, but we've decided not to expose this in
the Docker API/CLI for now, favoring a workflow where the decision to
roll back is always made by an admin. Depending on user feedback, we may
add a "rollback" option to --update-failure-action in the future.

Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>

Aaron Lehmann authored on 2016/09/03 06:12:05
Showing 17 changed files
... ...
@@ -15,7 +15,7 @@ type Backend interface {
15 15
 	GetServices(basictypes.ServiceListOptions) ([]types.Service, error)
16 16
 	GetService(string) (types.Service, error)
17 17
 	CreateService(types.ServiceSpec, string) (string, error)
18
-	UpdateService(string, uint64, types.ServiceSpec, string) error
18
+	UpdateService(string, uint64, types.ServiceSpec, string, string) error
19 19
 	RemoveService(string) error
20 20
 	GetNodes(basictypes.NodeListOptions) ([]types.Node, error)
21 21
 	GetNode(string) (types.Node, error)
... ...
@@ -156,7 +156,9 @@ func (sr *swarmRouter) updateService(ctx context.Context, w http.ResponseWriter,
156 156
 	// Get returns "" if the header does not exist
157 157
 	encodedAuth := r.Header.Get("X-Registry-Auth")
158 158
 
159
-	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth); err != nil {
159
+	registryAuthFrom := r.URL.Query().Get("registryAuthFrom")
160
+
161
+	if err := sr.backend.UpdateService(vars["id"], version, service, encodedAuth, registryAuthFrom); err != nil {
160 162
 		logrus.Errorf("Error updating service %s: %v", vars["id"], err)
161 163
 		return err
162 164
 	}
... ...
@@ -90,16 +90,16 @@ type UpdateConfig struct {
90 90
 	// be used.
91 91
 	Monitor time.Duration `json:",omitempty"`
92 92
 
93
-	// AllowedFailureFraction is the fraction of tasks that may fail during
93
+	// MaxFailureRatio is the fraction of tasks that may fail during
94 94
 	// an update before the failure action is invoked. Any task created by
95 95
 	// the current update which ends up in one of the states REJECTED,
96 96
 	// COMPLETED or FAILED within Monitor from its creation counts as a
97 97
 	// failure. The number of failures is divided by the number of tasks
98 98
 	// being updated, and if this fraction is greater than
99
-	// AllowedFailureFraction, the failure action is invoked.
99
+	// MaxFailureRatio, the failure action is invoked.
100 100
 	//
101 101
 	// If the failure action is CONTINUE, there is no effect.
102 102
 	// If the failure action is PAUSE, no more tasks will be updated until
103 103
 	// another update is started.
104
-	AllowedFailureFraction float32
104
+	MaxFailureRatio float32
105 105
 }
... ...
@@ -41,10 +41,14 @@ Placement:
41 41
 {{- if .HasUpdateConfig }}
42 42
 UpdateConfig:
43 43
  Parallelism:	{{ .UpdateParallelism }}
44
-{{- if .HasUpdateDelay -}}
44
+{{- if .HasUpdateDelay}}
45 45
  Delay:		{{ .UpdateDelay }}
46 46
 {{- end }}
47 47
  On failure:	{{ .UpdateOnFailure }}
48
+{{- if .HasUpdateMonitor}}
49
+ Monitoring Period: {{ .UpdateMonitor }}
50
+{{- end }}
51
+ Max failure ratio: {{ .UpdateMaxFailureRatio }}
48 52
 {{- end }}
49 53
 ContainerSpec:
50 54
  Image:		{{ .ContainerImage }}
... ...
@@ -218,6 +222,18 @@ func (ctx *serviceInspectContext) UpdateOnFailure() string {
218 218
 	return ctx.Service.Spec.UpdateConfig.FailureAction
219 219
 }
220 220
 
221
+func (ctx *serviceInspectContext) HasUpdateMonitor() bool {
222
+	return ctx.Service.Spec.UpdateConfig.Monitor.Nanoseconds() > 0
223
+}
224
+
225
+func (ctx *serviceInspectContext) UpdateMonitor() time.Duration {
226
+	return ctx.Service.Spec.UpdateConfig.Monitor
227
+}
228
+
229
+func (ctx *serviceInspectContext) UpdateMaxFailureRatio() float32 {
230
+	return ctx.Service.Spec.UpdateConfig.MaxFailureRatio
231
+}
232
+
221 233
 func (ctx *serviceInspectContext) ContainerImage() string {
222 234
 	return ctx.Service.Spec.TaskTemplate.ContainerSpec.Image
223 235
 }
... ...
@@ -267,9 +267,11 @@ func (m *MountOpt) Value() []mounttypes.Mount {
267 267
 }
268 268
 
269 269
 type updateOptions struct {
270
-	parallelism uint64
271
-	delay       time.Duration
272
-	onFailure   string
270
+	parallelism     uint64
271
+	delay           time.Duration
272
+	monitor         time.Duration
273
+	onFailure       string
274
+	maxFailureRatio float32
273 275
 }
274 276
 
275 277
 type resourceOptions struct {
... ...
@@ -458,9 +460,11 @@ func (opts *serviceOptions) ToService() (swarm.ServiceSpec, error) {
458 458
 		Networks: convertNetworks(opts.networks),
459 459
 		Mode:     swarm.ServiceMode{},
460 460
 		UpdateConfig: &swarm.UpdateConfig{
461
-			Parallelism:   opts.update.parallelism,
462
-			Delay:         opts.update.delay,
463
-			FailureAction: opts.update.onFailure,
461
+			Parallelism:     opts.update.parallelism,
462
+			Delay:           opts.update.delay,
463
+			Monitor:         opts.update.monitor,
464
+			FailureAction:   opts.update.onFailure,
465
+			MaxFailureRatio: opts.update.maxFailureRatio,
464 466
 		},
465 467
 		EndpointSpec: opts.endpoint.ToEndpointSpec(),
466 468
 	}
... ...
@@ -507,7 +511,9 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
507 507
 
508 508
 	flags.Uint64Var(&opts.update.parallelism, flagUpdateParallelism, 1, "Maximum number of tasks updated simultaneously (0 to update all at once)")
509 509
 	flags.DurationVar(&opts.update.delay, flagUpdateDelay, time.Duration(0), "Delay between updates")
510
+	flags.DurationVar(&opts.update.monitor, flagUpdateMonitor, time.Duration(0), "Duration after each task update to monitor for failure")
510 511
 	flags.StringVar(&opts.update.onFailure, flagUpdateFailureAction, "pause", "Action on update failure (pause|continue)")
512
+	flags.Float32Var(&opts.update.maxFailureRatio, flagUpdateMaxFailureRatio, 0, "Failure rate to tolerate during an update")
511 513
 
512 514
 	flags.StringVar(&opts.endpoint.mode, flagEndpointMode, "", "Endpoint mode (vip or dnsrr)")
513 515
 
... ...
@@ -518,46 +524,48 @@ func addServiceFlags(cmd *cobra.Command, opts *serviceOptions) {
518 518
 }
519 519
 
520 520
 const (
521
-	flagConstraint           = "constraint"
522
-	flagConstraintRemove     = "constraint-rm"
523
-	flagConstraintAdd        = "constraint-add"
524
-	flagContainerLabel       = "container-label"
525
-	flagContainerLabelRemove = "container-label-rm"
526
-	flagContainerLabelAdd    = "container-label-add"
527
-	flagEndpointMode         = "endpoint-mode"
528
-	flagEnv                  = "env"
529
-	flagEnvRemove            = "env-rm"
530
-	flagEnvAdd               = "env-add"
531
-	flagGroupAdd             = "group-add"
532
-	flagGroupRemove          = "group-rm"
533
-	flagLabel                = "label"
534
-	flagLabelRemove          = "label-rm"
535
-	flagLabelAdd             = "label-add"
536
-	flagLimitCPU             = "limit-cpu"
537
-	flagLimitMemory          = "limit-memory"
538
-	flagMode                 = "mode"
539
-	flagMount                = "mount"
540
-	flagMountRemove          = "mount-rm"
541
-	flagMountAdd             = "mount-add"
542
-	flagName                 = "name"
543
-	flagNetwork              = "network"
544
-	flagPublish              = "publish"
545
-	flagPublishRemove        = "publish-rm"
546
-	flagPublishAdd           = "publish-add"
547
-	flagReplicas             = "replicas"
548
-	flagReserveCPU           = "reserve-cpu"
549
-	flagReserveMemory        = "reserve-memory"
550
-	flagRestartCondition     = "restart-condition"
551
-	flagRestartDelay         = "restart-delay"
552
-	flagRestartMaxAttempts   = "restart-max-attempts"
553
-	flagRestartWindow        = "restart-window"
554
-	flagStopGracePeriod      = "stop-grace-period"
555
-	flagUpdateDelay          = "update-delay"
556
-	flagUpdateFailureAction  = "update-failure-action"
557
-	flagUpdateParallelism    = "update-parallelism"
558
-	flagUser                 = "user"
559
-	flagWorkdir              = "workdir"
560
-	flagRegistryAuth         = "with-registry-auth"
561
-	flagLogDriver            = "log-driver"
562
-	flagLogOpt               = "log-opt"
521
+	flagConstraint            = "constraint"
522
+	flagConstraintRemove      = "constraint-rm"
523
+	flagConstraintAdd         = "constraint-add"
524
+	flagContainerLabel        = "container-label"
525
+	flagContainerLabelRemove  = "container-label-rm"
526
+	flagContainerLabelAdd     = "container-label-add"
527
+	flagEndpointMode          = "endpoint-mode"
528
+	flagEnv                   = "env"
529
+	flagEnvRemove             = "env-rm"
530
+	flagEnvAdd                = "env-add"
531
+	flagGroupAdd              = "group-add"
532
+	flagGroupRemove           = "group-rm"
533
+	flagLabel                 = "label"
534
+	flagLabelRemove           = "label-rm"
535
+	flagLabelAdd              = "label-add"
536
+	flagLimitCPU              = "limit-cpu"
537
+	flagLimitMemory           = "limit-memory"
538
+	flagMode                  = "mode"
539
+	flagMount                 = "mount"
540
+	flagMountRemove           = "mount-rm"
541
+	flagMountAdd              = "mount-add"
542
+	flagName                  = "name"
543
+	flagNetwork               = "network"
544
+	flagPublish               = "publish"
545
+	flagPublishRemove         = "publish-rm"
546
+	flagPublishAdd            = "publish-add"
547
+	flagReplicas              = "replicas"
548
+	flagReserveCPU            = "reserve-cpu"
549
+	flagReserveMemory         = "reserve-memory"
550
+	flagRestartCondition      = "restart-condition"
551
+	flagRestartDelay          = "restart-delay"
552
+	flagRestartMaxAttempts    = "restart-max-attempts"
553
+	flagRestartWindow         = "restart-window"
554
+	flagStopGracePeriod       = "stop-grace-period"
555
+	flagUpdateDelay           = "update-delay"
556
+	flagUpdateFailureAction   = "update-failure-action"
557
+	flagUpdateMaxFailureRatio = "update-max-failure-ratio"
558
+	flagUpdateMonitor         = "update-monitor"
559
+	flagUpdateParallelism     = "update-parallelism"
560
+	flagUser                  = "user"
561
+	flagWorkdir               = "workdir"
562
+	flagRegistryAuth          = "with-registry-auth"
563
+	flagLogDriver             = "log-driver"
564
+	flagLogOpt                = "log-opt"
563 565
 )
... ...
@@ -36,6 +36,7 @@ func newUpdateCommand(dockerCli *command.DockerCli) *cobra.Command {
36 36
 	flags := cmd.Flags()
37 37
 	flags.String("image", "", "Service image tag")
38 38
 	flags.String("args", "", "Service command args")
39
+	flags.Bool("rollback", false, "Rollback to previous specification")
39 40
 	addServiceFlags(cmd, opts)
40 41
 
41 42
 	flags.Var(newListOptsVar(), flagEnvRemove, "Remove an environment variable")
... ...
@@ -68,7 +69,20 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
68 68
 		return err
69 69
 	}
70 70
 
71
-	err = updateService(flags, &service.Spec)
71
+	rollback, err := flags.GetBool("rollback")
72
+	if err != nil {
73
+		return err
74
+	}
75
+
76
+	spec := &service.Spec
77
+	if rollback {
78
+		spec = service.PreviousSpec
79
+		if spec == nil {
80
+			return fmt.Errorf("service does not have a previous specification to roll back to")
81
+		}
82
+	}
83
+
84
+	err = updateService(flags, spec)
72 85
 	if err != nil {
73 86
 		return err
74 87
 	}
... ...
@@ -81,15 +95,19 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, serviceID str
81 81
 	if sendAuth {
82 82
 		// Retrieve encoded auth token from the image reference
83 83
 		// This would be the old image if it didn't change in this update
84
-		image := service.Spec.TaskTemplate.ContainerSpec.Image
84
+		image := spec.TaskTemplate.ContainerSpec.Image
85 85
 		encodedAuth, err := command.RetrieveAuthTokenFromImage(ctx, dockerCli, image)
86 86
 		if err != nil {
87 87
 			return err
88 88
 		}
89 89
 		updateOpts.EncodedRegistryAuth = encodedAuth
90
+	} else if rollback {
91
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromPreviousSpec
92
+	} else {
93
+		updateOpts.RegistryAuthFrom = types.RegistryAuthFromSpec
90 94
 	}
91 95
 
92
-	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, service.Spec, updateOpts)
96
+	err = apiClient.ServiceUpdate(ctx, service.ID, service.Version, *spec, updateOpts)
93 97
 	if err != nil {
94 98
 		return err
95 99
 	}
... ...
@@ -111,6 +129,12 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
111 111
 		}
112 112
 	}
113 113
 
114
+	updateFloat32 := func(flag string, field *float32) {
115
+		if flags.Changed(flag) {
116
+			*field, _ = flags.GetFloat32(flag)
117
+		}
118
+	}
119
+
114 120
 	updateDuration := func(flag string, field *time.Duration) {
115 121
 		if flags.Changed(flag) {
116 122
 			*field, _ = flags.GetDuration(flag)
... ...
@@ -195,13 +219,15 @@ func updateService(flags *pflag.FlagSet, spec *swarm.ServiceSpec) error {
195 195
 		return err
196 196
 	}
197 197
 
198
-	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateFailureAction) {
198
+	if anyChanged(flags, flagUpdateParallelism, flagUpdateDelay, flagUpdateMonitor, flagUpdateFailureAction, flagUpdateMaxFailureRatio) {
199 199
 		if spec.UpdateConfig == nil {
200 200
 			spec.UpdateConfig = &swarm.UpdateConfig{}
201 201
 		}
202 202
 		updateUint64(flagUpdateParallelism, &spec.UpdateConfig.Parallelism)
203 203
 		updateDuration(flagUpdateDelay, &spec.UpdateConfig.Delay)
204
+		updateDuration(flagUpdateMonitor, &spec.UpdateConfig.Monitor)
204 205
 		updateString(flagUpdateFailureAction, &spec.UpdateConfig.FailureAction)
206
+		updateFloat32(flagUpdateMaxFailureRatio, &spec.UpdateConfig.MaxFailureRatio)
205 207
 	}
206 208
 
207 209
 	if flags.Changed(flagEndpointMode) {
... ...
@@ -1760,9 +1760,12 @@ _docker_service_update() {
1760 1760
 		--restart-delay
1761 1761
 		--restart-max-attempts
1762 1762
 		--restart-window
1763
+		--rollback
1763 1764
 		--stop-grace-period
1764 1765
 		--update-delay
1765 1766
 		--update-failure-action
1767
+		--update-max-failure-ratio
1768
+		--update-monitor
1766 1769
 		--update-parallelism
1767 1770
 		--user -u
1768 1771
 		--workdir -w
... ...
@@ -1108,6 +1108,8 @@ __docker_service_subcommand() {
1108 1108
         "($help)--stop-grace-period=[Time to wait before force killing a container]:grace period: "
1109 1109
         "($help)--update-delay=[Delay between updates]:delay: "
1110 1110
         "($help)--update-failure-action=[Action on update failure]:mode:(pause continue)"
1111
+        "($help)--update-max-failure-ratio=[Failure rate to tolerate during an update]:fraction: "
1112
+        "($help)--update-monitor=[Duration after each task update to monitor for failure]:window: "
1111 1113
         "($help)--update-parallelism=[Maximum number of tasks updated simultaneously]:number: "
1112 1114
         "($help -u --user)"{-u=,--user=}"[Username or UID]:user:_users"
1113 1115
         "($help)--with-registry-auth[Send registry authentication details to swarm agents]"
... ...
@@ -1185,6 +1187,7 @@ __docker_service_subcommand() {
1185 1185
                 "($help)*--container-label-rm=[Remove a container label by its key]:label: " \
1186 1186
                 "($help)*--group-rm=[Remove previously added user groups from the container]:group:_groups" \
1187 1187
                 "($help)--image=[Service image tag]:image:__docker_repositories" \
1188
+                "($help)--rollback[Rollback to previous specification]" \
1188 1189
                 "($help -)1:service:__docker_complete_services" && ret=0
1189 1190
             ;;
1190 1191
         (help)
... ...
@@ -913,7 +913,7 @@ func (c *Cluster) GetService(input string) (types.Service, error) {
913 913
 }
914 914
 
915 915
 // UpdateService updates existing service to match new properties.
916
-func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
916
+func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
917 917
 	c.RLock()
918 918
 	defer c.RUnlock()
919 919
 
... ...
@@ -948,7 +948,18 @@ func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec typ
948 948
 	} else {
949 949
 		// this is needed because if the encodedAuth isn't being updated then we
950 950
 		// shouldn't lose it, and continue to use the one that was already present
951
-		ctnr := currentService.Spec.Task.GetContainer()
951
+		var ctnr *swarmapi.ContainerSpec
952
+		switch registryAuthFrom {
953
+		case apitypes.RegistryAuthFromSpec, "":
954
+			ctnr = currentService.Spec.Task.GetContainer()
955
+		case apitypes.RegistryAuthFromPreviousSpec:
956
+			if currentService.PreviousSpec == nil {
957
+				return fmt.Errorf("service does not have a previous spec")
958
+			}
959
+			ctnr = currentService.PreviousSpec.Task.GetContainer()
960
+		default:
961
+			return fmt.Errorf("unsupported registryAuthFromValue")
962
+		}
952 963
 		if ctnr == nil {
953 964
 			return fmt.Errorf("service does not use container tasks")
954 965
 		}
... ...
@@ -12,8 +12,43 @@ import (
12 12
 
13 13
 // ServiceFromGRPC converts a grpc Service to a Service.
14 14
 func ServiceFromGRPC(s swarmapi.Service) types.Service {
15
-	spec := s.Spec
16
-	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
15
+	service := types.Service{
16
+		ID:           s.ID,
17
+		Spec:         *serviceSpecFromGRPC(&s.Spec),
18
+		PreviousSpec: serviceSpecFromGRPC(s.PreviousSpec),
19
+
20
+		Endpoint: endpointFromGRPC(s.Endpoint),
21
+	}
22
+
23
+	// Meta
24
+	service.Version.Index = s.Meta.Version.Index
25
+	service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
26
+	service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)
27
+
28
+	// UpdateStatus
29
+	service.UpdateStatus = types.UpdateStatus{}
30
+	if s.UpdateStatus != nil {
31
+		switch s.UpdateStatus.State {
32
+		case swarmapi.UpdateStatus_UPDATING:
33
+			service.UpdateStatus.State = types.UpdateStateUpdating
34
+		case swarmapi.UpdateStatus_PAUSED:
35
+			service.UpdateStatus.State = types.UpdateStatePaused
36
+		case swarmapi.UpdateStatus_COMPLETED:
37
+			service.UpdateStatus.State = types.UpdateStateCompleted
38
+		}
39
+
40
+		service.UpdateStatus.StartedAt, _ = ptypes.Timestamp(s.UpdateStatus.StartedAt)
41
+		service.UpdateStatus.CompletedAt, _ = ptypes.Timestamp(s.UpdateStatus.CompletedAt)
42
+		service.UpdateStatus.Message = s.UpdateStatus.Message
43
+	}
44
+
45
+	return service
46
+}
47
+
48
+func serviceSpecFromGRPC(spec *swarmapi.ServiceSpec) *types.ServiceSpec {
49
+	if spec == nil {
50
+		return nil
51
+	}
17 52
 
18 53
 	serviceNetworks := make([]types.NetworkAttachmentConfig, 0, len(spec.Networks))
19 54
 	for _, n := range spec.Networks {
... ...
@@ -25,78 +60,57 @@ func ServiceFromGRPC(s swarmapi.Service) types.Service {
25 25
 		taskNetworks = append(taskNetworks, types.NetworkAttachmentConfig{Target: n.Target, Aliases: n.Aliases})
26 26
 	}
27 27
 
28
-	service := types.Service{
29
-		ID: s.ID,
30
-
31
-		Spec: types.ServiceSpec{
32
-			TaskTemplate: types.TaskSpec{
33
-				ContainerSpec: containerSpecFromGRPC(containerConfig),
34
-				Resources:     resourcesFromGRPC(s.Spec.Task.Resources),
35
-				RestartPolicy: restartPolicyFromGRPC(s.Spec.Task.Restart),
36
-				Placement:     placementFromGRPC(s.Spec.Task.Placement),
37
-				LogDriver:     driverFromGRPC(s.Spec.Task.LogDriver),
38
-				Networks:      taskNetworks,
39
-			},
40
-
41
-			Networks:     serviceNetworks,
42
-			EndpointSpec: endpointSpecFromGRPC(s.Spec.Endpoint),
28
+	containerConfig := spec.Task.Runtime.(*swarmapi.TaskSpec_Container).Container
29
+	convertedSpec := &types.ServiceSpec{
30
+		Annotations: types.Annotations{
31
+			Name:   spec.Annotations.Name,
32
+			Labels: spec.Annotations.Labels,
43 33
 		},
44
-		Endpoint: endpointFromGRPC(s.Endpoint),
45
-	}
46 34
 
47
-	// Meta
48
-	service.Version.Index = s.Meta.Version.Index
49
-	service.CreatedAt, _ = ptypes.Timestamp(s.Meta.CreatedAt)
50
-	service.UpdatedAt, _ = ptypes.Timestamp(s.Meta.UpdatedAt)
35
+		TaskTemplate: types.TaskSpec{
36
+			ContainerSpec: containerSpecFromGRPC(containerConfig),
37
+			Resources:     resourcesFromGRPC(spec.Task.Resources),
38
+			RestartPolicy: restartPolicyFromGRPC(spec.Task.Restart),
39
+			Placement:     placementFromGRPC(spec.Task.Placement),
40
+			LogDriver:     driverFromGRPC(spec.Task.LogDriver),
41
+			Networks:      taskNetworks,
42
+		},
51 43
 
52
-	// Annotations
53
-	service.Spec.Name = s.Spec.Annotations.Name
54
-	service.Spec.Labels = s.Spec.Annotations.Labels
44
+		Networks:     serviceNetworks,
45
+		EndpointSpec: endpointSpecFromGRPC(spec.Endpoint),
46
+	}
55 47
 
56 48
 	// UpdateConfig
57
-	if s.Spec.Update != nil {
58
-		service.Spec.UpdateConfig = &types.UpdateConfig{
59
-			Parallelism: s.Spec.Update.Parallelism,
49
+	if spec.Update != nil {
50
+		convertedSpec.UpdateConfig = &types.UpdateConfig{
51
+			Parallelism:     spec.Update.Parallelism,
52
+			MaxFailureRatio: spec.Update.MaxFailureRatio,
60 53
 		}
61 54
 
62
-		service.Spec.UpdateConfig.Delay, _ = ptypes.Duration(&s.Spec.Update.Delay)
55
+		convertedSpec.UpdateConfig.Delay, _ = ptypes.Duration(&spec.Update.Delay)
56
+		if spec.Update.Monitor != nil {
57
+			convertedSpec.UpdateConfig.Monitor, _ = ptypes.Duration(spec.Update.Monitor)
58
+		}
63 59
 
64
-		switch s.Spec.Update.FailureAction {
60
+		switch spec.Update.FailureAction {
65 61
 		case swarmapi.UpdateConfig_PAUSE:
66
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
62
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionPause
67 63
 		case swarmapi.UpdateConfig_CONTINUE:
68
-			service.Spec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
64
+			convertedSpec.UpdateConfig.FailureAction = types.UpdateFailureActionContinue
69 65
 		}
70 66
 	}
71 67
 
72 68
 	// Mode
73
-	switch t := s.Spec.GetMode().(type) {
69
+	switch t := spec.GetMode().(type) {
74 70
 	case *swarmapi.ServiceSpec_Global:
75
-		service.Spec.Mode.Global = &types.GlobalService{}
71
+		convertedSpec.Mode.Global = &types.GlobalService{}
76 72
 	case *swarmapi.ServiceSpec_Replicated:
77
-		service.Spec.Mode.Replicated = &types.ReplicatedService{
73
+		convertedSpec.Mode.Replicated = &types.ReplicatedService{
78 74
 			Replicas: &t.Replicated.Replicas,
79 75
 		}
80 76
 	}
81 77
 
82
-	// UpdateStatus
83
-	service.UpdateStatus = types.UpdateStatus{}
84
-	if s.UpdateStatus != nil {
85
-		switch s.UpdateStatus.State {
86
-		case swarmapi.UpdateStatus_UPDATING:
87
-			service.UpdateStatus.State = types.UpdateStateUpdating
88
-		case swarmapi.UpdateStatus_PAUSED:
89
-			service.UpdateStatus.State = types.UpdateStatePaused
90
-		case swarmapi.UpdateStatus_COMPLETED:
91
-			service.UpdateStatus.State = types.UpdateStateCompleted
92
-		}
93
-
94
-		service.UpdateStatus.StartedAt, _ = ptypes.Timestamp(s.UpdateStatus.StartedAt)
95
-		service.UpdateStatus.CompletedAt, _ = ptypes.Timestamp(s.UpdateStatus.CompletedAt)
96
-		service.UpdateStatus.Message = s.UpdateStatus.Message
97
-	}
98
-
99
-	return service
78
+	return convertedSpec
100 79
 }
101 80
 
102 81
 // ServiceSpecToGRPC converts a ServiceSpec to a grpc ServiceSpec.
... ...
@@ -158,9 +172,13 @@ func ServiceSpecToGRPC(s types.ServiceSpec) (swarmapi.ServiceSpec, error) {
158 158
 			return swarmapi.ServiceSpec{}, fmt.Errorf("unrecongized update failure action %s", s.UpdateConfig.FailureAction)
159 159
 		}
160 160
 		spec.Update = &swarmapi.UpdateConfig{
161
-			Parallelism:   s.UpdateConfig.Parallelism,
162
-			Delay:         *ptypes.DurationProto(s.UpdateConfig.Delay),
163
-			FailureAction: failureAction,
161
+			Parallelism:     s.UpdateConfig.Parallelism,
162
+			Delay:           *ptypes.DurationProto(s.UpdateConfig.Delay),
163
+			FailureAction:   failureAction,
164
+			MaxFailureRatio: s.UpdateConfig.MaxFailureRatio,
165
+		}
166
+		if s.UpdateConfig.Monitor != 0 {
167
+			spec.Update.Monitor = ptypes.DurationProto(s.UpdateConfig.Monitor)
164 168
 		}
165 169
 	}
166 170
 
... ...
@@ -129,6 +129,7 @@ This section lists each version from latest to oldest.  Each listing includes a
129 129
 * `GET /containers/json` now supports a `is-task` filter to filter
130 130
   containers that are tasks (part of a service in swarm mode).
131 131
 * `POST /containers/create` now takes `StopTimeout` field.
132
+* `POST /services/create` and `POST /services/(id or name)/update` now accept `Monitor` and `MaxFailureRatio` parameters, which control the response to failures during service updates.
132 133
 
133 134
 ### v1.24 API changes
134 135
 
... ...
@@ -4877,7 +4877,9 @@ List services
4877 4877
           },
4878 4878
           "UpdateConfig": {
4879 4879
             "Parallelism": 1,
4880
-            "FailureAction": "pause"
4880
+            "FailureAction": "pause",
4881
+            "Monitor": 15000000000,
4882
+            "MaxFailureRatio": 0.15
4881 4883
           },
4882 4884
           "EndpointSpec": {
4883 4885
             "Mode": "vip",
... ...
@@ -5077,8 +5079,8 @@ image](#create-an-image) section for more details.
5077 5077
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
5078 5078
       as part of this service.
5079 5079
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
5080
-        - **Delay** – Delay between restart attempts.
5081
-        - **Attempts** – Maximum attempts to restart a given container before giving up (default value
5080
+        - **Delay** – Delay between restart attempts, in nanoseconds.
5081
+        - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
5082 5082
           is 0, which is ignored).
5083 5083
         - **Window** – Window is the time window used to evaluate the restart policy (default value is
5084 5084
           0, which is unbounded).
... ...
@@ -5087,9 +5089,12 @@ image](#create-an-image) section for more details.
5087 5087
 - **UpdateConfig** – Specification for the update strategy of the service.
5088 5088
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
5089 5089
       parallelism).
5090
-    - **Delay** – Amount of time between updates.
5090
+    - **Delay** – Amount of time between updates, in nanoseconds.
5091 5091
     - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
5092 5092
       update. Values are `continue` and `pause`.
5093
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
5094
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
5095
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
5093 5096
 - **Networks** – Array of network names or IDs to attach the service to.
5094 5097
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
5095 5098
     - **Mode** – The mode of resolution to use for internal load balancing
... ...
@@ -5259,7 +5264,9 @@ image](#create-an-image) section for more details.
5259 5259
         }
5260 5260
       },
5261 5261
       "UpdateConfig": {
5262
-        "Parallelism": 1
5262
+        "Parallelism": 1,
5263
+        "Monitor": 15000000000,
5264
+        "MaxFailureRatio": 0.15
5263 5265
       },
5264 5266
       "EndpointSpec": {
5265 5267
         "Mode": "vip"
... ...
@@ -5314,7 +5321,7 @@ image](#create-an-image) section for more details.
5314 5314
     - **RestartPolicy** – Specification for the restart policy which applies to containers created
5315 5315
       as part of this service.
5316 5316
         - **Condition** – Condition for restart (`none`, `on-failure`, or `any`).
5317
-        - **Delay** – Delay between restart attempts.
5317
+        - **Delay** – Delay between restart attempts, in nanoseconds.
5318 5318
         - **MaxAttempts** – Maximum attempts to restart a given container before giving up (default value
5319 5319
           is 0, which is ignored).
5320 5320
         - **Window** – Window is the time window used to evaluate the restart policy (default value is
... ...
@@ -5324,7 +5331,12 @@ image](#create-an-image) section for more details.
5324 5324
 - **UpdateConfig** – Specification for the update strategy of the service.
5325 5325
     - **Parallelism** – Maximum number of tasks to be updated in one iteration (0 means unlimited
5326 5326
       parallelism).
5327
-    - **Delay** – Amount of time between updates.
5327
+    - **Delay** – Amount of time between updates, in nanoseconds.
5328
+    - **FailureAction** - Action to take if an updated task fails to run, or stops running during the
5329
+      update. Values are `continue` and `pause`.
5330
+    - **Monitor** - Amount of time to monitor each updated task for failures, in nanoseconds.
5331
+    - **MaxFailureRatio** - The fraction of tasks that may fail during an update before the
5332
+      failure action is invoked, specified as a floating point number between 0 and 1. The default is 0.
5328 5333
 - **Networks** – Array of network names or IDs to attach the service to.
5329 5334
 - **EndpointSpec** – Properties that can be configured to access and load balance a service.
5330 5335
     - **Mode** – The mode of resolution to use for internal load balancing
... ...
@@ -5338,6 +5350,10 @@ image](#create-an-image) section for more details.
5338 5338
 
5339 5339
 - **version** – The version number of the service object being updated. This is
5340 5340
   required to avoid conflicting writes.
5341
+- **registryAuthFrom** – If the `X-Registry-Auth` header is not specified, this
5342
+  parameter indicates where to find registry authorization credentials. The
5343
+  valid values are `spec` and `previous-spec`. If unspecified, the default is
5344
+  `spec`.
5341 5345
 
5342 5346
 **Request Headers**:
5343 5347
 
... ...
@@ -12,36 +12,38 @@ Usage:  docker service create [OPTIONS] IMAGE [COMMAND] [ARG...]
12 12
 Create a new service
13 13
 
14 14
 Options:
15
-      --constraint value               Placement constraints (default [])
16
-      --container-label value          Service container labels (default [])
17
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
18
-  -e, --env value                      Set environment variables (default [])
19
-      --group-add value                Add additional user groups to the container (default [])
20
-      --help                           Print usage
21
-  -l, --label value                    Service labels (default [])
22
-      --limit-cpu value                Limit CPUs (default 0.000)
23
-      --limit-memory value             Limit Memory (default 0 B)
24
-      --log-driver string              Logging driver for service
25
-      --log-opt value                  Logging driver options (default [])
26
-      --mode string                    Service mode (replicated or global) (default "replicated")
27
-      --mount value                    Attach a mount to the service
28
-      --name string                    Service name
29
-      --network value                  Network attachments (default [])
30
-  -p, --publish value                  Publish a port as a node port (default [])
31
-      --replicas value                 Number of tasks (default none)
32
-      --reserve-cpu value              Reserve CPUs (default 0.000)
33
-      --reserve-memory value           Reserve Memory (default 0 B)
34
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
35
-      --restart-delay value            Delay between restart attempts (default none)
36
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
37
-      --restart-window value           Window used to evaluate the restart policy (default none)
38
-      --stop-grace-period value        Time to wait before force killing a container (default none)
39
-      --update-delay duration          Delay between updates
40
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
41
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
42
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
43
-      --with-registry-auth             Send registry authentication details to Swarm agents
44
-  -w, --workdir string                 Working directory inside the container
15
+      --constraint value                 Placement constraints (default [])
16
+      --container-label value            Service container labels (default [])
17
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
18
+  -e, --env value                        Set environment variables (default [])
19
+      --group-add value                  Add additional user groups to the container (default [])
20
+      --help                             Print usage
21
+  -l, --label value                      Service labels (default [])
22
+      --limit-cpu value                  Limit CPUs (default 0.000)
23
+      --limit-memory value               Limit Memory (default 0 B)
24
+      --log-driver string                Logging driver for service
25
+      --log-opt value                    Logging driver options (default [])
26
+      --mode string                      Service mode (replicated or global) (default "replicated")
27
+      --mount value                      Attach a mount to the service
28
+      --name string                      Service name
29
+      --network value                    Network attachments (default [])
30
+  -p, --publish value                    Publish a port as a node port (default [])
31
+      --replicas value                   Number of tasks (default none)
32
+      --reserve-cpu value                Reserve CPUs (default 0.000)
33
+      --reserve-memory value             Reserve Memory (default 0 B)
34
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
35
+      --restart-delay value              Delay between restart attempts (default none)
36
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
37
+      --restart-window value             Window used to evaluate the restart policy (default none)
38
+      --stop-grace-period value          Time to wait before force killing a container (default none)
39
+      --update-delay duration            Delay between updates
40
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
41
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
42
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
43
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
44
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
45
+      --with-registry-auth               Send registry authentication details to Swarm agents
46
+  -w, --workdir string                   Working directory inside the container
45 47
 ```
46 48
 
47 49
 Creates a service as described by the specified parameters. You must run this
... ...
@@ -12,43 +12,46 @@ Usage:  docker service update [OPTIONS] SERVICE
12 12
 Update a service
13 13
 
14 14
 Options:
15
-      --args string                    Service command args
16
-      --constraint-add value           Add or update placement constraints (default [])
17
-      --constraint-rm value            Remove a constraint (default [])
18
-      --container-label-add value      Add or update container labels (default [])
19
-      --container-label-rm value       Remove a container label by its key (default [])
20
-      --endpoint-mode string           Endpoint mode (vip or dnsrr)
21
-      --env-add value                  Add or update environment variables (default [])
22
-      --env-rm value                   Remove an environment variable (default [])
23
-      --group-add value                Add additional user groups to the container (default [])
24
-      --group-rm value                 Remove previously added user groups from the container (default [])
25
-      --help                           Print usage
26
-      --image string                   Service image tag
27
-      --label-add value                Add or update service labels (default [])
28
-      --label-rm value                 Remove a label by its key (default [])
29
-      --limit-cpu value                Limit CPUs (default 0.000)
30
-      --limit-memory value             Limit Memory (default 0 B)
31
-      --log-driver string              Logging driver for service
32
-      --log-opt value                  Logging driver options (default [])
33
-      --mount-add value                Add or update a mount on a service
34
-      --mount-rm value                 Remove a mount by its target path (default [])
35
-      --name string                    Service name
36
-      --publish-add value              Add or update a published port (default [])
37
-      --publish-rm value               Remove a published port by its target port (default [])
38
-      --replicas value                 Number of tasks (default none)
39
-      --reserve-cpu value              Reserve CPUs (default 0.000)
40
-      --reserve-memory value           Reserve Memory (default 0 B)
41
-      --restart-condition string       Restart when condition is met (none, on-failure, or any)
42
-      --restart-delay value            Delay between restart attempts (default none)
43
-      --restart-max-attempts value     Maximum number of restarts before giving up (default none)
44
-      --restart-window value           Window used to evaluate the restart policy (default none)
45
-      --stop-grace-period value        Time to wait before force killing a container (default none)
46
-      --update-delay duration          Delay between updates
47
-      --update-failure-action string   Action on update failure (pause|continue) (default "pause")
48
-      --update-parallelism uint        Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
49
-  -u, --user string                    Username or UID (format: <name|uid>[:<group|gid>])
50
-      --with-registry-auth             Send registry authentication details to Swarm agents
51
-  -w, --workdir string                 Working directory inside the container
15
+      --args string                      Service command args
16
+      --constraint-add value             Add or update placement constraints (default [])
17
+      --constraint-rm value              Remove a constraint (default [])
18
+      --container-label-add value        Add or update container labels (default [])
19
+      --container-label-rm value         Remove a container label by its key (default [])
20
+      --endpoint-mode string             Endpoint mode (vip or dnsrr)
21
+      --env-add value                    Add or update environment variables (default [])
22
+      --env-rm value                     Remove an environment variable (default [])
23
+      --group-add value                  Add additional user groups to the container (default [])
24
+      --group-rm value                   Remove previously added user groups from the container (default [])
25
+      --help                             Print usage
26
+      --image string                     Service image tag
27
+      --label-add value                  Add or update service labels (default [])
28
+      --label-rm value                   Remove a label by its key (default [])
29
+      --limit-cpu value                  Limit CPUs (default 0.000)
30
+      --limit-memory value               Limit Memory (default 0 B)
31
+      --log-driver string                Logging driver for service
32
+      --log-opt value                    Logging driver options (default [])
33
+      --mount-add value                  Add or update a mount on a service
34
+      --mount-rm value                   Remove a mount by its target path (default [])
35
+      --name string                      Service name
36
+      --publish-add value                Add or update a published port (default [])
37
+      --publish-rm value                 Remove a published port by its target port (default [])
38
+      --replicas value                   Number of tasks (default none)
39
+      --reserve-cpu value                Reserve CPUs (default 0.000)
40
+      --reserve-memory value             Reserve Memory (default 0 B)
41
+      --restart-condition string         Restart when condition is met (none, on-failure, or any)
42
+      --restart-delay value              Delay between restart attempts (default none)
43
+      --restart-max-attempts value       Maximum number of restarts before giving up (default none)
44
+      --restart-window value             Window used to evaluate the restart policy (default none)
45
+      --rollback                         Rollback to previous specification
46
+      --stop-grace-period value          Time to wait before force killing a container (default none)
47
+      --update-delay duration            Delay between updates
48
+      --update-failure-action string     Action on update failure (pause|continue) (default "pause")
49
+      --update-max-failure-ratio value   Failure rate to tolerate during an update
50
+      --update-monitor duration          Duration after each task update to monitor for failure (default 0s)
51
+      --update-parallelism uint          Maximum number of tasks updated simultaneously (0 to update all at once) (default 1)
52
+  -u, --user string                      Username or UID (format: <name|uid>[:<group|gid>])
53
+      --with-registry-auth               Send registry authentication details to Swarm agents
54
+  -w, --workdir string                   Working directory inside the container
52 55
 ```
53 56
 
54 57
 Updates a service as described by the specified parameters. This command has to be run targeting a manager node.
... ...
@@ -139,8 +139,8 @@ func (d *SwarmDaemon) getServiceTasks(c *check.C, service string) []swarm.Task {
139 139
 	return tasks
140 140
 }
141 141
 
142
-func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
143
-	return func(*check.C) (interface{}, check.CommentInterface) {
142
+func (d *SwarmDaemon) checkServiceRunningTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
143
+	return func(c *check.C) (interface{}, check.CommentInterface) {
144 144
 		tasks := d.getServiceTasks(c, service)
145 145
 		var runningCount int
146 146
 		for _, task := range tasks {
... ...
@@ -152,8 +152,15 @@ func (d *SwarmDaemon) checkServiceRunningTasks(c *check.C, service string) func(
152 152
 	}
153 153
 }
154 154
 
155
-func (d *SwarmDaemon) checkServiceTasks(c *check.C, service string) func(*check.C) (interface{}, check.CommentInterface) {
156
-	return func(*check.C) (interface{}, check.CommentInterface) {
155
+func (d *SwarmDaemon) checkServiceUpdateState(service string) func(*check.C) (interface{}, check.CommentInterface) {
156
+	return func(c *check.C) (interface{}, check.CommentInterface) {
157
+		service := d.getService(c, service)
158
+		return service.UpdateStatus.State, nil
159
+	}
160
+}
161
+
162
+func (d *SwarmDaemon) checkServiceTasks(service string) func(*check.C) (interface{}, check.CommentInterface) {
163
+	return func(c *check.C) (interface{}, check.CommentInterface) {
157 164
 		tasks := d.getServiceTasks(c, service)
158 165
 		return len(tasks), nil
159 166
 	}
... ...
@@ -310,6 +310,63 @@ func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
310 310
 	// 3rd batch
311 311
 	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
312 312
 		map[string]int{image2: instances})
313
+
314
+	// Roll back to the previous version. This uses the CLI because
315
+	// rollback is a client-side operation.
316
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
317
+	c.Assert(err, checker.IsNil, check.Commentf(out))
318
+
319
+	// first batch
320
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
321
+		map[string]int{image2: instances - parallelism, image1: parallelism})
322
+
323
+	// 2nd batch
324
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
325
+		map[string]int{image2: instances - 2*parallelism, image1: 2 * parallelism})
326
+
327
+	// 3rd batch
328
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
329
+		map[string]int{image1: instances})
330
+}
331
+
332
+func (s *DockerSwarmSuite) TestApiSwarmServicesFailedUpdate(c *check.C) {
333
+	const nodeCount = 3
334
+	var daemons [nodeCount]*SwarmDaemon
335
+	for i := 0; i < nodeCount; i++ {
336
+		daemons[i] = s.AddDaemon(c, true, i == 0)
337
+	}
338
+	// wait for nodes ready
339
+	waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
340
+
341
+	// service image at start
342
+	image1 := "busybox:latest"
343
+	// target image in update
344
+	image2 := "busybox:badtag"
345
+
346
+	// create service
347
+	instances := 5
348
+	id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
349
+
350
+	// wait for tasks ready
351
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
352
+		map[string]int{image1: instances})
353
+
354
+	// issue service update
355
+	service := daemons[0].getService(c, id)
356
+	daemons[0].updateService(c, service, setImage(image2), setFailureAction(swarm.UpdateFailureActionPause), setMaxFailureRatio(0.25), setParallelism(1))
357
+
358
+	// should update 2 tasks and then pause
359
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceUpdateState(id), checker.Equals, swarm.UpdateStatePaused)
360
+	v, _ := daemons[0].checkServiceRunningTasks(id)(c)
361
+	c.Assert(v, checker.Equals, instances-2)
362
+
363
+	// Roll back to the previous version. This uses the CLI because
364
+	// rollback is a client-side operation.
365
+	out, err := daemons[0].Cmd("service", "update", "--rollback", id)
366
+	c.Assert(err, checker.IsNil, check.Commentf(out))
367
+
368
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
369
+		map[string]int{image1: instances})
313 370
 }
314 371
 
315 372
 func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
... ...
@@ -326,7 +383,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
326 326
 	instances := 3
327 327
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
328 328
 	// wait for tasks ready
329
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
329
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
330 330
 	// validate tasks are running on worker nodes
331 331
 	tasks := daemons[0].getServiceTasks(c, id)
332 332
 	for _, task := range tasks {
... ...
@@ -340,7 +397,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
340 340
 	constraints = []string{"node.role!=worker"}
341 341
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
342 342
 	// wait for tasks ready
343
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
343
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
344 344
 	tasks = daemons[0].getServiceTasks(c, id)
345 345
 	// validate tasks are running on manager nodes
346 346
 	for _, task := range tasks {
... ...
@@ -354,7 +411,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
354 354
 	constraints = []string{"node.role==nosuchrole"}
355 355
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
356 356
 	// wait for tasks created
357
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
357
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
358 358
 	// let scheduler try
359 359
 	time.Sleep(250 * time.Millisecond)
360 360
 	// validate tasks are not assigned to any node
... ...
@@ -394,7 +451,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
394 394
 	constraints := []string{"node.labels.security==high"}
395 395
 	id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
396 396
 	// wait for tasks ready
397
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
397
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
398 398
 	tasks := daemons[0].getServiceTasks(c, id)
399 399
 	// validate all tasks are running on nodes[0]
400 400
 	for _, task := range tasks {
... ...
@@ -407,7 +464,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
407 407
 	constraints = []string{"node.labels.security!=high"}
408 408
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
409 409
 	// wait for tasks ready
410
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
410
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
411 411
 	tasks = daemons[0].getServiceTasks(c, id)
412 412
 	// validate all tasks are NOT running on nodes[0]
413 413
 	for _, task := range tasks {
... ...
@@ -419,7 +476,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
419 419
 	constraints = []string{"node.labels.security==medium"}
420 420
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
421 421
 	// wait for tasks created
422
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
422
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
423 423
 	// let scheduler try
424 424
 	time.Sleep(250 * time.Millisecond)
425 425
 	tasks = daemons[0].getServiceTasks(c, id)
... ...
@@ -437,7 +494,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
437 437
 	}
438 438
 	id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
439 439
 	// wait for tasks created
440
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
440
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(id), checker.Equals, instances)
441 441
 	// let scheduler try
442 442
 	time.Sleep(250 * time.Millisecond)
443 443
 	tasks = daemons[0].getServiceTasks(c, id)
... ...
@@ -452,7 +509,7 @@ func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
452 452
 		}
453 453
 	})
454 454
 	// wait for tasks ready
455
-	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
455
+	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(id), checker.Equals, instances)
456 456
 	tasks = daemons[0].getServiceTasks(c, id)
457 457
 	for _, task := range tasks {
458 458
 		c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
... ...
@@ -1022,6 +1079,24 @@ func setImage(image string) serviceConstructor {
1022 1022
 	}
1023 1023
 }
1024 1024
 
1025
+func setFailureAction(failureAction string) serviceConstructor {
1026
+	return func(s *swarm.Service) {
1027
+		s.Spec.UpdateConfig.FailureAction = failureAction
1028
+	}
1029
+}
1030
+
1031
+func setMaxFailureRatio(maxFailureRatio float32) serviceConstructor {
1032
+	return func(s *swarm.Service) {
1033
+		s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
1034
+	}
1035
+}
1036
+
1037
+func setParallelism(parallelism uint64) serviceConstructor {
1038
+	return func(s *swarm.Service) {
1039
+		s.Spec.UpdateConfig.Parallelism = parallelism
1040
+	}
1041
+}
1042
+
1025 1043
 func setConstraints(constraints []string) serviceConstructor {
1026 1044
 	return func(s *swarm.Service) {
1027 1045
 		if s.Spec.TaskTemplate.Placement == nil {
... ...
@@ -349,7 +349,7 @@ func (s *DockerSwarmSuite) TestPsListContainersFilterIsTask(c *check.C) {
349 349
 	c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
350 350
 
351 351
 	// make sure task has been deployed.
352
-	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(c, name), checker.Equals, 1)
352
+	waitAndAssert(c, defaultReconciliationTimeout, d.checkServiceRunningTasks(name), checker.Equals, 1)
353 353
 
354 354
 	// Filter non-tasks
355 355
 	out, err = d.Cmd("ps", "-a", "-q", "--filter=is-task=false")