GitList

Browse code

cli: Add options for Raft snapshotting

Add the following options to "swarm init" and "swarm update":

- --max-snapshots: Retain this many old Raft snapshots in addition
to the latest one

- --snapshot-interval: Number of log entries between Raft snapshots

These options already existed in SwarmKit and the Docker API but were
never exposed in the CLI. I'm adding them here to fix this oversight.

--max-snapshots may be useful for debugging purposes and more
conservative users who want to store rolling backups of old versions of
the Raft state.

--snapshot-interval is most useful for performance tuning. The default
value of 10000 may not be ideal for some setups.

There is also a LogEntriesForSlowFollowers option that is not exposed. I
decided not to expose it along with these others because I don't think
it's generally useful (and I'm not sure what I would call the CLI flag).
But if people want, I can expose it for the sake of completeness.

Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>

Aaron Lehmann authored on 2016/11/03 04:29:51
Showing 9 changed files

api/types/swarm/swarm.go index e96d331..69ca53f 100644
cli/command/swarm/opts.go index 3659b55..af36a71 100644
cli/command/swarm/update.go index 71451e4..a39f34c 100644
cli/command/system/info.go index 7ab658c..5ea23ed 100644
contrib/completion/bash/docker index 11fe924..786bcd3 100644
contrib/completion/zsh/_docker index d113abe..88a8f4f 100644
daemon/cluster/convert/swarm.go index bb501fa..b2d604a 100644
docs/reference/commandline/swarm_init.md index bc1ce27..6c2ab81 100644
docs/reference/commandline/swarm_update.md index 7d49c83..46395cf 100644

api/types/swarm/swarm.go

History View file @ 842d11f

@@ -60,7 +60,7 @@ type RaftConfig struct {
                      	// KeepOldSnapshots is the number of snapshots to keep beyond the
                      	// current snapshot.
                     -	KeepOldSnapshots uint64 `json:",omitempty"`
                     +	KeepOldSnapshots *uint64 `json:",omitempty"`
                      	// LogEntriesForSlowFollowers is the number of log entries to keep
                      	// around to sync up slow followers after a snapshot is created.

cli/command/swarm/opts.go

History View file @ 842d11f

@@ -24,6 +24,8 @@ const (
                      	flagToken               = "token"
                      	flagTaskHistoryLimit    = "task-history-limit"
                      	flagExternalCA          = "external-ca"
                     +	flagMaxSnapshots        = "max-snapshots"
                     +	flagSnapshotInterval    = "snapshot-interval"
                      )
                      type swarmOptions struct {
@@ -31,6 +33,8 @@ type swarmOptions struct {
                      	dispatcherHeartbeat time.Duration
                      	nodeCertExpiry      time.Duration
                      	externalCA          ExternalCAOption
                     +	maxSnapshots        uint64
                     +	snapshotInterval    uint64
                      }
                      // NodeAddrOption is a pflag.Value for listening addresses
@@ -167,11 +171,11 @@ func addSwarmFlags(flags *pflag.FlagSet, opts *swarmOptions) {
                      	flags.DurationVar(&opts.dispatcherHeartbeat, flagDispatcherHeartbeat, time.Duration(5*time.Second), "Dispatcher heartbeat period")
                      	flags.DurationVar(&opts.nodeCertExpiry, flagCertExpiry, time.Duration(90*24*time.Hour), "Validity period for node certificates")
                      	flags.Var(&opts.externalCA, flagExternalCA, "Specifications of one or more certificate signing endpoints")
                     +	flags.Uint64Var(&opts.maxSnapshots, flagMaxSnapshots, 0, "Number of additional Raft snapshots to retain")
                     +	flags.Uint64Var(&opts.snapshotInterval, flagSnapshotInterval, 10000, "Number of log entries between Raft snapshots")
                      }
                     -func (opts *swarmOptions) ToSpec(flags *pflag.FlagSet) swarm.Spec {
                     -	spec := swarm.Spec{}
                     -
                     +func (opts *swarmOptions) mergeSwarmSpec(spec *swarm.Spec, flags *pflag.FlagSet) {
                      	if flags.Changed(flagTaskHistoryLimit) {
                      		spec.Orchestration.TaskHistoryRetentionLimit = &opts.taskHistoryLimit
                      	}
@@ -184,5 +188,16 @@ func (opts *swarmOptions) ToSpec(flags *pflag.FlagSet) swarm.Spec {
                      	if flags.Changed(flagExternalCA) {
                      		spec.CAConfig.ExternalCAs = opts.externalCA.Value()
                      	}
                     +	if flags.Changed(flagMaxSnapshots) {
                     +		spec.Raft.KeepOldSnapshots = &opts.maxSnapshots
                     +	}
                     +	if flags.Changed(flagSnapshotInterval) {
                     +		spec.Raft.SnapshotInterval = opts.snapshotInterval
                     +	}
                     +}
                     +
                     +func (opts *swarmOptions) ToSpec(flags *pflag.FlagSet) swarm.Spec {
                     +	var spec swarm.Spec
                     +	opts.mergeSwarmSpec(&spec, flags)
                      	return spec
                      }

cli/command/swarm/update.go

History View file @ 842d11f

@@ -39,10 +39,7 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, opts swarmOpt
                      		return err
                      	}
                     -	err = mergeSwarm(&swarm, flags)
                     -	if err != nil {
                     -		return err
                     -	}
                     +	opts.mergeSwarmSpec(&swarm.Spec, flags)
                      	err = client.SwarmUpdate(ctx, swarm.Version, swarm.Spec, updateFlags)
                      	if err != nil {
@@ -53,31 +50,3 @@ func runUpdate(dockerCli *command.DockerCli, flags *pflag.FlagSet, opts swarmOpt
                      	return nil
                      }
                     -
                     -func mergeSwarm(swarm *swarm.Swarm, flags *pflag.FlagSet) error {
                     -	spec := &swarm.Spec
                     -
                     -	if flags.Changed(flagTaskHistoryLimit) {
                     -		taskHistoryRetentionLimit, _ := flags.GetInt64(flagTaskHistoryLimit)
                     -		spec.Orchestration.TaskHistoryRetentionLimit = &taskHistoryRetentionLimit
                     -	}
                     -
                     -	if flags.Changed(flagDispatcherHeartbeat) {
                     -		if v, err := flags.GetDuration(flagDispatcherHeartbeat); err == nil {
                     -			spec.Dispatcher.HeartbeatPeriod = v
                     -		}
                     -	}
                     -
                     -	if flags.Changed(flagCertExpiry) {
                     -		if v, err := flags.GetDuration(flagCertExpiry); err == nil {
                     -			spec.CAConfig.NodeCertExpiry = v
                     -		}
                     -	}
                     -
                     -	if flags.Changed(flagExternalCA) {
                     -		value := flags.Lookup(flagExternalCA).Value.(*ExternalCAOption)
                     -		spec.CAConfig.ExternalCAs = value.Value()
                     -	}
                     -
                     -	return nil
                     -}

cli/command/system/info.go

History View file @ 842d11f

@@ -114,6 +114,9 @@ func prettyPrintInfo(dockerCli *command.DockerCli, info types.Info) error {
                      			fmt.Fprintf(dockerCli.Out(), "  Task History Retention Limit: %d\n", taskHistoryRetentionLimit)
                      			fmt.Fprintf(dockerCli.Out(), " Raft:\n")
                      			fmt.Fprintf(dockerCli.Out(), "  Snapshot Interval: %d\n", info.Swarm.Cluster.Spec.Raft.SnapshotInterval)
                     +			if info.Swarm.Cluster.Spec.Raft.KeepOldSnapshots != nil {
                     +				fmt.Fprintf(dockerCli.Out(), "  Number of Old Snapshots to Retain: %d\n", *info.Swarm.Cluster.Spec.Raft.KeepOldSnapshots)
                     +			}
                      			fmt.Fprintf(dockerCli.Out(), "  Heartbeat Tick: %d\n", info.Swarm.Cluster.Spec.Raft.HeartbeatTick)
                      			fmt.Fprintf(dockerCli.Out(), "  Election Tick: %d\n", info.Swarm.Cluster.Spec.Raft.ElectionTick)
                      			fmt.Fprintf(dockerCli.Out(), " Dispatcher:\n")

contrib/completion/bash/docker

History View file @ 842d11f

@@ -2841,14 +2841,14 @@ _docker_swarm_leave() {
                      _docker_swarm_update() {
                      	case "$prev" in
                     -		--cert-expiry|--dispatcher-heartbeat|--task-history-limit)
                     +		--cert-expiry|--dispatcher-heartbeat|--max-snapshots|--snapshot-interval|--task-history-limit)
                      			return
                      			;;
                      	esac
                      	case "$cur" in
                      		-*)
                     -			COMPREPLY=( $( compgen -W "--cert-expiry --dispatcher-heartbeat --help --task-history-limit" -- "$cur" ) )
                     +			COMPREPLY=( $( compgen -W "--cert-expiry --dispatcher-heartbeat --help --max-snapshots --snapshot-interval --task-history-limit" -- "$cur" ) )
                      			;;
                      	esac
                      }

contrib/completion/zsh/_docker

History View file @ 842d11f

@@ -1630,7 +1630,10 @@ __docker_swarm_subcommand() {
                                      "($help)--advertise-addr[Advertised address]:ip\:port: " \
                                      "($help)*--external-ca=[Specifications of one or more certificate signing endpoints]:endpoint: " \
                                      "($help)--force-new-cluster[Force create a new cluster from current state]" \
                     -                "($help)--listen-addr=[Listen address]:ip\:port: " && ret=0
                     +                "($help)--listen-addr=[Listen address]:ip\:port: " \
                     +                "($help)--max-snapshots[Number of additional Raft snapshots to retain]" \
                     +                "($help)--snapshot-interval[Number of log entries between Raft snapshots]" \
                     +                "($help)--task-history-limit=[Task history retention limit]:limit: " && ret=0
                                  ;;
                              (join)
                                  _arguments $(__docker_arguments) \
@@ -1655,7 +1658,10 @@ __docker_swarm_subcommand() {
                                  _arguments $(__docker_arguments) \
                                      $opts_help \
                                      "($help)--cert-expiry=[Validity period for node certificates]:duration: " \
                     +                "($help)*--external-ca=[Specifications of one or more certificate signing endpoints]:endpoint: " \
                                      "($help)--dispatcher-heartbeat=[Dispatcher heartbeat period]:duration: " \
                     +                "($help)--max-snapshots[Number of additional Raft snapshots to retain]" \
                     +                "($help)--snapshot-interval[Number of log entries between Raft snapshots]" \
                                      "($help)--task-history-limit=[Task history retention limit]:limit: " && ret=0
                                  ;;
                              (help)

daemon/cluster/convert/swarm.go

History View file @ 842d11f

@@ -21,7 +21,7 @@ func SwarmFromGRPC(c swarmapi.Cluster) types.Swarm {
                      				},
                      				Raft: types.RaftConfig{
                      					SnapshotInterval:           c.Spec.Raft.SnapshotInterval,
                     -					KeepOldSnapshots:           c.Spec.Raft.KeepOldSnapshots,
                     +					KeepOldSnapshots:           &c.Spec.Raft.KeepOldSnapshots,
                      					LogEntriesForSlowFollowers: c.Spec.Raft.LogEntriesForSlowFollowers,
                      					HeartbeatTick:              int(c.Spec.Raft.HeartbeatTick),
                      					ElectionTick:               int(c.Spec.Raft.ElectionTick),
@@ -82,8 +82,8 @@ func MergeSwarmSpecToGRPC(s types.Spec, spec swarmapi.ClusterSpec) (swarmapi.Clu
                      	if s.Raft.SnapshotInterval != 0 {
                      		spec.Raft.SnapshotInterval = s.Raft.SnapshotInterval
                      	}
                     -	if s.Raft.KeepOldSnapshots != 0 {
                     -		spec.Raft.KeepOldSnapshots = s.Raft.KeepOldSnapshots
                     +	if s.Raft.KeepOldSnapshots != nil {
                     +		spec.Raft.KeepOldSnapshots = *s.Raft.KeepOldSnapshots
                      	}
                      	if s.Raft.LogEntriesForSlowFollowers != 0 {
                      		spec.Raft.LogEntriesForSlowFollowers = s.Raft.LogEntriesForSlowFollowers

docs/reference/commandline/swarm_init.md

History View file @ 842d11f

@@ -28,6 +28,8 @@ Options:
                            --force-new-cluster               Force create a new cluster from current state
                            --help                            Print usage
                            --listen-addr value               Listen address (format: <ip|interface>[:port])
                     +      --max-snapshots uint              Number of additional Raft snapshots to retain
                     +      --snapshot-interval uint          Number of log entries between Raft snapshots (default 10000)
                            --task-history-limit int          Task history retention limit (default 5)
                      ```
@@ -64,7 +66,7 @@ This flag sets the validity period for node certificates.
                      This flag sets the period that nodes are told to use when
                      reporting their health.
                     -### `--external-ca value`
                     +### `--external-ca`
                      This flag sets up the swarm to use an external CA to issue node certificates. The value takes
                      the form `protocol=X,url=Y`. The value for `protocol` specifies what protocol should be used
@@ -75,7 +77,7 @@ The URL specifies the endpoint where signing requests should be submitted.
                      This flag forces an existing node that was part of a quorum that was lost to restart as a single node Manager without losing its data.
                     -### `--listen-addr value`
                     +### `--listen-addr`
                      The node listens for inbound swarm manager traffic on this address. The default is to listen on
                      0.0.0.0:2377. It is also possible to specify a network interface to listen on that interface's
@@ -84,7 +86,7 @@ address; for example `--listen-addr eth0:2377`.
                      Specifying a port is optional. If the value is a bare IP address or interface
                      name, the default port 2377 will be used.
                     -### `--advertise-addr value`
                     +### `--advertise-addr`
                      This flag specifies the address that will be advertised to other members of the
                      swarm for API access and overlay networking. If unspecified, Docker will check
@@ -103,6 +105,21 @@ name, the default port 2377 will be used.
                      This flag sets up task history retention limit.
                     +### `--max-snapshots`
                     +
                     +This flag sets the number of old Raft snapshots to retain in addition to the
                     +current Raft snapshot. By default, no old snapshots are retained. This option
                     +may be used for debugging, or to store old snapshots of the swarm state for
                     +disaster recovery purposes.
                     +
                     +### `--snapshot-interval`
+                    +
                     +This flag specifies how many log entries to allow in between Raft snapshots.
                     +Setting this to a higher number will trigger snapshots less frequently.
                     +Snapshots compact the Raft log and allow for more efficient transfer of the
                     +state to new managers. However, there is a performance cost to taking snapshots
                     +frequently.
                     +
                      ## Related information
                      * [swarm join](swarm_join.md)

docs/reference/commandline/swarm_update.md

History View file @ 842d11f

@@ -25,6 +25,8 @@ Options:
                            --dispatcher-heartbeat duration   Dispatcher heartbeat period (default 5s)
                            --external-ca value               Specifications of one or more certificate signing endpoints
                            --help                            Print usage
                     +      --max-snapshots uint              Number of additional Raft snapshots to retain
                     +      --snapshot-interval uint          Number of log entries between Raft snapshots (default 10000)
                            --task-history-limit int          Task history retention limit (default 5)
                      ```