Browse code

Expose swarm raft tuning parameters in engine config

Signed-off-by: David Chung <david.chung@docker.com>

David Chung authored on 2018/03/29 08:54:43
Showing 4 changed files
... ...
@@ -257,6 +257,8 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
257 257
 		PluginBackend:          d.PluginManager(),
258 258
 		NetworkSubnetsProvider: d,
259 259
 		DefaultAdvertiseAddr:   cli.Config.SwarmDefaultAdvertiseAddr,
260
+		RaftHeartbeatTick:      cli.Config.SwarmRaftHeartbeatTick,
261
+		RaftElectionTick:       cli.Config.SwarmRaftElectionTick,
260 262
 		RuntimeRoot:            cli.getSwarmRunRoot(),
261 263
 		WatchStream:            watchStream,
262 264
 	})
... ...
@@ -96,6 +96,13 @@ type Config struct {
96 96
 
97 97
 	// WatchStream is a channel to pass watch API notifications to daemon
98 98
 	WatchStream chan *swarmapi.WatchMessage
99
+
100
+	// RaftHeartbeatTick is the number of ticks between heartbeats sent to quorum members.
101
+	RaftHeartbeatTick uint32
102
+
103
+	// RaftElectionTick is the number of ticks that must elapse before followers propose a new round of leader election.
104
+	// The recommended value is 10x RaftHeartbeatTick.
105
+	RaftElectionTick uint32
99 106
 }
100 107
 
101 108
 // Cluster provides capabilities to participate in a cluster as a worker or a
... ...
@@ -134,6 +141,14 @@ func New(config Config) (*Cluster, error) {
134 134
 	if config.RuntimeRoot == "" {
135 135
 		config.RuntimeRoot = root
136 136
 	}
137
+	if config.RaftHeartbeatTick == 0 {
138
+		config.RaftHeartbeatTick = 1
139
+	}
140
+	if config.RaftElectionTick == 0 {
141
+		// 10X heartbeat tick is the recommended ratio according to etcd docs.
142
+		config.RaftElectionTick = 10 * config.RaftHeartbeatTick
143
+	}
144
+
137 145
 	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
138 146
 		return nil, err
139 147
 	}
... ...
@@ -124,11 +124,11 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
124 124
 			n.cluster.config.Backend,
125 125
 			n.cluster.config.PluginBackend,
126 126
 			n.cluster.config.ImageBackend),
127
-		HeartbeatTick: 1,
127
+		HeartbeatTick: n.cluster.config.RaftHeartbeatTick,
128 128
 		// Recommended value in etcd/raft is 10 x (HeartbeatTick).
129 129
 		// Lower values were seen to have caused instability because of
130 130
 		// frequent leader elections when running on flakey networks.
131
-		ElectionTick:     10,
131
+		ElectionTick:     n.cluster.config.RaftElectionTick,
132 132
 		UnlockKey:        conf.lockKey,
133 133
 		AutoLockManagers: conf.autolock,
134 134
 		PluginGetter:     n.cluster.config.Backend.PluginGetter(),
... ...
@@ -158,7 +158,18 @@ type CommonConfig struct {
158 158
 	// given to the /swarm/init endpoint and no advertise address is
159 159
 	// specified.
160 160
 	SwarmDefaultAdvertiseAddr string `json:"swarm-default-advertise-addr"`
161
-	MetricsAddress            string `json:"metrics-addr"`
161
+
162
+	// SwarmRaftHeartbeatTick is the number of ticks between heartbeats in the swarm mode raft quorum.
163
+	// The typical value is 1.
164
+	SwarmRaftHeartbeatTick uint32 `json:"swarm-raft-heartbeat-tick"`
165
+
166
+	// SwarmRaftElectionTick is the number of ticks to elapse before followers in the quorum can propose
167
+	// a new round of leader election. The default is 10x the heartbeat tick; at least 10x is recommended.
168
+	// Higher values can make the quorum less sensitive to transient faults in the environment, but this also
169
+	// means it takes longer for the managers to detect a down leader.
170
+	SwarmRaftElectionTick uint32 `json:"swarm-raft-election-tick"`
171
+
172
+	MetricsAddress string `json:"metrics-addr"`
162 173
 
163 174
 	LogConfig
164 175
 	BridgeConfig // bridgeConfig holds bridge network specific configuration.