Browse code

Update libnetwork dependencies for b66c038

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2016/05/08 16:31:30
Showing 60 changed files
... ...
@@ -9,7 +9,7 @@ source 'hack/.vendor-helpers.sh'
9 9
 clone git github.com/Azure/go-ansiterm 388960b655244e76e24c75f48631564eaefade62
10 10
 clone git github.com/Microsoft/hcsshim v0.2.2
11 11
 clone git github.com/Microsoft/go-winio v0.3.4
12
-clone git github.com/Sirupsen/logrus v0.9.0 # logrus is a common dependency among multiple deps
12
+clone git github.com/Sirupsen/logrus v0.10.0 # logrus is a common dependency among multiple deps
13 13
 clone git github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
14 14
 clone git github.com/go-check/check 03a4d9dcf2f92eae8e90ed42aa2656f63fdd0b14 https://github.com/cpuguy83/check.git
15 15
 clone git github.com/gorilla/context 14f550f51a
... ...
@@ -30,11 +30,14 @@ clone git github.com/imdario/mergo 0.2.1
30 30
 
31 31
 #get libnetwork packages
32 32
 clone git github.com/docker/libnetwork v0.8.0-dev.1
33
+clone git github.com/docker/go-events 2e7d352816128aa84f4d29b2a21d400133701a0d
34
+clone git github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80
33 35
 clone git github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec
34 36
 clone git github.com/hashicorp/go-msgpack 71c2886f5a673a35f909803f38ece5810165097b
35
-clone git github.com/hashicorp/memberlist 9a1e242e454d2443df330bdd51a436d5a9058fc4
36
-clone git github.com/hashicorp/serf 7151adcef72687bf95f451a2e0ba15cb19412bf2
37
-clone git github.com/docker/libkv c2aac5dbbaa5c872211edea7c0f32b3bd67e7410
37
+clone git github.com/hashicorp/memberlist 88ac4de0d1a0ca6def284b571342db3b777a4c37
38
+clone git github.com/hashicorp/go-multierror fcdddc395df1ddf4247c69bd436e84cfa0733f7e
39
+clone git github.com/hashicorp/serf 598c54895cc5a7b1a24a398d635e8c0ea0959870
40
+clone git github.com/docker/libkv 7283ef27ed32fe267388510a91709b307bb9942c
38 41
 clone git github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
39 42
 clone git github.com/vishvananda/netlink 631962935bff4f3d20ff32a72e8944f6d2836a26
40 43
 clone git github.com/BurntSushi/toml f706d00e3de6abe700c994cdd545a1a4915af060
... ...
@@ -1,3 +1,10 @@
1
+# 0.10.0
2
+
3
+* feature: Add a test hook (#180)
4
+* feature: `ParseLevel` is now case-insensitive (#326)
5
+* feature: `FieldLogger` interface that generalizes `Logger` and `Entry` (#308)
6
+* performance: avoid re-allocations on `WithFields` (#335)
7
+
1 8
 # 0.9.0
2 9
 
3 10
 * logrus/text_formatter: don't emit empty msg
... ...
@@ -1,4 +1,4 @@
1
-# Logrus <img src="http://i.imgur.com/hTeVwmJ.png" width="40" height="40" alt=":walrus:" class="emoji" title=":walrus:"/>&nbsp;[![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus)&nbsp;[![godoc reference](https://godoc.org/github.com/Sirupsen/logrus?status.png)][godoc]
1
+# Logrus <img src="http://i.imgur.com/hTeVwmJ.png" width="40" height="40" alt=":walrus:" class="emoji" title=":walrus:"/>&nbsp;[![Build Status](https://travis-ci.org/Sirupsen/logrus.svg?branch=master)](https://travis-ci.org/Sirupsen/logrus)&nbsp;[![GoDoc](https://godoc.org/github.com/Sirupsen/logrus?status.svg)](https://godoc.org/github.com/Sirupsen/logrus)
2 2
 
3 3
 Logrus is a structured logger for Go (golang), completely API compatible with
4 4
 the standard library logger. [Godoc][godoc]. **Please note the Logrus API is not
... ...
@@ -12,7 +12,7 @@ plain text):
12 12
 
13 13
 ![Colored](http://i.imgur.com/PY7qMwd.png)
14 14
 
15
-With `log.Formatter = new(logrus.JSONFormatter)`, for easy parsing by logstash
15
+With `log.SetFormatter(&log.JSONFormatter{})`, for easy parsing by logstash
16 16
 or Splunk:
17 17
 
18 18
 ```json
... ...
@@ -32,7 +32,7 @@ ocean","size":10,"time":"2014-03-10 19:57:38.562264131 -0400 EDT"}
32 32
 "time":"2014-03-10 19:57:38.562543128 -0400 EDT"}
33 33
 ```
34 34
 
35
-With the default `log.Formatter = new(&log.TextFormatter{})` when a TTY is not
35
+With the default `log.SetFormatter(&log.TextFormatter{})` when a TTY is not
36 36
 attached, the output is compatible with the
37 37
 [logfmt](http://godoc.org/github.com/kr/logfmt) format:
38 38
 
... ...
@@ -222,6 +222,11 @@ Note: Syslog hook also support connecting to local syslog (Ex. "/dev/log" or "/v
222 222
 | [Octokit](https://github.com/dorajistyle/logrus-octokit-hook) | Hook for logging to github via octokit |
223 223
 | [DeferPanic](https://github.com/deferpanic/dp-logrus) | Hook for logging to DeferPanic |
224 224
 | [Redis-Hook](https://github.com/rogierlommers/logrus-redis-hook) | Hook for logging to a ELK stack (through Redis) |
225
+| [Amqp-Hook](https://github.com/vladoatanasov/logrus_amqp) | Hook for logging to Amqp broker (Like RabbitMQ) |
226
+| [KafkaLogrus](https://github.com/goibibo/KafkaLogrus) | Hook for logging to kafka |
227
+| [Typetalk](https://github.com/dragon3/logrus-typetalk-hook) | Hook for logging to [Typetalk](https://www.typetalk.in/) |
228
+| [ElasticSearch](https://github.com/sohlich/elogrus) | Hook for logging to ElasticSearch|
229
+
225 230
 
226 231
 #### Level logging
227 232
 
... ...
@@ -363,4 +368,21 @@ entries. It should not be a feature of the application-level logger.
363 363
 | ---- | ----------- |
364 364
 |[Logrus Mate](https://github.com/gogap/logrus_mate)|Logrus mate is a tool for Logrus to manage loggers, you can initial logger's level, hook and formatter by config file, the logger will generated with different config at different environment.|
365 365
 
366
-[godoc]: https://godoc.org/github.com/Sirupsen/logrus
366
+#### Testing
367
+
368
+Logrus has a built in facility for asserting the presence of log messages. This is implemented through the `test` hook and provides:
369
+
370
+* decorators for existing logger (`test.NewLocal` and `test.NewGlobal`) which basically just add the `test` hook
371
+* a test logger (`test.NewNullLogger`) that just records log messages (and does not output any):
372
+
373
+```go
374
+logger, hook := NewNullLogger()
375
+logger.Error("Hello error")
376
+
377
+assert.Equal(1, len(hook.Entries))
378
+assert.Equal(logrus.ErrorLevel, hook.LastEntry().Level)
379
+assert.Equal("Hello error", hook.LastEntry().Message)
380
+
381
+hook.Reset()
382
+assert.Nil(hook.LastEntry())
383
+```
... ...
@@ -68,7 +68,7 @@ func (entry *Entry) WithField(key string, value interface{}) *Entry {
68 68
 
69 69
 // Add a map of fields to the Entry.
70 70
 func (entry *Entry) WithFields(fields Fields) *Entry {
71
-	data := Fields{}
71
+	data := make(Fields, len(entry.Data)+len(fields))
72 72
 	for k, v := range entry.Data {
73 73
 		data[k] = v
74 74
 	}
... ...
@@ -3,6 +3,7 @@ package logrus
3 3
 import (
4 4
 	"fmt"
5 5
 	"log"
6
+	"strings"
6 7
 )
7 8
 
8 9
 // Fields type, used to pass to `WithFields`.
... ...
@@ -33,7 +34,7 @@ func (level Level) String() string {
33 33
 
34 34
 // ParseLevel takes a string level and returns the Logrus log level constant.
35 35
 func ParseLevel(lvl string) (Level, error) {
36
-	switch lvl {
36
+	switch strings.ToLower(lvl) {
37 37
 	case "panic":
38 38
 		return PanicLevel, nil
39 39
 	case "fatal":
... ...
@@ -52,6 +53,16 @@ func ParseLevel(lvl string) (Level, error) {
52 52
 	return l, fmt.Errorf("not a valid logrus Level: %q", lvl)
53 53
 }
54 54
 
55
+// A constant exposing all logging levels
56
+var AllLevels = []Level{
57
+	PanicLevel,
58
+	FatalLevel,
59
+	ErrorLevel,
60
+	WarnLevel,
61
+	InfoLevel,
62
+	DebugLevel,
63
+}
64
+
55 65
 // These are the different logging levels. You can set the logging level to log
56 66
 // on your instance of logger, obtained with `logrus.New()`.
57 67
 const (
... ...
@@ -96,3 +107,37 @@ type StdLogger interface {
96 96
 	Panicf(string, ...interface{})
97 97
 	Panicln(...interface{})
98 98
 }
99
+
100
+// The FieldLogger interface generalizes the Entry and Logger types
101
+type FieldLogger interface {
102
+	WithField(key string, value interface{}) *Entry
103
+	WithFields(fields Fields) *Entry
104
+	WithError(err error) *Entry
105
+
106
+	Debugf(format string, args ...interface{})
107
+	Infof(format string, args ...interface{})
108
+	Printf(format string, args ...interface{})
109
+	Warnf(format string, args ...interface{})
110
+	Warningf(format string, args ...interface{})
111
+	Errorf(format string, args ...interface{})
112
+	Fatalf(format string, args ...interface{})
113
+	Panicf(format string, args ...interface{})
114
+
115
+	Debug(args ...interface{})
116
+	Info(args ...interface{})
117
+	Print(args ...interface{})
118
+	Warn(args ...interface{})
119
+	Warning(args ...interface{})
120
+	Error(args ...interface{})
121
+	Fatal(args ...interface{})
122
+	Panic(args ...interface{})
123
+
124
+	Debugln(args ...interface{})
125
+	Infoln(args ...interface{})
126
+	Println(args ...interface{})
127
+	Warnln(args ...interface{})
128
+	Warningln(args ...interface{})
129
+	Errorln(args ...interface{})
130
+	Fatalln(args ...interface{})
131
+	Panicln(args ...interface{})
132
+}
99 133
new file mode 100644
... ...
@@ -0,0 +1,22 @@
0
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
1
+*.o
2
+*.a
3
+*.so
4
+
5
+# Folders
6
+_obj
7
+_test
8
+
9
+# Architecture specific extensions/prefixes
10
+*.[568vq]
11
+[568vq].out
12
+
13
+*.cgo1.go
14
+*.cgo2.c
15
+_cgo_defun.c
16
+_cgo_gotypes.go
17
+_cgo_export.*
18
+
19
+_testmain.go
20
+
21
+*.exe
0 22
new file mode 100644
... ...
@@ -0,0 +1,3 @@
0
+language: go
1
+go:
2
+  - tip
0 3
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+The MIT License (MIT)
1
+
2
+Copyright (c) 2014 Armon Dadgar
3
+
4
+Permission is hereby granted, free of charge, to any person obtaining a copy of
5
+this software and associated documentation files (the "Software"), to deal in
6
+the Software without restriction, including without limitation the rights to
7
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
8
+the Software, and to permit persons to whom the Software is furnished to do so,
9
+subject to the following conditions:
10
+
11
+The above copyright notice and this permission notice shall be included in all
12
+copies or substantial portions of the Software.
13
+
14
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
17
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
18
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
0 20
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+go-radix [![Build Status](https://travis-ci.org/armon/go-radix.png)](https://travis-ci.org/armon/go-radix)
1
+=========
2
+
3
+Provides the `radix` package that implements a [radix tree](http://en.wikipedia.org/wiki/Radix_tree).
4
+The package only provides a single `Tree` implementation, optimized for sparse nodes.
5
+
6
+As a radix tree, it provides the following:
7
+ * O(k) operations. In many cases, this can be faster than a hash table since
8
+   the hash function is an O(k) operation, and hash tables have very poor cache locality.
9
+ * Minimum / Maximum value lookups
10
+ * Ordered iteration
11
+
12
+Documentation
13
+=============
14
+
15
+The full documentation is available on [Godoc](http://godoc.org/github.com/armon/go-radix).
16
+
17
+Example
18
+=======
19
+
20
+Below is a simple example of usage
21
+
22
+```go
23
+// Create a tree
24
+r := radix.New()
25
+r.Insert("foo", 1)
26
+r.Insert("bar", 2)
27
+r.Insert("foobar", 2)
28
+
29
+// Find the longest prefix match
30
+m, _, _ := r.LongestPrefix("foozip")
31
+if m != "foo" {
32
+    panic("should be foo")
33
+}
34
+```
35
+
0 36
new file mode 100644
... ...
@@ -0,0 +1,467 @@
0
+package radix
1
+
2
+import (
3
+	"sort"
4
+	"strings"
5
+)
6
+
7
+// WalkFn is used when walking the tree. Takes a
8
+// key and value, returning if iteration should
9
+// be terminated.
10
+type WalkFn func(s string, v interface{}) bool
11
+
12
+// leafNode is used to represent a value
13
+type leafNode struct {
14
+	key string
15
+	val interface{}
16
+}
17
+
18
+// edge is used to represent an edge node
19
+type edge struct {
20
+	label byte
21
+	node  *node
22
+}
23
+
24
+type node struct {
25
+	// leaf is used to store possible leaf
26
+	leaf *leafNode
27
+
28
+	// prefix is the common prefix we ignore
29
+	prefix string
30
+
31
+	// Edges should be stored in-order for iteration.
32
+	// We avoid a fully materialized slice to save memory,
33
+	// since in most cases we expect to be sparse
34
+	edges edges
35
+}
36
+
37
+func (n *node) isLeaf() bool {
38
+	return n.leaf != nil
39
+}
40
+
41
+func (n *node) addEdge(e edge) {
42
+	n.edges = append(n.edges, e)
43
+	n.edges.Sort()
44
+}
45
+
46
+func (n *node) replaceEdge(e edge) {
47
+	num := len(n.edges)
48
+	idx := sort.Search(num, func(i int) bool {
49
+		return n.edges[i].label >= e.label
50
+	})
51
+	if idx < num && n.edges[idx].label == e.label {
52
+		n.edges[idx].node = e.node
53
+		return
54
+	}
55
+	panic("replacing missing edge")
56
+}
57
+
58
+func (n *node) getEdge(label byte) *node {
59
+	num := len(n.edges)
60
+	idx := sort.Search(num, func(i int) bool {
61
+		return n.edges[i].label >= label
62
+	})
63
+	if idx < num && n.edges[idx].label == label {
64
+		return n.edges[idx].node
65
+	}
66
+	return nil
67
+}
68
+
69
+type edges []edge
70
+
71
+func (e edges) Len() int {
72
+	return len(e)
73
+}
74
+
75
+func (e edges) Less(i, j int) bool {
76
+	return e[i].label < e[j].label
77
+}
78
+
79
+func (e edges) Swap(i, j int) {
80
+	e[i], e[j] = e[j], e[i]
81
+}
82
+
83
+func (e edges) Sort() {
84
+	sort.Sort(e)
85
+}
86
+
87
+// Tree implements a radix tree. This can be treated as a
88
+// Dictionary abstract data type. The main advantage over
89
+// a standard hash map is prefix-based lookups and
90
+// ordered iteration,
91
+type Tree struct {
92
+	root *node
93
+	size int
94
+}
95
+
96
+// New returns an empty Tree
97
+func New() *Tree {
98
+	return NewFromMap(nil)
99
+}
100
+
101
+// NewFromMap returns a new tree containing the keys
102
+// from an existing map
103
+func NewFromMap(m map[string]interface{}) *Tree {
104
+	t := &Tree{root: &node{}}
105
+	for k, v := range m {
106
+		t.Insert(k, v)
107
+	}
108
+	return t
109
+}
110
+
111
+// Len is used to return the number of elements in the tree
112
+func (t *Tree) Len() int {
113
+	return t.size
114
+}
115
+
116
+// longestPrefix finds the length of the shared prefix
117
+// of two strings
118
+func longestPrefix(k1, k2 string) int {
119
+	max := len(k1)
120
+	if l := len(k2); l < max {
121
+		max = l
122
+	}
123
+	var i int
124
+	for i = 0; i < max; i++ {
125
+		if k1[i] != k2[i] {
126
+			break
127
+		}
128
+	}
129
+	return i
130
+}
131
+
132
+// Insert is used to add a newentry or update
133
+// an existing entry. Returns if updated.
134
+func (t *Tree) Insert(s string, v interface{}) (interface{}, bool) {
135
+	var parent *node
136
+	n := t.root
137
+	search := s
138
+	for {
139
+		// Handle key exhaution
140
+		if len(search) == 0 {
141
+			if n.isLeaf() {
142
+				old := n.leaf.val
143
+				n.leaf.val = v
144
+				return old, true
145
+			} else {
146
+				n.leaf = &leafNode{
147
+					key: s,
148
+					val: v,
149
+				}
150
+				t.size++
151
+				return nil, false
152
+			}
153
+		}
154
+
155
+		// Look for the edge
156
+		parent = n
157
+		n = n.getEdge(search[0])
158
+
159
+		// No edge, create one
160
+		if n == nil {
161
+			e := edge{
162
+				label: search[0],
163
+				node: &node{
164
+					leaf: &leafNode{
165
+						key: s,
166
+						val: v,
167
+					},
168
+					prefix: search,
169
+				},
170
+			}
171
+			parent.addEdge(e)
172
+			t.size++
173
+			return nil, false
174
+		}
175
+
176
+		// Determine longest prefix of the search key on match
177
+		commonPrefix := longestPrefix(search, n.prefix)
178
+		if commonPrefix == len(n.prefix) {
179
+			search = search[commonPrefix:]
180
+			continue
181
+		}
182
+
183
+		// Split the node
184
+		t.size++
185
+		child := &node{
186
+			prefix: search[:commonPrefix],
187
+		}
188
+		parent.replaceEdge(edge{
189
+			label: search[0],
190
+			node:  child,
191
+		})
192
+
193
+		// Restore the existing node
194
+		child.addEdge(edge{
195
+			label: n.prefix[commonPrefix],
196
+			node:  n,
197
+		})
198
+		n.prefix = n.prefix[commonPrefix:]
199
+
200
+		// Create a new leaf node
201
+		leaf := &leafNode{
202
+			key: s,
203
+			val: v,
204
+		}
205
+
206
+		// If the new key is a subset, add to to this node
207
+		search = search[commonPrefix:]
208
+		if len(search) == 0 {
209
+			child.leaf = leaf
210
+			return nil, false
211
+		}
212
+
213
+		// Create a new edge for the node
214
+		child.addEdge(edge{
215
+			label: search[0],
216
+			node: &node{
217
+				leaf:   leaf,
218
+				prefix: search,
219
+			},
220
+		})
221
+		return nil, false
222
+	}
223
+	return nil, false
224
+}
225
+
226
+// Delete is used to delete a key, returning the previous
227
+// value and if it was deleted
228
+func (t *Tree) Delete(s string) (interface{}, bool) {
229
+	n := t.root
230
+	search := s
231
+	for {
232
+		// Check for key exhaution
233
+		if len(search) == 0 {
234
+			if !n.isLeaf() {
235
+				break
236
+			}
237
+			goto DELETE
238
+		}
239
+
240
+		// Look for an edge
241
+		n = n.getEdge(search[0])
242
+		if n == nil {
243
+			break
244
+		}
245
+
246
+		// Consume the search prefix
247
+		if strings.HasPrefix(search, n.prefix) {
248
+			search = search[len(n.prefix):]
249
+		} else {
250
+			break
251
+		}
252
+	}
253
+	return nil, false
254
+
255
+DELETE:
256
+	// Delete the leaf
257
+	leaf := n.leaf
258
+	n.leaf = nil
259
+	t.size--
260
+
261
+	// Check if we should merge this node
262
+	if len(n.edges) == 1 {
263
+		e := n.edges[0]
264
+		child := e.node
265
+		n.prefix = n.prefix + child.prefix
266
+		n.leaf = child.leaf
267
+		n.edges = child.edges
268
+	}
269
+	return leaf.val, true
270
+}
271
+
272
+// Get is used to lookup a specific key, returning
273
+// the value and if it was found
274
+func (t *Tree) Get(s string) (interface{}, bool) {
275
+	n := t.root
276
+	search := s
277
+	for {
278
+		// Check for key exhaution
279
+		if len(search) == 0 {
280
+			if n.isLeaf() {
281
+				return n.leaf.val, true
282
+			}
283
+			break
284
+		}
285
+
286
+		// Look for an edge
287
+		n = n.getEdge(search[0])
288
+		if n == nil {
289
+			break
290
+		}
291
+
292
+		// Consume the search prefix
293
+		if strings.HasPrefix(search, n.prefix) {
294
+			search = search[len(n.prefix):]
295
+		} else {
296
+			break
297
+		}
298
+	}
299
+	return nil, false
300
+}
301
+
302
+// LongestPrefix is like Get, but instead of an
303
+// exact match, it will return the longest prefix match.
304
+func (t *Tree) LongestPrefix(s string) (string, interface{}, bool) {
305
+	var last *leafNode
306
+	n := t.root
307
+	search := s
308
+	for {
309
+		// Look for a leaf node
310
+		if n.isLeaf() {
311
+			last = n.leaf
312
+		}
313
+
314
+		// Check for key exhaution
315
+		if len(search) == 0 {
316
+			break
317
+		}
318
+
319
+		// Look for an edge
320
+		n = n.getEdge(search[0])
321
+		if n == nil {
322
+			break
323
+		}
324
+
325
+		// Consume the search prefix
326
+		if strings.HasPrefix(search, n.prefix) {
327
+			search = search[len(n.prefix):]
328
+		} else {
329
+			break
330
+		}
331
+	}
332
+	if last != nil {
333
+		return last.key, last.val, true
334
+	}
335
+	return "", nil, false
336
+}
337
+
338
+// Minimum is used to return the minimum value in the tree
339
+func (t *Tree) Minimum() (string, interface{}, bool) {
340
+	n := t.root
341
+	for {
342
+		if n.isLeaf() {
343
+			return n.leaf.key, n.leaf.val, true
344
+		}
345
+		if len(n.edges) > 0 {
346
+			n = n.edges[0].node
347
+		} else {
348
+			break
349
+		}
350
+	}
351
+	return "", nil, false
352
+}
353
+
354
+// Maximum is used to return the maximum value in the tree
355
+func (t *Tree) Maximum() (string, interface{}, bool) {
356
+	n := t.root
357
+	for {
358
+		if num := len(n.edges); num > 0 {
359
+			n = n.edges[num-1].node
360
+			continue
361
+		}
362
+		if n.isLeaf() {
363
+			return n.leaf.key, n.leaf.val, true
364
+		} else {
365
+			break
366
+		}
367
+	}
368
+	return "", nil, false
369
+}
370
+
371
+// Walk is used to walk the tree
372
+func (t *Tree) Walk(fn WalkFn) {
373
+	recursiveWalk(t.root, fn)
374
+}
375
+
376
+// WalkPrefix is used to walk the tree under a prefix
377
+func (t *Tree) WalkPrefix(prefix string, fn WalkFn) {
378
+	n := t.root
379
+	search := prefix
380
+	for {
381
+		// Check for key exhaution
382
+		if len(search) == 0 {
383
+			recursiveWalk(n, fn)
384
+			return
385
+		}
386
+
387
+		// Look for an edge
388
+		n = n.getEdge(search[0])
389
+		if n == nil {
390
+			break
391
+		}
392
+
393
+		// Consume the search prefix
394
+		if strings.HasPrefix(search, n.prefix) {
395
+			search = search[len(n.prefix):]
396
+
397
+		} else if strings.HasPrefix(n.prefix, search) {
398
+			// Child may be under our search prefix
399
+			recursiveWalk(n, fn)
400
+			return
401
+		} else {
402
+			break
403
+		}
404
+	}
405
+
406
+}
407
+
408
+// WalkPath is used to walk the tree, but only visiting nodes
409
+// from the root down to a given leaf. Where WalkPrefix walks
410
+// all the entries *under* the given prefix, this walks the
411
+// entries *above* the given prefix.
412
+func (t *Tree) WalkPath(path string, fn WalkFn) {
413
+	n := t.root
414
+	search := path
415
+	for {
416
+		// Visit the leaf values if any
417
+		if n.leaf != nil && fn(n.leaf.key, n.leaf.val) {
418
+			return
419
+		}
420
+
421
+		// Check for key exhaution
422
+		if len(search) == 0 {
423
+			return
424
+		}
425
+
426
+		// Look for an edge
427
+		n = n.getEdge(search[0])
428
+		if n == nil {
429
+			return
430
+		}
431
+
432
+		// Consume the search prefix
433
+		if strings.HasPrefix(search, n.prefix) {
434
+			search = search[len(n.prefix):]
435
+		} else {
436
+			break
437
+		}
438
+	}
439
+}
440
+
441
+// recursiveWalk is used to do a pre-order walk of a node
442
+// recursively. Returns true if the walk should be aborted
443
+func recursiveWalk(n *node, fn WalkFn) bool {
444
+	// Visit the leaf values if any
445
+	if n.leaf != nil && fn(n.leaf.key, n.leaf.val) {
446
+		return true
447
+	}
448
+
449
+	// Recurse on the children
450
+	for _, e := range n.edges {
451
+		if recursiveWalk(e.node, fn) {
452
+			return true
453
+		}
454
+	}
455
+	return false
456
+}
457
+
458
+// ToMap is used to walk the tree and convert it into a map
459
+func (t *Tree) ToMap() map[string]interface{} {
460
+	out := make(map[string]interface{}, t.size)
461
+	t.Walk(func(k string, v interface{}) bool {
462
+		out[k] = v
463
+		return false
464
+	})
465
+	return out
466
+}
0 467
new file mode 100644
... ...
@@ -0,0 +1,24 @@
0
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
1
+*.o
2
+*.a
3
+*.so
4
+
5
+# Folders
6
+_obj
7
+_test
8
+
9
+# Architecture specific extensions/prefixes
10
+*.[568vq]
11
+[568vq].out
12
+
13
+*.cgo1.go
14
+*.cgo2.c
15
+_cgo_defun.c
16
+_cgo_gotypes.go
17
+_cgo_export.*
18
+
19
+_testmain.go
20
+
21
+*.exe
22
+*.test
23
+*.prof
0 24
new file mode 100644
... ...
@@ -0,0 +1,201 @@
0
+                                 Apache License
1
+                           Version 2.0, January 2004
2
+                        http://www.apache.org/licenses/
3
+
4
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
5
+
6
+   1. Definitions.
7
+
8
+      "License" shall mean the terms and conditions for use, reproduction,
9
+      and distribution as defined by Sections 1 through 9 of this document.
10
+
11
+      "Licensor" shall mean the copyright owner or entity authorized by
12
+      the copyright owner that is granting the License.
13
+
14
+      "Legal Entity" shall mean the union of the acting entity and all
15
+      other entities that control, are controlled by, or are under common
16
+      control with that entity. For the purposes of this definition,
17
+      "control" means (i) the power, direct or indirect, to cause the
18
+      direction or management of such entity, whether by contract or
19
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
20
+      outstanding shares, or (iii) beneficial ownership of such entity.
21
+
22
+      "You" (or "Your") shall mean an individual or Legal Entity
23
+      exercising permissions granted by this License.
24
+
25
+      "Source" form shall mean the preferred form for making modifications,
26
+      including but not limited to software source code, documentation
27
+      source, and configuration files.
28
+
29
+      "Object" form shall mean any form resulting from mechanical
30
+      transformation or translation of a Source form, including but
31
+      not limited to compiled object code, generated documentation,
32
+      and conversions to other media types.
33
+
34
+      "Work" shall mean the work of authorship, whether in Source or
35
+      Object form, made available under the License, as indicated by a
36
+      copyright notice that is included in or attached to the work
37
+      (an example is provided in the Appendix below).
38
+
39
+      "Derivative Works" shall mean any work, whether in Source or Object
40
+      form, that is based on (or derived from) the Work and for which the
41
+      editorial revisions, annotations, elaborations, or other modifications
42
+      represent, as a whole, an original work of authorship. For the purposes
43
+      of this License, Derivative Works shall not include works that remain
44
+      separable from, or merely link (or bind by name) to the interfaces of,
45
+      the Work and Derivative Works thereof.
46
+
47
+      "Contribution" shall mean any work of authorship, including
48
+      the original version of the Work and any modifications or additions
49
+      to that Work or Derivative Works thereof, that is intentionally
50
+      submitted to Licensor for inclusion in the Work by the copyright owner
51
+      or by an individual or Legal Entity authorized to submit on behalf of
52
+      the copyright owner. For the purposes of this definition, "submitted"
53
+      means any form of electronic, verbal, or written communication sent
54
+      to the Licensor or its representatives, including but not limited to
55
+      communication on electronic mailing lists, source code control systems,
56
+      and issue tracking systems that are managed by, or on behalf of, the
57
+      Licensor for the purpose of discussing and improving the Work, but
58
+      excluding communication that is conspicuously marked or otherwise
59
+      designated in writing by the copyright owner as "Not a Contribution."
60
+
61
+      "Contributor" shall mean Licensor and any individual or Legal Entity
62
+      on behalf of whom a Contribution has been received by Licensor and
63
+      subsequently incorporated within the Work.
64
+
65
+   2. Grant of Copyright License. Subject to the terms and conditions of
66
+      this License, each Contributor hereby grants to You a perpetual,
67
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
68
+      copyright license to reproduce, prepare Derivative Works of,
69
+      publicly display, publicly perform, sublicense, and distribute the
70
+      Work and such Derivative Works in Source or Object form.
71
+
72
+   3. Grant of Patent License. Subject to the terms and conditions of
73
+      this License, each Contributor hereby grants to You a perpetual,
74
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
75
+      (except as stated in this section) patent license to make, have made,
76
+      use, offer to sell, sell, import, and otherwise transfer the Work,
77
+      where such license applies only to those patent claims licensable
78
+      by such Contributor that are necessarily infringed by their
79
+      Contribution(s) alone or by combination of their Contribution(s)
80
+      with the Work to which such Contribution(s) was submitted. If You
81
+      institute patent litigation against any entity (including a
82
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
83
+      or a Contribution incorporated within the Work constitutes direct
84
+      or contributory patent infringement, then any patent licenses
85
+      granted to You under this License for that Work shall terminate
86
+      as of the date such litigation is filed.
87
+
88
+   4. Redistribution. You may reproduce and distribute copies of the
89
+      Work or Derivative Works thereof in any medium, with or without
90
+      modifications, and in Source or Object form, provided that You
91
+      meet the following conditions:
92
+
93
+      (a) You must give any other recipients of the Work or
94
+          Derivative Works a copy of this License; and
95
+
96
+      (b) You must cause any modified files to carry prominent notices
97
+          stating that You changed the files; and
98
+
99
+      (c) You must retain, in the Source form of any Derivative Works
100
+          that You distribute, all copyright, patent, trademark, and
101
+          attribution notices from the Source form of the Work,
102
+          excluding those notices that do not pertain to any part of
103
+          the Derivative Works; and
104
+
105
+      (d) If the Work includes a "NOTICE" text file as part of its
106
+          distribution, then any Derivative Works that You distribute must
107
+          include a readable copy of the attribution notices contained
108
+          within such NOTICE file, excluding those notices that do not
109
+          pertain to any part of the Derivative Works, in at least one
110
+          of the following places: within a NOTICE text file distributed
111
+          as part of the Derivative Works; within the Source form or
112
+          documentation, if provided along with the Derivative Works; or,
113
+          within a display generated by the Derivative Works, if and
114
+          wherever such third-party notices normally appear. The contents
115
+          of the NOTICE file are for informational purposes only and
116
+          do not modify the License. You may add Your own attribution
117
+          notices within Derivative Works that You distribute, alongside
118
+          or as an addendum to the NOTICE text from the Work, provided
119
+          that such additional attribution notices cannot be construed
120
+          as modifying the License.
121
+
122
+      You may add Your own copyright statement to Your modifications and
123
+      may provide additional or different license terms and conditions
124
+      for use, reproduction, or distribution of Your modifications, or
125
+      for any such Derivative Works as a whole, provided Your use,
126
+      reproduction, and distribution of the Work otherwise complies with
127
+      the conditions stated in this License.
128
+
129
+   5. Submission of Contributions. Unless You explicitly state otherwise,
130
+      any Contribution intentionally submitted for inclusion in the Work
131
+      by You to the Licensor shall be under the terms and conditions of
132
+      this License, without any additional terms or conditions.
133
+      Notwithstanding the above, nothing herein shall supersede or modify
134
+      the terms of any separate license agreement you may have executed
135
+      with Licensor regarding such Contributions.
136
+
137
+   6. Trademarks. This License does not grant permission to use the trade
138
+      names, trademarks, service marks, or product names of the Licensor,
139
+      except as required for reasonable and customary use in describing the
140
+      origin of the Work and reproducing the content of the NOTICE file.
141
+
142
+   7. Disclaimer of Warranty. Unless required by applicable law or
143
+      agreed to in writing, Licensor provides the Work (and each
144
+      Contributor provides its Contributions) on an "AS IS" BASIS,
145
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
146
+      implied, including, without limitation, any warranties or conditions
147
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
148
+      PARTICULAR PURPOSE. You are solely responsible for determining the
149
+      appropriateness of using or redistributing the Work and assume any
150
+      risks associated with Your exercise of permissions under this License.
151
+
152
+   8. Limitation of Liability. In no event and under no legal theory,
153
+      whether in tort (including negligence), contract, or otherwise,
154
+      unless required by applicable law (such as deliberate and grossly
155
+      negligent acts) or agreed to in writing, shall any Contributor be
156
+      liable to You for damages, including any direct, indirect, special,
157
+      incidental, or consequential damages of any character arising as a
158
+      result of this License or out of the use or inability to use the
159
+      Work (including but not limited to damages for loss of goodwill,
160
+      work stoppage, computer failure or malfunction, or any and all
161
+      other commercial damages or losses), even if such Contributor
162
+      has been advised of the possibility of such damages.
163
+
164
+   9. Accepting Warranty or Additional Liability. While redistributing
165
+      the Work or Derivative Works thereof, You may choose to offer,
166
+      and charge a fee for, acceptance of support, warranty, indemnity,
167
+      or other liability obligations and/or rights consistent with this
168
+      License. However, in accepting such obligations, You may act only
169
+      on Your own behalf and on Your sole responsibility, not on behalf
170
+      of any other Contributor, and only if You agree to indemnify,
171
+      defend, and hold each Contributor harmless for any liability
172
+      incurred by, or claims asserted against, such Contributor by reason
173
+      of your accepting any such warranty or additional liability.
174
+
175
+   END OF TERMS AND CONDITIONS
176
+
177
+   APPENDIX: How to apply the Apache License to your work.
178
+
179
+      To apply the Apache License to your work, attach the following
180
+      boilerplate notice, with the fields enclosed by brackets "{}"
181
+      replaced with your own identifying information. (Don't include
182
+      the brackets!)  The text should be enclosed in the appropriate
183
+      comment syntax for the file format. We also recommend that a
184
+      file or class name and description of purpose be included on the
185
+      same "printed page" as the copyright notice for easier
186
+      identification within third-party archives.
187
+
188
+   Copyright {yyyy} {name of copyright owner}
189
+
190
+   Licensed under the Apache License, Version 2.0 (the "License");
191
+   you may not use this file except in compliance with the License.
192
+   You may obtain a copy of the License at
193
+
194
+       http://www.apache.org/licenses/LICENSE-2.0
195
+
196
+   Unless required by applicable law or agreed to in writing, software
197
+   distributed under the License is distributed on an "AS IS" BASIS,
198
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
199
+   See the License for the specific language governing permissions and
200
+   limitations under the License.
0 201
new file mode 100644
... ...
@@ -0,0 +1,112 @@
0
+# Docker Events Package
1
+
2
+[![GoDoc](https://godoc.org/github.com/docker/go-events?status.svg)](https://godoc.org/github.com/docker/go-events)
3
+[![Circle CI](https://circleci.com/gh/docker/go-events.svg?style=shield)](https://circleci.com/gh/docker/go-events)
4
+
5
+The Docker `events` package implements a composable event distribution package
6
+for Go.
7
+
8
+Originally created to implement the [notifications in Docker Registry
9
+2](https://github.com/docker/distribution/blob/master/docs/notifications.md),
10
+we've found the pattern to be useful in other applications. This package is
11
+most of the same code with slightly updated interfaces. Much of the internals
12
+have been made available.
13
+
14
+## Usage
15
+
16
+The `events` package centers around a `Sink` type.  Events are written with
17
+calls to `Sink.Write(event Event)`. Sinks can be wired up in various
18
+configurations to achieve interesting behavior.
19
+
20
+The canonical example is that employed by the
21
+[docker/distribution/notifications](https://godoc.org/github.com/docker/distribution/notifications)
22
+package. Let's say we have a type `httpSink` where we'd like to queue
23
+notifications. As a rule, it should send a single http request and return an
24
+error if it fails:
25
+
26
+```go
27
+func (h *httpSink) Write(event Event) error {
28
+	p, err := json.Marshal(event)
29
+	if err != nil {
30
+		return err
31
+	}
32
+	body := bytes.NewReader(p)
33
+	resp, err := h.client.Post(h.url, "application/json", body)
34
+	if err != nil {
35
+		return err
36
+	}
37
+	defer resp.Body.Close()
38
+	
39
+	if resp.Status != 200 {
40
+		return errors.New("unexpected status")
41
+	}
42
+
43
+	return nil
44
+}
45
+
46
+// implement (*httpSink).Close()
47
+```
48
+
49
+With just that, we can start using components from this package. One can call
50
+`(*httpSink).Write` to send events as the body of a post request to a
51
+configured URL.
52
+
53
+### Retries
54
+
55
+HTTP can be unreliable. The first feature we'd like is to have some retry:
56
+
57
+```go
58
+hs := newHTTPSink(/*...*/)
59
+retry := NewRetryingSink(hs, NewBreaker(5, time.Second))
60
+```
61
+
62
+We now have a sink that will retry events against the `httpSink` until they
63
+succeed. The retry will backoff for one second after 5 consecutive failures
64
+using the breaker strategy.
65
+
66
+### Queues
67
+
68
+This isn't quite enough. We want a sink that doesn't block while we are
69
+waiting for events to be sent. Let's add a `Queue`:
70
+
71
+```go
72
+queue := NewQueue(retry)
73
+```
74
+
75
+Now, we have an unbounded queue that will work through all events sent with
76
+`(*Queue).Write`. Events can be added asynchronously to the queue without
77
+blocking the current execution path. This is ideal for use in an http request.
78
+
79
+### Broadcast
80
+
81
+It usually turns out that you want to send to more than one listener. We can
82
+use `Broadcaster` to support this:
83
+
84
+```go
85
+var broadcast = NewBroadcaster() // make it available somewhere in your application.
86
+broadcast.Add(queue) // add your queue!
87
+broadcast.Add(queue2) // and another!
88
+```
89
+
90
+With the above, we can now call `broadcast.Write` in our http handlers and have
91
+all the events distributed to each queue. Because the events are queued, no
92
+listener blocks another.
93
+
94
+### Extending
95
+
96
+For the most part, the above is sufficient for a lot of applications. However,
97
+extending the above functionality can be done implementing your own `Sink`. The
98
+behavior and semantics of the sink can be completely dependent on the
99
+application requirements. The interface is provided below for reference:
100
+
101
+```go
102
+type Sink {
103
+	Write(Event) error
104
+	Close() error
105
+}
106
+```
107
+
108
+Application behavior can be controlled by how `Write` behaves. The examples
109
+above are designed to queue the message and return as quickly as possible.
110
+Other implementations may block until the event is committed to durable
111
+storage.
0 112
new file mode 100644
... ...
@@ -0,0 +1,158 @@
0
+package events
1
+
2
+import "github.com/Sirupsen/logrus"
3
+
4
+// Broadcaster sends events to multiple, reliable Sinks. The goal of this
5
+// component is to dispatch events to configured endpoints. Reliability can be
6
+// provided by wrapping incoming sinks.
7
+type Broadcaster struct {
8
+	sinks   []Sink
9
+	events  chan Event
10
+	adds    chan configureRequest
11
+	removes chan configureRequest
12
+	closed  chan chan struct{}
13
+}
14
+
15
+// NewBroadcaster appends one or more sinks to the list of sinks. The
16
+// broadcaster behavior will be affected by the properties of the sink.
17
+// Generally, the sink should accept all messages and deal with reliability on
18
+// its own. Use of EventQueue and RetryingSink should be used here.
19
+func NewBroadcaster(sinks ...Sink) *Broadcaster {
20
+	b := Broadcaster{
21
+		sinks:   sinks,
22
+		events:  make(chan Event),
23
+		adds:    make(chan configureRequest),
24
+		removes: make(chan configureRequest),
25
+		closed:  make(chan chan struct{}),
26
+	}
27
+
28
+	// Start the broadcaster
29
+	go b.run()
30
+
31
+	return &b
32
+}
33
+
34
+// Write accepts an event to be dispatched to all sinks. This method will never
35
+// fail and should never block (hopefully!). The caller cedes the memory to the
36
+// broadcaster and should not modify it after calling write.
37
+func (b *Broadcaster) Write(event Event) error {
38
+	select {
39
+	case b.events <- event:
40
+	case <-b.closed:
41
+		return ErrSinkClosed
42
+	}
43
+	return nil
44
+}
45
+
46
+// Add the sink to the broadcaster.
47
+//
48
+// The provided sink must be comparable with equality. Typically, this just
49
+// works with a regular pointer type.
50
+func (b *Broadcaster) Add(sink Sink) error {
51
+	return b.configure(b.adds, sink)
52
+}
53
+
54
+// Remove the provided sink.
55
+func (b *Broadcaster) Remove(sink Sink) error {
56
+	return b.configure(b.removes, sink)
57
+}
58
+
59
+type configureRequest struct {
60
+	sink     Sink
61
+	response chan error
62
+}
63
+
64
+func (b *Broadcaster) configure(ch chan configureRequest, sink Sink) error {
65
+	response := make(chan error, 1)
66
+
67
+	for {
68
+		select {
69
+		case ch <- configureRequest{
70
+			sink:     sink,
71
+			response: response}:
72
+			ch = nil
73
+		case err := <-response:
74
+			return err
75
+		case <-b.closed:
76
+			return ErrSinkClosed
77
+		}
78
+	}
79
+}
80
+
81
+// Close the broadcaster, ensuring that all messages are flushed to the
82
+// underlying sink before returning.
83
+func (b *Broadcaster) Close() error {
84
+	select {
85
+	case <-b.closed:
86
+		// already closed
87
+		return ErrSinkClosed
88
+	default:
89
+		// do a little chan handoff dance to synchronize closing
90
+		closed := make(chan struct{})
91
+		b.closed <- closed
92
+		close(b.closed)
93
+		<-closed
94
+		return nil
95
+	}
96
+}
97
+
98
+// run is the main broadcast loop, started when the broadcaster is created.
99
+// Under normal conditions, it waits for events on the event channel. After
100
+// Close is called, this goroutine will exit.
101
+func (b *Broadcaster) run() {
102
+	remove := func(target Sink) {
103
+		for i, sink := range b.sinks {
104
+			if sink == target {
105
+				b.sinks = append(b.sinks[:i], b.sinks[i+1:]...)
106
+				break
107
+			}
108
+		}
109
+	}
110
+
111
+	for {
112
+		select {
113
+		case event := <-b.events:
114
+			for _, sink := range b.sinks {
115
+				if err := sink.Write(event); err != nil {
116
+					if err == ErrSinkClosed {
117
+						// remove closed sinks
118
+						remove(sink)
119
+						continue
120
+					}
121
+					logrus.WithField("event", event).WithField("events.sink", sink).WithError(err).
122
+						Errorf("broadcaster: dropping event")
123
+				}
124
+			}
125
+		case request := <-b.adds:
126
+			// while we have to iterate for add/remove, common iteration for
127
+			// send is faster against slice.
128
+
129
+			var found bool
130
+			for _, sink := range b.sinks {
131
+				if request.sink == sink {
132
+					found = true
133
+					break
134
+				}
135
+			}
136
+
137
+			if !found {
138
+				b.sinks = append(b.sinks, request.sink)
139
+			}
140
+			// b.sinks[request.sink] = struct{}{}
141
+			request.response <- nil
142
+		case request := <-b.removes:
143
+			remove(request.sink)
144
+			request.response <- nil
145
+		case closing := <-b.closed:
146
+			// close all the underlying sinks
147
+			for _, sink := range b.sinks {
148
+				if err := sink.Close(); err != nil && err != ErrSinkClosed {
149
+					logrus.WithField("events.sink", sink).WithError(err).
150
+						Errorf("broadcaster: closing sink failed")
151
+				}
152
+			}
153
+			closing <- struct{}{}
154
+			return
155
+		}
156
+	}
157
+}
0 158
new file mode 100644
... ...
@@ -0,0 +1,47 @@
0
+package events
1
+
2
+// Channel provides a sink that can be listened on. The writer and channel
3
+// listener must operate in separate goroutines.
4
+//
5
+// Consumers should listen on Channel.C until Closed is closed.
6
+type Channel struct {
7
+	C chan Event
8
+
9
+	closed chan struct{}
10
+}
11
+
12
+// NewChannel returns a channel. If buffer is non-zero, the channel is
13
+// buffered.
14
+func NewChannel(buffer int) *Channel {
15
+	return &Channel{
16
+		C:      make(chan Event, buffer),
17
+		closed: make(chan struct{}),
18
+	}
19
+}
20
+
21
+// Done returns a channel that will always proceed once the sink is closed.
22
+func (ch *Channel) Done() chan struct{} {
23
+	return ch.closed
24
+}
25
+
26
+// Write the event to the channel. Must be called in a separate goroutine from
27
+// the listener.
28
+func (ch *Channel) Write(event Event) error {
29
+	select {
30
+	case ch.C <- event:
31
+		return nil
32
+	case <-ch.closed:
33
+		return ErrSinkClosed
34
+	}
35
+}
36
+
37
+// Close the channel sink.
38
+func (ch *Channel) Close() error {
39
+	select {
40
+	case <-ch.closed:
41
+		return ErrSinkClosed
42
+	default:
43
+		close(ch.closed)
44
+		return nil
45
+	}
46
+}
0 47
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+package events
1
+
2
+import "fmt"
3
+
4
+var (
5
+	// ErrSinkClosed is returned if a write is issued to a sink that has been
6
+	// closed. If encountered, the error should be considered terminal and
7
+	// retries will not be successful.
8
+	ErrSinkClosed = fmt.Errorf("events: sink closed")
9
+)
0 10
new file mode 100644
... ...
@@ -0,0 +1,15 @@
0
+package events
1
+
2
+// Event marks items that can be sent as events.
3
+type Event interface{}
4
+
5
+// Sink accepts and sends events.
6
+type Sink interface {
7
+	// Write an event to the Sink. If no error is returned, the caller will
8
+	// assume that all events have been committed to the sink. If an error is
9
+	// received, the caller may retry sending the event.
10
+	Write(event Event) error
11
+
12
+	// Close the sink, possibly waiting for pending events to flush.
13
+	Close() error
14
+}
0 15
new file mode 100644
... ...
@@ -0,0 +1,52 @@
0
+package events
1
+
2
+// Matcher matches events.
3
+type Matcher interface {
4
+	Match(event Event) bool
5
+}
6
+
7
+// MatcherFunc implements matcher with just a function.
8
+type MatcherFunc func(event Event) bool
9
+
10
+// Match calls the wrapped function.
11
+func (fn MatcherFunc) Match(event Event) bool {
12
+	return fn(event)
13
+}
14
+
15
+// Filter provides an event sink that sends only events that are accepted by a
16
+// Matcher. No methods on filter are goroutine safe.
17
+type Filter struct {
18
+	dst     Sink
19
+	matcher Matcher
20
+	closed  bool
21
+}
22
+
23
+// NewFilter returns a new filter that will send events to dst that return
24
+// true for Matcher.
25
+func NewFilter(dst Sink, matcher Matcher) Sink {
26
+	return &Filter{dst: dst, matcher: matcher}
27
+}
28
+
29
+// Write an event to the filter.
30
+func (f *Filter) Write(event Event) error {
31
+	if f.closed {
32
+		return ErrSinkClosed
33
+	}
34
+
35
+	if f.matcher.Match(event) {
36
+		return f.dst.Write(event)
37
+	}
38
+
39
+	return nil
40
+}
41
+
42
+// Close the filter and allow no more events to pass through.
43
+func (f *Filter) Close() error {
44
+	// TODO(stevvooe): Not all sinks should have Close.
45
+	if f.closed {
46
+		return ErrSinkClosed
47
+	}
48
+
49
+	f.closed = true
50
+	return f.dst.Close()
51
+}
0 52
new file mode 100644
... ...
@@ -0,0 +1,104 @@
0
+package events
1
+
2
+import (
3
+	"container/list"
4
+	"sync"
5
+
6
+	"github.com/Sirupsen/logrus"
7
+)
8
+
9
+// Queue accepts all messages into a queue for asynchronous consumption
10
+// by a sink. It is unbounded and thread safe but the sink must be reliable or
11
+// events will be dropped.
12
+type Queue struct {
13
+	dst    Sink
14
+	events *list.List
15
+	cond   *sync.Cond
16
+	mu     sync.Mutex
17
+	closed bool
18
+}
19
+
20
+// NewQueue returns a queue to the provided Sink dst.
21
+func NewQueue(dst Sink) *Queue {
22
+	eq := Queue{
23
+		dst:    dst,
24
+		events: list.New(),
25
+	}
26
+
27
+	eq.cond = sync.NewCond(&eq.mu)
28
+	go eq.run()
29
+	return &eq
30
+}
31
+
32
+// Write accepts the events into the queue, only failing if the queue has
33
+// been closed.
34
+func (eq *Queue) Write(event Event) error {
35
+	eq.mu.Lock()
36
+	defer eq.mu.Unlock()
37
+
38
+	if eq.closed {
39
+		return ErrSinkClosed
40
+	}
41
+
42
+	eq.events.PushBack(event)
43
+	eq.cond.Signal() // signal waiters
44
+
45
+	return nil
46
+}
47
+
48
+// Close shuts down the event queue, flushing
49
+func (eq *Queue) Close() error {
50
+	eq.mu.Lock()
51
+	defer eq.mu.Unlock()
52
+
53
+	if eq.closed {
54
+		return ErrSinkClosed
55
+	}
56
+
57
+	// set closed flag
58
+	eq.closed = true
59
+	eq.cond.Signal() // signal flushes queue
60
+	eq.cond.Wait()   // wait for signal from last flush
61
+	return eq.dst.Close()
62
+}
63
+
64
+// run is the main goroutine to flush events to the target sink.
65
+func (eq *Queue) run() {
66
+	for {
67
+		event := eq.next()
68
+
69
+		if event == nil {
70
+			return // nil block means event queue is closed.
71
+		}
72
+
73
+		if err := eq.dst.Write(event); err != nil {
74
+			logrus.WithFields(logrus.Fields{
75
+				"event": event,
76
+				"sink":  eq.dst,
77
+			}).WithError(err).Warnf("eventqueue: dropped event")
78
+		}
79
+	}
80
+}
81
+
82
+// next encompasses the critical section of the run loop. When the queue is
83
+// empty, it will block on the condition. If new data arrives, it will wake
84
+// and return a block. When closed, a nil event will be returned.
85
+func (eq *Queue) next() Event {
86
+	eq.mu.Lock()
87
+	defer eq.mu.Unlock()
88
+
89
+	for eq.events.Len() < 1 {
90
+		if eq.closed {
91
+			eq.cond.Broadcast()
92
+			return nil
93
+		}
94
+
95
+		eq.cond.Wait()
96
+	}
97
+
98
+	front := eq.events.Front()
99
+	block := front.Value.(Event)
100
+	eq.events.Remove(front)
101
+
102
+	return block
103
+}
0 104
new file mode 100644
... ...
@@ -0,0 +1,168 @@
0
+package events
1
+
2
+import (
3
+	"sync"
4
+	"time"
5
+
6
+	"github.com/Sirupsen/logrus"
7
+)
8
+
9
+// RetryingSink retries the write until success or an ErrSinkClosed is
10
+// returned. Underlying sink must have p > 0 of succeeding or the sink will
11
+// block. Retry is configured with a RetryStrategy.  Concurrent calls to a
12
+// retrying sink are serialized through the sink, meaning that if one is
13
+// in-flight, another will not proceed.
14
+type RetryingSink struct {
15
+	sink     Sink
16
+	strategy RetryStrategy
17
+	closed   chan struct{}
18
+}
19
+
20
+// NewRetryingSink returns a sink that will retry writes to a sink, backing
21
+// off on failure. Parameters threshold and backoff adjust the behavior of the
22
+// circuit breaker.
23
+func NewRetryingSink(sink Sink, strategy RetryStrategy) *RetryingSink {
24
+	rs := &RetryingSink{
25
+		sink:     sink,
26
+		strategy: strategy,
27
+		closed:   make(chan struct{}),
28
+	}
29
+
30
+	return rs
31
+}
32
+
33
+// Write attempts to flush the events to the downstream sink until it succeeds
34
+// or the sink is closed.
35
+func (rs *RetryingSink) Write(event Event) error {
36
+	logger := logrus.WithField("event", event)
37
+	var timer *time.Timer
38
+
39
+retry:
40
+	select {
41
+	case <-rs.closed:
42
+		return ErrSinkClosed
43
+	default:
44
+	}
45
+
46
+	if backoff := rs.strategy.Proceed(event); backoff > 0 {
47
+		if timer == nil {
48
+			timer = time.NewTimer(backoff)
49
+			defer timer.Stop()
50
+		} else {
51
+			timer.Reset(backoff)
52
+		}
53
+
54
+		select {
55
+		case <-timer.C:
56
+			goto retry
57
+		case <-rs.closed:
58
+			return ErrSinkClosed
59
+		}
60
+	}
61
+
62
+	if err := rs.sink.Write(event); err != nil {
63
+		if err == ErrSinkClosed {
64
+			// terminal!
65
+			return err
66
+		}
67
+
68
+		logger := logger.WithError(err) // shadow!!
69
+
70
+		if rs.strategy.Failure(event, err) {
71
+			logger.Errorf("retryingsink: dropped event")
72
+			return nil
73
+		}
74
+
75
+		logger.Errorf("retryingsink: error writing event, retrying")
76
+		goto retry
77
+	}
78
+
79
+	rs.strategy.Success(event)
80
+	return nil
81
+}
82
+
83
+// Close closes the sink and the underlying sink.
84
+func (rs *RetryingSink) Close() error {
85
+	select {
86
+	case <-rs.closed:
87
+		return ErrSinkClosed
88
+	default:
89
+		close(rs.closed)
90
+		return rs.sink.Close()
91
+	}
92
+}
93
+
94
+// RetryStrategy defines a strategy for retrying event sink writes.
95
+//
96
+// All methods should be goroutine safe.
97
+type RetryStrategy interface {
98
+	// Proceed is called before every event send. If proceed returns a
99
+	// positive, non-zero integer, the retryer will back off by the provided
100
+	// duration.
101
+	//
102
+	// An event is provided, but may be ignored.
103
+	Proceed(event Event) time.Duration
104
+
105
+	// Failure reports a failure to the strategy. If this method returns true,
106
+	// the event should be dropped.
107
+	Failure(event Event, err error) bool
108
+
109
+	// Success should be called when an event is sent successfully.
110
+	Success(event Event)
111
+}
112
+
113
+// TODO(stevvooe): We are using circuit breaker here. May want to provide
114
+// bounded exponential backoff, as well.
115
+
116
+// Breaker implements a circuit breaker retry strategy.
117
+//
118
+// The current implementation never drops events.
119
+type Breaker struct {
120
+	threshold int
121
+	recent    int
122
+	last      time.Time
123
+	backoff   time.Duration // time after which we retry after failure.
124
+	mu        sync.Mutex
125
+}
126
+
127
+var _ RetryStrategy = &Breaker{}
128
+
129
+// NewBreaker returns a breaker that will backoff after the threshold has been
130
+// tripped. A Breaker is thread safe and may be shared by many goroutines.
131
+func NewBreaker(threshold int, backoff time.Duration) *Breaker {
132
+	return &Breaker{
133
+		threshold: threshold,
134
+		backoff:   backoff,
135
+	}
136
+}
137
+
138
+// Proceed checks the failures against the threshold.
139
+func (b *Breaker) Proceed(event Event) time.Duration {
140
+	b.mu.Lock()
141
+	defer b.mu.Unlock()
142
+
143
+	if b.recent < b.threshold {
144
+		return 0
145
+	}
146
+
147
+	return b.last.Add(b.backoff).Sub(time.Now())
148
+}
149
+
150
+// Success resets the breaker.
151
+func (b *Breaker) Success(event Event) {
152
+	b.mu.Lock()
153
+	defer b.mu.Unlock()
154
+
155
+	b.recent = 0
156
+	b.last = time.Time{}
157
+}
158
+
159
+// Failure records the failure and latest failure time.
160
+func (b *Breaker) Failure(event Event, err error) bool {
161
+	b.mu.Lock()
162
+	defer b.mu.Unlock()
163
+
164
+	b.recent++
165
+	b.last = time.Now().UTC()
166
+	return false // never drop events.
167
+}
... ...
@@ -1,9 +1,7 @@
1 1
 language: go
2 2
 
3 3
 go:
4
-  - 1.3
5
-#  - 1.4
6
-# see https://github.com/moovweb/gvm/pull/116 for why Go 1.4 is currently disabled
4
+  - 1.5.3
7 5
 
8 6
 # let us have speedy Docker-based Travis workers
9 7
 sudo: false
... ...
@@ -11,19 +9,18 @@ sudo: false
11 11
 before_install:
12 12
   # Symlink below is needed for Travis CI to work correctly on personal forks of libkv
13 13
   - ln -s $HOME/gopath/src/github.com/${TRAVIS_REPO_SLUG///libkv/} $HOME/gopath/src/github.com/docker
14
-  - go get golang.org/x/tools/cmd/vet
15 14
   - go get golang.org/x/tools/cmd/cover
16 15
   - go get github.com/mattn/goveralls
17 16
   - go get github.com/golang/lint/golint
18 17
   - go get github.com/GeertJohan/fgt
19 18
 
20 19
 before_script:
21
-  - script/travis_consul.sh 0.5.2 
22
-  - script/travis_etcd.sh 2.2.0
23
-  - script/travis_zk.sh 3.4.6
20
+  - script/travis_consul.sh 0.6.3
21
+  - script/travis_etcd.sh 2.2.5
22
+  - script/travis_zk.sh 3.5.1-alpha
24 23
 
25 24
 script:
26
-  - ./consul agent -server -bootstrap-expect 1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null &
25
+  - ./consul agent -server -bootstrap -advertise=127.0.0.1 -data-dir /tmp/consul -config-file=./config.json 1>/dev/null &
27 26
   - ./etcd/etcd --listen-client-urls 'http://0.0.0.0:4001' --advertise-client-urls 'http://127.0.0.1:4001' >/dev/null 2>&1 &
28 27
   - ./zk/bin/zkServer.sh start ./zk/conf/zoo.cfg 1> /dev/null
29 28
   - script/validate-gofmt
... ...
@@ -176,7 +176,7 @@
176 176
 
177 177
    END OF TERMS AND CONDITIONS
178 178
 
179
-   Copyright 2014-2015 Docker, Inc.
179
+   Copyright 2014-2016 Docker, Inc.
180 180
 
181 181
    Licensed under the Apache License, Version 2.0 (the "License");
182 182
    you may not use this file except in compliance with the License.
183 183
new file mode 100644
... ...
@@ -0,0 +1,46 @@
0
+# Libkv maintainers file
1
+#
2
+# This file describes who runs the docker/libkv project and how.
3
+# This is a living document - if you see something out of date or missing, speak up!
4
+#
5
+# It is structured to be consumable by both humans and programs.
6
+# To extract its contents programmatically, use any TOML-compliant parser.
7
+#
8
+# This file is compiled into the MAINTAINERS file in docker/opensource.
9
+#
10
+[Org]
11
+	[Org."Core maintainers"]
12
+		people = [
13
+			"abronan",
14
+			"aluzzardi",
15
+			"sanimej",
16
+			"vieux",
17
+		]
18
+
19
+[people]
20
+
21
+# A reference list of all people associated with the project.
22
+# All other sections should refer to people by their canonical key
23
+# in the people section.
24
+
25
+	# ADD YOURSELF HERE IN ALPHABETICAL ORDER
26
+
27
+	[people.abronan]
28
+	Name = "Alexandre Beslic"
29
+	Email = "abronan@docker.com"
30
+	GitHub = "abronan"
31
+
32
+	[people.aluzzardi]
33
+	Name = "Andrea Luzzardi"
34
+	Email = "al@docker.com"
35
+	GitHub = "aluzzardi"
36
+
37
+	[people.sanimej]
38
+	Name = "Santhosh Manohar"
39
+	Email = "santhosh@docker.com"
40
+	GitHub = "sanimej"
41
+
42
+	[people.vieux]
43
+	Name = "Victor Vieux"
44
+	Email = "vieux@docker.com"
45
+	GitHub = "vieux"
... ...
@@ -3,6 +3,7 @@
3 3
 [![GoDoc](https://godoc.org/github.com/docker/libkv?status.png)](https://godoc.org/github.com/docker/libkv)
4 4
 [![Build Status](https://travis-ci.org/docker/libkv.svg?branch=master)](https://travis-ci.org/docker/libkv)
5 5
 [![Coverage Status](https://coveralls.io/repos/docker/libkv/badge.svg)](https://coveralls.io/r/docker/libkv)
6
+[![Go Report Card](https://goreportcard.com/badge/github.com/docker/libkv)](https://goreportcard.com/report/github.com/docker/libkv)
6 7
 
7 8
 `libkv` provides a `Go` native library to store metadata.
8 9
 
... ...
@@ -10,7 +11,7 @@ The goal of `libkv` is to abstract common store operations for multiple distribu
10 10
 
11 11
 For example, you can use it to store your metadata or for service discovery to register machines and endpoints inside your cluster.
12 12
 
13
-You can also easily implement a generic *Leader Election* on top of it (see the [swarm/leadership](https://github.com/docker/swarm/tree/master/leadership) package).
13
+You can also easily implement a generic *Leader Election* on top of it (see the [docker/leadership](https://github.com/docker/leadership) repository).
14 14
 
15 15
 As of now, `libkv` offers support for `Consul`, `Etcd`, `Zookeeper` (**Distributed** store) and `BoltDB` (**Local** store).
16 16
 
... ...
@@ -30,7 +31,7 @@ You can find examples of usage for `libkv` under in `docs/examples.go`. Optional
30 30
 
31 31
 `libkv` supports:
32 32
 - Consul versions >= `0.5.1` because it uses Sessions with `Delete` behavior for the use of `TTLs` (mimics zookeeper's Ephemeral node support), If you don't plan to use `TTLs`: you can use Consul version `0.4.0+`.
33
-- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds mor capabilities.
33
+- Etcd versions >= `2.0` because it uses the new `coreos/etcd/client`, this might change in the future as the support for `APIv3` comes along and adds more capabilities.
34 34
 - Zookeeper versions >= `3.4.5`. Although this might work with previous version but this remains untested as of now.
35 35
 - Boltdb, which shouldn't be subject to any version dependencies.
36 36
 
... ...
@@ -83,7 +84,7 @@ Please refer to the `docs/compatibility.md` to see what are the special cases fo
83 83
 
84 84
 Other than those special cases, you should expect the same experience for basic operations like `Get`/`Put`, etc.
85 85
 
86
-Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **swarm/leadership** or **swarm/discovery** packages in **docker/swarm**).
86
+Calls like `WatchTree` may return different events (or number of events) depending on the backend (for now, `Etcd` and `Consul` will likely return more events than `Zookeeper` that you should triage properly). Although you should be able to use it successfully to watch on events in an interchangeable way (see the **docker/leadership** repository or the **pkg/discovery/kv** package in **docker/docker**).
87 87
 
88 88
 ## TLS
89 89
 
... ...
@@ -103,4 +104,4 @@ Want to hack on libkv? [Docker's contributions guidelines](https://github.com/do
103 103
 
104 104
 ##Copyright and license
105 105
 
106
-Copyright © 2014-2015 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/.
106
+Copyright © 2014-2016 Docker, Inc. All rights reserved, except as follows. Code is released under the Apache 2.0 license. The README.md file, and files in the "docs" folder are licensed under the Creative Commons Attribution 4.0 International License under the terms and conditions set forth in the file "LICENSE.docs". You may obtain a duplicate copy of the same license, titled CC-BY-SA-4.0, at http://creativecommons.org/licenses/by/4.0/.
... ...
@@ -25,7 +25,7 @@ var (
25 25
 	}()
26 26
 )
27 27
 
28
-// NewStore creates a an instance of store
28
+// NewStore creates an instance of store
29 29
 func NewStore(backend store.Backend, addrs []string, options *store.Config) (store.Store, error) {
30 30
 	if init, exists := initializers[backend]; exists {
31 31
 		return init(addrs, options)
... ...
@@ -19,8 +19,6 @@ var (
19 19
 	// ErrMultipleEndpointsUnsupported is thrown when multiple endpoints specified for
20 20
 	// BoltDB. Endpoint has to be a local file path
21 21
 	ErrMultipleEndpointsUnsupported = errors.New("boltdb supports one endpoint and should be a file path")
22
-	// ErrBoltBucketNotFound is thrown when specified BoltBD bucket doesn't exist in the DB
23
-	ErrBoltBucketNotFound = errors.New("boltdb bucket doesn't exist")
24 22
 	// ErrBoltBucketOptionMissing is thrown when boltBcuket config option is missing
25 23
 	ErrBoltBucketOptionMissing = errors.New("boltBucket config option missing")
26 24
 )
... ...
@@ -141,7 +139,7 @@ func (b *BoltDB) Get(key string) (*store.KVPair, error) {
141 141
 	err = db.View(func(tx *bolt.Tx) error {
142 142
 		bucket := tx.Bucket(b.boltBucket)
143 143
 		if bucket == nil {
144
-			return ErrBoltBucketNotFound
144
+			return store.ErrKeyNotFound
145 145
 		}
146 146
 
147 147
 		v := bucket.Get([]byte(key))
... ...
@@ -217,7 +215,7 @@ func (b *BoltDB) Delete(key string) error {
217 217
 	err = db.Update(func(tx *bolt.Tx) error {
218 218
 		bucket := tx.Bucket(b.boltBucket)
219 219
 		if bucket == nil {
220
-			return ErrBoltBucketNotFound
220
+			return store.ErrKeyNotFound
221 221
 		}
222 222
 		err := bucket.Delete([]byte(key))
223 223
 		return err
... ...
@@ -243,7 +241,7 @@ func (b *BoltDB) Exists(key string) (bool, error) {
243 243
 	err = db.View(func(tx *bolt.Tx) error {
244 244
 		bucket := tx.Bucket(b.boltBucket)
245 245
 		if bucket == nil {
246
-			return ErrBoltBucketNotFound
246
+			return store.ErrKeyNotFound
247 247
 		}
248 248
 
249 249
 		val = bucket.Get([]byte(key))
... ...
@@ -276,7 +274,7 @@ func (b *BoltDB) List(keyPrefix string) ([]*store.KVPair, error) {
276 276
 	err = db.View(func(tx *bolt.Tx) error {
277 277
 		bucket := tx.Bucket(b.boltBucket)
278 278
 		if bucket == nil {
279
-			return ErrBoltBucketNotFound
279
+			return store.ErrKeyNotFound
280 280
 		}
281 281
 
282 282
 		cursor := bucket.Cursor()
... ...
@@ -326,7 +324,7 @@ func (b *BoltDB) AtomicDelete(key string, previous *store.KVPair) (bool, error)
326 326
 	err = db.Update(func(tx *bolt.Tx) error {
327 327
 		bucket := tx.Bucket(b.boltBucket)
328 328
 		if bucket == nil {
329
-			return ErrBoltBucketNotFound
329
+			return store.ErrKeyNotFound
330 330
 		}
331 331
 
332 332
 		val = bucket.Get([]byte(key))
... ...
@@ -370,7 +368,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt
370 370
 		bucket := tx.Bucket(b.boltBucket)
371 371
 		if bucket == nil {
372 372
 			if previous != nil {
373
-				return ErrBoltBucketNotFound
373
+				return store.ErrKeyNotFound
374 374
 			}
375 375
 			bucket, err = tx.CreateBucket(b.boltBucket)
376 376
 			if err != nil {
... ...
@@ -381,7 +379,7 @@ func (b *BoltDB) AtomicPut(key string, value []byte, previous *store.KVPair, opt
381 381
 		// doesn't exist in the DB.
382 382
 		val = bucket.Get([]byte(key))
383 383
 		if previous == nil && len(val) != 0 {
384
-			return store.ErrKeyModified
384
+			return store.ErrKeyExists
385 385
 		}
386 386
 		if previous != nil {
387 387
 			if len(val) == 0 {
... ...
@@ -440,7 +438,7 @@ func (b *BoltDB) DeleteTree(keyPrefix string) error {
440 440
 	err = db.Update(func(tx *bolt.Tx) error {
441 441
 		bucket := tx.Bucket(b.boltBucket)
442 442
 		if bucket == nil {
443
-			return ErrBoltBucketNotFound
443
+			return store.ErrKeyNotFound
444 444
 		}
445 445
 
446 446
 		cursor := bucket.Cursor()
... ...
@@ -22,6 +22,14 @@ const (
22 22
 	// RenewSessionRetryMax is the number of time we should try
23 23
 	// to renew the session before giving up and throwing an error
24 24
 	RenewSessionRetryMax = 5
25
+
26
+	// MaxSessionDestroyAttempts is the maximum times we will try
27
+	// to explicitly destroy the session attached to a lock after
28
+	// the connectivity to the store has been lost
29
+	MaxSessionDestroyAttempts = 5
30
+
31
+	// defaultLockTTL is the default ttl for the consul lock
32
+	defaultLockTTL = 20 * time.Second
25 33
 )
26 34
 
27 35
 var (
... ...
@@ -186,6 +194,7 @@ func (s *Consul) Put(key string, value []byte, opts *store.WriteOptions) error {
186 186
 	p := &api.KVPair{
187 187
 		Key:   key,
188 188
 		Value: value,
189
+		Flags: api.LockFlagValue,
189 190
 	}
190 191
 
191 192
 	if opts != nil && opts.TTL > 0 {
... ...
@@ -378,44 +387,99 @@ func (s *Consul) NewLock(key string, options *store.LockOptions) (store.Locker,
378 378
 
379 379
 	lock := &consulLock{}
380 380
 
381
+	ttl := defaultLockTTL
382
+
381 383
 	if options != nil {
382 384
 		// Set optional TTL on Lock
383 385
 		if options.TTL != 0 {
384
-			entry := &api.SessionEntry{
385
-				Behavior:  api.SessionBehaviorRelease, // Release the lock when the session expires
386
-				TTL:       (options.TTL / 2).String(), // Consul multiplies the TTL by 2x
387
-				LockDelay: 1 * time.Millisecond,       // Virtually disable lock delay
388
-			}
389
-
390
-			// Create the key session
391
-			session, _, err := s.client.Session().Create(entry, nil)
392
-			if err != nil {
393
-				return nil, err
394
-			}
395
-
396
-			// Place the session on lock
397
-			lockOpts.Session = session
398
-
399
-			// Renew the session ttl lock periodically
400
-			go s.client.Session().RenewPeriodic(entry.TTL, session, nil, options.RenewLock)
401
-			lock.renewCh = options.RenewLock
386
+			ttl = options.TTL
402 387
 		}
403
-
404 388
 		// Set optional value on Lock
405 389
 		if options.Value != nil {
406 390
 			lockOpts.Value = options.Value
407 391
 		}
408 392
 	}
409 393
 
394
+	entry := &api.SessionEntry{
395
+		Behavior:  api.SessionBehaviorRelease, // Release the lock when the session expires
396
+		TTL:       (ttl / 2).String(),         // Consul multiplies the TTL by 2x
397
+		LockDelay: 1 * time.Millisecond,       // Virtually disable lock delay
398
+	}
399
+
400
+	// Create the key session
401
+	session, _, err := s.client.Session().Create(entry, nil)
402
+	if err != nil {
403
+		return nil, err
404
+	}
405
+
406
+	// Place the session and renew chan on lock
407
+	lockOpts.Session = session
408
+	lock.renewCh = options.RenewLock
409
+
410 410
 	l, err := s.client.LockOpts(lockOpts)
411 411
 	if err != nil {
412 412
 		return nil, err
413 413
 	}
414 414
 
415
+	// Renew the session ttl lock periodically
416
+	s.renewLockSession(entry.TTL, session, options.RenewLock)
417
+
415 418
 	lock.lock = l
416 419
 	return lock, nil
417 420
 }
418 421
 
422
+// renewLockSession is used to renew a session Lock, it takes
423
+// a stopRenew chan which is used to explicitely stop the session
424
+// renew process. The renew routine never stops until a signal is
425
+// sent to this channel. If deleting the session fails because the
426
+// connection to the store is lost, it keeps trying to delete the
427
+// session periodically until it can contact the store, this ensures
428
+// that the lock is not maintained indefinitely which ensures liveness
429
+// over safety for the lock when the store becomes unavailable.
430
+func (s *Consul) renewLockSession(initialTTL string, id string, stopRenew chan struct{}) {
431
+	sessionDestroyAttempts := 0
432
+	ttl, err := time.ParseDuration(initialTTL)
433
+	if err != nil {
434
+		return
435
+	}
436
+	go func() {
437
+		for {
438
+			select {
439
+			case <-time.After(ttl / 2):
440
+				entry, _, err := s.client.Session().Renew(id, nil)
441
+				if err != nil {
442
+					// If an error occurs, continue until the
443
+					// session gets destroyed explicitely or
444
+					// the session ttl times out
445
+					continue
446
+				}
447
+				if entry == nil {
448
+					return
449
+				}
450
+
451
+				// Handle the server updating the TTL
452
+				ttl, _ = time.ParseDuration(entry.TTL)
453
+
454
+			case <-stopRenew:
455
+				// Attempt a session destroy
456
+				_, err := s.client.Session().Destroy(id, nil)
457
+				if err == nil {
458
+					return
459
+				}
460
+
461
+				if sessionDestroyAttempts >= MaxSessionDestroyAttempts {
462
+					return
463
+				}
464
+
465
+				// We can't destroy the session because the store
466
+				// is unavailable, wait for the session renew period
467
+				sessionDestroyAttempts++
468
+				time.Sleep(ttl / 2)
469
+			}
470
+		}
471
+	}()
472
+}
473
+
419 474
 // Lock attempts to acquire the lock and blocks while
420 475
 // doing so. It returns a channel that is closed if our
421 476
 // lock is lost or if an error occurs
... ...
@@ -436,7 +500,7 @@ func (l *consulLock) Unlock() error {
436 436
 // modified in the meantime, throws an error if this is the case
437 437
 func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, options *store.WriteOptions) (bool, *store.KVPair, error) {
438 438
 
439
-	p := &api.KVPair{Key: s.normalize(key), Value: value}
439
+	p := &api.KVPair{Key: s.normalize(key), Value: value, Flags: api.LockFlagValue}
440 440
 
441 441
 	if previous == nil {
442 442
 		// Consul interprets ModifyIndex = 0 as new key.
... ...
@@ -445,9 +509,14 @@ func (s *Consul) AtomicPut(key string, value []byte, previous *store.KVPair, opt
445 445
 		p.ModifyIndex = previous.LastIndex
446 446
 	}
447 447
 
448
-	if work, _, err := s.client.KV().CAS(p, nil); err != nil {
448
+	ok, _, err := s.client.KV().CAS(p, nil)
449
+	if err != nil {
449 450
 		return false, nil, err
450
-	} else if !work {
451
+	}
452
+	if !ok {
453
+		if previous == nil {
454
+			return false, nil, store.ErrKeyExists
455
+		}
451 456
 		return false, nil, store.ErrKeyModified
452 457
 	}
453 458
 
... ...
@@ -466,7 +535,7 @@ func (s *Consul) AtomicDelete(key string, previous *store.KVPair) (bool, error)
466 466
 		return false, store.ErrPreviousNotSpecified
467 467
 	}
468 468
 
469
-	p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex}
469
+	p := &api.KVPair{Key: s.normalize(key), ModifyIndex: previous.LastIndex, Flags: api.LockFlagValue}
470 470
 
471 471
 	// Extra Get operation to check on the key
472 472
 	_, err := s.Get(key)
... ...
@@ -75,6 +75,9 @@ func New(addrs []string, options *store.Config) (store.Store, error) {
75 75
 		if options.ConnectionTimeout != 0 {
76 76
 			setTimeout(cfg, options.ConnectionTimeout)
77 77
 		}
78
+		if options.Username != "" {
79
+			setCredentials(cfg, options.Username, options.Password)
80
+		}
78 81
 	}
79 82
 
80 83
 	c, err := etcd.New(*cfg)
... ...
@@ -119,6 +122,12 @@ func setTimeout(cfg *etcd.Config, time time.Duration) {
119 119
 	cfg.HeaderTimeoutPerRequest = time
120 120
 }
121 121
 
122
+// setCredentials sets the username/password credentials for connecting to Etcd
123
+func setCredentials(cfg *etcd.Config, username, password string) {
124
+	cfg.Username = username
125
+	cfg.Password = password
126
+}
127
+
122 128
 // Normalize the key for usage in Etcd
123 129
 func (s *Etcd) normalize(key string) string {
124 130
 	key = store.Normalize(key)
... ...
@@ -335,6 +344,10 @@ func (s *Etcd) AtomicPut(key string, value []byte, previous *store.KVPair, opts
335 335
 			if etcdError.Code == etcd.ErrorCodeTestFailed {
336 336
 				return false, nil, store.ErrKeyModified
337 337
 			}
338
+			// Node exists error (when PrevNoExist)
339
+			if etcdError.Code == etcd.ErrorCodeNodeExist {
340
+				return false, nil, store.ErrKeyExists
341
+			}
338 342
 		}
339 343
 		return false, nil, err
340 344
 	}
... ...
@@ -508,15 +521,15 @@ func (l *etcdLock) Lock(stopChan chan struct{}) (<-chan struct{}, error) {
508 508
 			// Wait for the key to be available or for
509 509
 			// a signal to stop trying to lock the key
510 510
 			select {
511
-			case _ = <-free:
511
+			case <-free:
512 512
 				break
513 513
 			case err := <-errorCh:
514 514
 				return nil, err
515
-			case _ = <-stopChan:
515
+			case <-stopChan:
516 516
 				return nil, ErrAbortTryLock
517 517
 			}
518 518
 
519
-			// Delete or Expire event occured
519
+			// Delete or Expire event occurred
520 520
 			// Retry
521 521
 		}
522 522
 	}
... ...
@@ -35,6 +35,8 @@ var (
35 35
 	ErrKeyNotFound = errors.New("Key not found in store")
36 36
 	// ErrPreviousNotSpecified is thrown when the previous value is not specified for an atomic operation
37 37
 	ErrPreviousNotSpecified = errors.New("Previous K/V pair should be provided for the Atomic operation")
38
+	// ErrKeyExists is thrown when the previous value exists in the case of an AtomicPut
39
+	ErrKeyExists = errors.New("Previous K/V pair exists, cannot complete Atomic operation")
38 40
 )
39 41
 
40 42
 // Config contains the options for a storage client
... ...
@@ -44,6 +46,8 @@ type Config struct {
44 44
 	ConnectionTimeout time.Duration
45 45
 	Bucket            string
46 46
 	PersistConnection bool
47
+	Username          string
48
+	Password          string
47 49
 }
48 50
 
49 51
 // ClientTLSConfig contains data for a Client TLS configuration in the form
... ...
@@ -291,8 +291,8 @@ func (s *Zookeeper) DeleteTree(directory string) error {
291 291
 // AtomicPut put a value at "key" if the key has not been
292 292
 // modified in the meantime, throws an error if this is the case
293 293
 func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair, _ *store.WriteOptions) (bool, *store.KVPair, error) {
294
-
295 294
 	var lastIndex uint64
295
+
296 296
 	if previous != nil {
297 297
 		meta, err := s.client.Set(s.normalize(key), value, int32(previous.LastIndex))
298 298
 		if err != nil {
... ...
@@ -307,8 +307,9 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair,
307 307
 		// Interpret previous == nil as create operation.
308 308
 		_, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll))
309 309
 		if err != nil {
310
-			// Zookeeper will complain if the directory doesn't exist.
310
+			// Directory does not exist
311 311
 			if err == zk.ErrNoNode {
312
+
312 313
 				// Create the directory
313 314
 				parts := store.SplitKey(strings.TrimSuffix(key, "/"))
314 315
 				parts = parts[:len(parts)-1]
... ...
@@ -316,11 +317,22 @@ func (s *Zookeeper) AtomicPut(key string, value []byte, previous *store.KVPair,
316 316
 					// Failed to create the directory.
317 317
 					return false, nil, err
318 318
 				}
319
+
320
+				// Create the node
319 321
 				if _, err := s.client.Create(s.normalize(key), value, 0, zk.WorldACL(zk.PermAll)); err != nil {
322
+					// Node exist error (when previous nil)
323
+					if err == zk.ErrNodeExists {
324
+						return false, nil, store.ErrKeyExists
325
+					}
320 326
 					return false, nil, err
321 327
 				}
322 328
 
323 329
 			} else {
330
+				// Node Exists error (when previous nil)
331
+				if err == zk.ErrNodeExists {
332
+					return false, nil, store.ErrKeyExists
333
+				}
334
+
324 335
 				// Unhandled error
325 336
 				return false, nil, err
326 337
 			}
327 338
new file mode 100644
... ...
@@ -0,0 +1,353 @@
0
+Mozilla Public License, version 2.0
1
+
2
+1. Definitions
3
+
4
+1.1. “Contributor”
5
+
6
+     means each individual or legal entity that creates, contributes to the
7
+     creation of, or owns Covered Software.
8
+
9
+1.2. “Contributor Version”
10
+
11
+     means the combination of the Contributions of others (if any) used by a
12
+     Contributor and that particular Contributor’s Contribution.
13
+
14
+1.3. “Contribution”
15
+
16
+     means Covered Software of a particular Contributor.
17
+
18
+1.4. “Covered Software”
19
+
20
+     means Source Code Form to which the initial Contributor has attached the
21
+     notice in Exhibit A, the Executable Form of such Source Code Form, and
22
+     Modifications of such Source Code Form, in each case including portions
23
+     thereof.
24
+
25
+1.5. “Incompatible With Secondary Licenses”
26
+     means
27
+
28
+     a. that the initial Contributor has attached the notice described in
29
+        Exhibit B to the Covered Software; or
30
+
31
+     b. that the Covered Software was made available under the terms of version
32
+        1.1 or earlier of the License, but not also under the terms of a
33
+        Secondary License.
34
+
35
+1.6. “Executable Form”
36
+
37
+     means any form of the work other than Source Code Form.
38
+
39
+1.7. “Larger Work”
40
+
41
+     means a work that combines Covered Software with other material, in a separate
42
+     file or files, that is not Covered Software.
43
+
44
+1.8. “License”
45
+
46
+     means this document.
47
+
48
+1.9. “Licensable”
49
+
50
+     means having the right to grant, to the maximum extent possible, whether at the
51
+     time of the initial grant or subsequently, any and all of the rights conveyed by
52
+     this License.
53
+
54
+1.10. “Modifications”
55
+
56
+     means any of the following:
57
+
58
+     a. any file in Source Code Form that results from an addition to, deletion
59
+        from, or modification of the contents of Covered Software; or
60
+
61
+     b. any new file in Source Code Form that contains any Covered Software.
62
+
63
+1.11. “Patent Claims” of a Contributor
64
+
65
+      means any patent claim(s), including without limitation, method, process,
66
+      and apparatus claims, in any patent Licensable by such Contributor that
67
+      would be infringed, but for the grant of the License, by the making,
68
+      using, selling, offering for sale, having made, import, or transfer of
69
+      either its Contributions or its Contributor Version.
70
+
71
+1.12. “Secondary License”
72
+
73
+      means either the GNU General Public License, Version 2.0, the GNU Lesser
74
+      General Public License, Version 2.1, the GNU Affero General Public
75
+      License, Version 3.0, or any later versions of those licenses.
76
+
77
+1.13. “Source Code Form”
78
+
79
+      means the form of the work preferred for making modifications.
80
+
81
+1.14. “You” (or “Your”)
82
+
83
+      means an individual or a legal entity exercising rights under this
84
+      License. For legal entities, “You” includes any entity that controls, is
85
+      controlled by, or is under common control with You. For purposes of this
86
+      definition, “control” means (a) the power, direct or indirect, to cause
87
+      the direction or management of such entity, whether by contract or
88
+      otherwise, or (b) ownership of more than fifty percent (50%) of the
89
+      outstanding shares or beneficial ownership of such entity.
90
+
91
+
92
+2. License Grants and Conditions
93
+
94
+2.1. Grants
95
+
96
+     Each Contributor hereby grants You a world-wide, royalty-free,
97
+     non-exclusive license:
98
+
99
+     a. under intellectual property rights (other than patent or trademark)
100
+        Licensable by such Contributor to use, reproduce, make available,
101
+        modify, display, perform, distribute, and otherwise exploit its
102
+        Contributions, either on an unmodified basis, with Modifications, or as
103
+        part of a Larger Work; and
104
+
105
+     b. under Patent Claims of such Contributor to make, use, sell, offer for
106
+        sale, have made, import, and otherwise transfer either its Contributions
107
+        or its Contributor Version.
108
+
109
+2.2. Effective Date
110
+
111
+     The licenses granted in Section 2.1 with respect to any Contribution become
112
+     effective for each Contribution on the date the Contributor first distributes
113
+     such Contribution.
114
+
115
+2.3. Limitations on Grant Scope
116
+
117
+     The licenses granted in this Section 2 are the only rights granted under this
118
+     License. No additional rights or licenses will be implied from the distribution
119
+     or licensing of Covered Software under this License. Notwithstanding Section
120
+     2.1(b) above, no patent license is granted by a Contributor:
121
+
122
+     a. for any code that a Contributor has removed from Covered Software; or
123
+
124
+     b. for infringements caused by: (i) Your and any other third party’s
125
+        modifications of Covered Software, or (ii) the combination of its
126
+        Contributions with other software (except as part of its Contributor
127
+        Version); or
128
+
129
+     c. under Patent Claims infringed by Covered Software in the absence of its
130
+        Contributions.
131
+
132
+     This License does not grant any rights in the trademarks, service marks, or
133
+     logos of any Contributor (except as may be necessary to comply with the
134
+     notice requirements in Section 3.4).
135
+
136
+2.4. Subsequent Licenses
137
+
138
+     No Contributor makes additional grants as a result of Your choice to
139
+     distribute the Covered Software under a subsequent version of this License
140
+     (see Section 10.2) or under the terms of a Secondary License (if permitted
141
+     under the terms of Section 3.3).
142
+
143
+2.5. Representation
144
+
145
+     Each Contributor represents that the Contributor believes its Contributions
146
+     are its original creation(s) or it has sufficient rights to grant the
147
+     rights to its Contributions conveyed by this License.
148
+
149
+2.6. Fair Use
150
+
151
+     This License is not intended to limit any rights You have under applicable
152
+     copyright doctrines of fair use, fair dealing, or other equivalents.
153
+
154
+2.7. Conditions
155
+
156
+     Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
157
+     Section 2.1.
158
+
159
+
160
+3. Responsibilities
161
+
162
+3.1. Distribution of Source Form
163
+
164
+     All distribution of Covered Software in Source Code Form, including any
165
+     Modifications that You create or to which You contribute, must be under the
166
+     terms of this License. You must inform recipients that the Source Code Form
167
+     of the Covered Software is governed by the terms of this License, and how
168
+     they can obtain a copy of this License. You may not attempt to alter or
169
+     restrict the recipients’ rights in the Source Code Form.
170
+
171
+3.2. Distribution of Executable Form
172
+
173
+     If You distribute Covered Software in Executable Form then:
174
+
175
+     a. such Covered Software must also be made available in Source Code Form,
176
+        as described in Section 3.1, and You must inform recipients of the
177
+        Executable Form how they can obtain a copy of such Source Code Form by
178
+        reasonable means in a timely manner, at a charge no more than the cost
179
+        of distribution to the recipient; and
180
+
181
+     b. You may distribute such Executable Form under the terms of this License,
182
+        or sublicense it under different terms, provided that the license for
183
+        the Executable Form does not attempt to limit or alter the recipients’
184
+        rights in the Source Code Form under this License.
185
+
186
+3.3. Distribution of a Larger Work
187
+
188
+     You may create and distribute a Larger Work under terms of Your choice,
189
+     provided that You also comply with the requirements of this License for the
190
+     Covered Software. If the Larger Work is a combination of Covered Software
191
+     with a work governed by one or more Secondary Licenses, and the Covered
192
+     Software is not Incompatible With Secondary Licenses, this License permits
193
+     You to additionally distribute such Covered Software under the terms of
194
+     such Secondary License(s), so that the recipient of the Larger Work may, at
195
+     their option, further distribute the Covered Software under the terms of
196
+     either this License or such Secondary License(s).
197
+
198
+3.4. Notices
199
+
200
+     You may not remove or alter the substance of any license notices (including
201
+     copyright notices, patent notices, disclaimers of warranty, or limitations
202
+     of liability) contained within the Source Code Form of the Covered
203
+     Software, except that You may alter any license notices to the extent
204
+     required to remedy known factual inaccuracies.
205
+
206
+3.5. Application of Additional Terms
207
+
208
+     You may choose to offer, and to charge a fee for, warranty, support,
209
+     indemnity or liability obligations to one or more recipients of Covered
210
+     Software. However, You may do so only on Your own behalf, and not on behalf
211
+     of any Contributor. You must make it absolutely clear that any such
212
+     warranty, support, indemnity, or liability obligation is offered by You
213
+     alone, and You hereby agree to indemnify every Contributor for any
214
+     liability incurred by such Contributor as a result of warranty, support,
215
+     indemnity or liability terms You offer. You may include additional
216
+     disclaimers of warranty and limitations of liability specific to any
217
+     jurisdiction.
218
+
219
+4. Inability to Comply Due to Statute or Regulation
220
+
221
+   If it is impossible for You to comply with any of the terms of this License
222
+   with respect to some or all of the Covered Software due to statute, judicial
223
+   order, or regulation then You must: (a) comply with the terms of this License
224
+   to the maximum extent possible; and (b) describe the limitations and the code
225
+   they affect. Such description must be placed in a text file included with all
226
+   distributions of the Covered Software under this License. Except to the
227
+   extent prohibited by statute or regulation, such description must be
228
+   sufficiently detailed for a recipient of ordinary skill to be able to
229
+   understand it.
230
+
231
+5. Termination
232
+
233
+5.1. The rights granted under this License will terminate automatically if You
234
+     fail to comply with any of its terms. However, if You become compliant,
235
+     then the rights granted under this License from a particular Contributor
236
+     are reinstated (a) provisionally, unless and until such Contributor
237
+     explicitly and finally terminates Your grants, and (b) on an ongoing basis,
238
+     if such Contributor fails to notify You of the non-compliance by some
239
+     reasonable means prior to 60 days after You have come back into compliance.
240
+     Moreover, Your grants from a particular Contributor are reinstated on an
241
+     ongoing basis if such Contributor notifies You of the non-compliance by
242
+     some reasonable means, this is the first time You have received notice of
243
+     non-compliance with this License from such Contributor, and You become
244
+     compliant prior to 30 days after Your receipt of the notice.
245
+
246
+5.2. If You initiate litigation against any entity by asserting a patent
247
+     infringement claim (excluding declaratory judgment actions, counter-claims,
248
+     and cross-claims) alleging that a Contributor Version directly or
249
+     indirectly infringes any patent, then the rights granted to You by any and
250
+     all Contributors for the Covered Software under Section 2.1 of this License
251
+     shall terminate.
252
+
253
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
254
+     license agreements (excluding distributors and resellers) which have been
255
+     validly granted by You or Your distributors under this License prior to
256
+     termination shall survive termination.
257
+
258
+6. Disclaimer of Warranty
259
+
260
+   Covered Software is provided under this License on an “as is” basis, without
261
+   warranty of any kind, either expressed, implied, or statutory, including,
262
+   without limitation, warranties that the Covered Software is free of defects,
263
+   merchantable, fit for a particular purpose or non-infringing. The entire
264
+   risk as to the quality and performance of the Covered Software is with You.
265
+   Should any Covered Software prove defective in any respect, You (not any
266
+   Contributor) assume the cost of any necessary servicing, repair, or
267
+   correction. This disclaimer of warranty constitutes an essential part of this
268
+   License. No use of  any Covered Software is authorized under this License
269
+   except under this disclaimer.
270
+
271
+7. Limitation of Liability
272
+
273
+   Under no circumstances and under no legal theory, whether tort (including
274
+   negligence), contract, or otherwise, shall any Contributor, or anyone who
275
+   distributes Covered Software as permitted above, be liable to You for any
276
+   direct, indirect, special, incidental, or consequential damages of any
277
+   character including, without limitation, damages for lost profits, loss of
278
+   goodwill, work stoppage, computer failure or malfunction, or any and all
279
+   other commercial damages or losses, even if such party shall have been
280
+   informed of the possibility of such damages. This limitation of liability
281
+   shall not apply to liability for death or personal injury resulting from such
282
+   party’s negligence to the extent applicable law prohibits such limitation.
283
+   Some jurisdictions do not allow the exclusion or limitation of incidental or
284
+   consequential damages, so this exclusion and limitation may not apply to You.
285
+
286
+8. Litigation
287
+
288
+   Any litigation relating to this License may be brought only in the courts of
289
+   a jurisdiction where the defendant maintains its principal place of business
290
+   and such litigation shall be governed by laws of that jurisdiction, without
291
+   reference to its conflict-of-law provisions. Nothing in this Section shall
292
+   prevent a party’s ability to bring cross-claims or counter-claims.
293
+
294
+9. Miscellaneous
295
+
296
+   This License represents the complete agreement concerning the subject matter
297
+   hereof. If any provision of this License is held to be unenforceable, such
298
+   provision shall be reformed only to the extent necessary to make it
299
+   enforceable. Any law or regulation which provides that the language of a
300
+   contract shall be construed against the drafter shall not be used to construe
301
+   this License against a Contributor.
302
+
303
+
304
+10. Versions of the License
305
+
306
+10.1. New Versions
307
+
308
+      Mozilla Foundation is the license steward. Except as provided in Section
309
+      10.3, no one other than the license steward has the right to modify or
310
+      publish new versions of this License. Each version will be given a
311
+      distinguishing version number.
312
+
313
+10.2. Effect of New Versions
314
+
315
+      You may distribute the Covered Software under the terms of the version of
316
+      the License under which You originally received the Covered Software, or
317
+      under the terms of any subsequent version published by the license
318
+      steward.
319
+
320
+10.3. Modified Versions
321
+
322
+      If you create software not governed by this License, and you want to
323
+      create a new license for such software, you may create and use a modified
324
+      version of this License if you rename the license and remove any
325
+      references to the name of the license steward (except to note that such
326
+      modified license differs from this License).
327
+
328
+10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
329
+      If You choose to distribute Source Code Form that is Incompatible With
330
+      Secondary Licenses under the terms of this version of the License, the
331
+      notice described in Exhibit B of this License must be attached.
332
+
333
+Exhibit A - Source Code Form License Notice
334
+
335
+      This Source Code Form is subject to the
336
+      terms of the Mozilla Public License, v.
337
+      2.0. If a copy of the MPL was not
338
+      distributed with this file, You can
339
+      obtain one at
340
+      http://mozilla.org/MPL/2.0/.
341
+
342
+If it is not possible or desirable to put the notice in a particular file, then
343
+You may include the notice in a location (such as a LICENSE file in a relevant
344
+directory) where a recipient would be likely to look for such a notice.
345
+
346
+You may add additional accurate notices of copyright ownership.
347
+
348
+Exhibit B - “Incompatible With Secondary Licenses” Notice
349
+
350
+      This Source Code Form is “Incompatible
351
+      With Secondary Licenses”, as defined by
352
+      the Mozilla Public License, v. 2.0.
0 353
new file mode 100644
... ...
@@ -0,0 +1,91 @@
0
+# go-multierror
1
+
2
+`go-multierror` is a package for Go that provides a mechanism for
3
+representing a list of `error` values as a single `error`.
4
+
5
+This allows a function in Go to return an `error` that might actually
6
+be a list of errors. If the caller knows this, they can unwrap the
7
+list and access the errors. If the caller doesn't know, the error
8
+formats to a nice human-readable format.
9
+
10
+`go-multierror` implements the
11
+[errwrap](https://github.com/hashicorp/errwrap) interface so that it can
12
+be used with that library, as well.
13
+
14
+## Installation and Docs
15
+
16
+Install using `go get github.com/hashicorp/go-multierror`.
17
+
18
+Full documentation is available at
19
+http://godoc.org/github.com/hashicorp/go-multierror
20
+
21
+## Usage
22
+
23
+go-multierror is easy to use and purposely built to be unobtrusive in
24
+existing Go applications/libraries that may not be aware of it.
25
+
26
+**Building a list of errors**
27
+
28
+The `Append` function is used to create a list of errors. This function
29
+behaves a lot like the Go built-in `append` function: it doesn't matter
30
+if the first argument is nil, a `multierror.Error`, or any other `error`,
31
+the function behaves as you would expect.
32
+
33
+```go
34
+var result error
35
+
36
+if err := step1(); err != nil {
37
+	result = multierror.Append(result, err)
38
+}
39
+if err := step2(); err != nil {
40
+	result = multierror.Append(result, err)
41
+}
42
+
43
+return result
44
+```
45
+
46
+**Customizing the formatting of the errors**
47
+
48
+By specifying a custom `ErrorFormat`, you can customize the format
49
+of the `Error() string` function:
50
+
51
+```go
52
+var result *multierror.Error
53
+
54
+// ... accumulate errors here, maybe using Append
55
+
56
+if result != nil {
57
+	result.ErrorFormat = func([]error) string {
58
+		return "errors!"
59
+	}
60
+}
61
+```
62
+
63
+**Accessing the list of errors**
64
+
65
+`multierror.Error` implements `error` so if the caller doesn't know about
66
+multierror, it will work just fine. But if you're aware a multierror might
67
+be returned, you can use type switches to access the list of errors:
68
+
69
+```go
70
+if err := something(); err != nil {
71
+	if merr, ok := err.(*multierror.Error); ok {
72
+		// Use merr.Errors
73
+	}
74
+}
75
+```
76
+
77
+**Returning a multierror only if there are errors**
78
+
79
+If you build a `multierror.Error`, you can use the `ErrorOrNil` function
80
+to return an `error` implementation only if there are errors to return:
81
+
82
+```go
83
+var result *multierror.Error
84
+
85
+// ... accumulate errors here
86
+
87
+// Return the `error` only if errors were added to the multierror, otherwise
88
+// return nil since there are no errors.
89
+return result.ErrorOrNil()
90
+```
0 91
new file mode 100644
... ...
@@ -0,0 +1,30 @@
0
+package multierror
1
+
2
+// Append is a helper function that will append more errors
3
+// onto an Error in order to create a larger multi-error.
4
+//
5
+// If err is not a multierror.Error, then it will be turned into
6
+// one. If any of the errs are multierr.Error, they will be flattened
7
+// one level into err.
8
+func Append(err error, errs ...error) *Error {
9
+	switch err := err.(type) {
10
+	case *Error:
11
+		// Typed nils can reach here, so initialize if we are nil
12
+		if err == nil {
13
+			err = new(Error)
14
+		}
15
+
16
+		err.Errors = append(err.Errors, errs...)
17
+		return err
18
+	default:
19
+		newErrs := make([]error, 0, len(errs)+1)
20
+		if err != nil {
21
+			newErrs = append(newErrs, err)
22
+		}
23
+		newErrs = append(newErrs, errs...)
24
+
25
+		return &Error{
26
+			Errors: newErrs,
27
+		}
28
+	}
29
+}
0 30
new file mode 100644
... ...
@@ -0,0 +1,23 @@
0
+package multierror
1
+
2
+import (
3
+	"fmt"
4
+	"strings"
5
+)
6
+
7
// ErrorFormatFunc is a function callback that is called by Error to
// turn the list of errors into a string.
type ErrorFormatFunc func([]error) string

// ListFormatFunc is a basic formatter that outputs the number of errors
// that occurred along with a bullet point list of the errors.
func ListFormatFunc(es []error) string {
	bullets := make([]string, len(es))
	for i := range es {
		bullets[i] = "* " + es[i].Error()
	}

	return fmt.Sprintf(
		"%d error(s) occurred:\n\n%s",
		len(es), strings.Join(bullets, "\n"))
}
0 23
new file mode 100644
... ...
@@ -0,0 +1,51 @@
0
+package multierror
1
+
2
+import (
3
+	"fmt"
4
+)
5
+
6
+// Error is an error type to track multiple errors. This is used to
7
+// accumulate errors in cases and return them as a single "error".
8
+type Error struct {
9
+	Errors      []error
10
+	ErrorFormat ErrorFormatFunc
11
+}
12
+
13
+func (e *Error) Error() string {
14
+	fn := e.ErrorFormat
15
+	if fn == nil {
16
+		fn = ListFormatFunc
17
+	}
18
+
19
+	return fn(e.Errors)
20
+}
21
+
22
+// ErrorOrNil returns an error interface if this Error represents
23
+// a list of errors, or returns nil if the list of errors is empty. This
24
+// function is useful at the end of accumulation to make sure that the value
25
+// returned represents the existence of errors.
26
+func (e *Error) ErrorOrNil() error {
27
+	if e == nil {
28
+		return nil
29
+	}
30
+	if len(e.Errors) == 0 {
31
+		return nil
32
+	}
33
+
34
+	return e
35
+}
36
+
37
+func (e *Error) GoString() string {
38
+	return fmt.Sprintf("*%#v", *e)
39
+}
40
+
41
+// WrappedErrors returns the list of errors that this Error is wrapping.
42
+// It is an implementatin of the errwrap.Wrapper interface so that
43
+// multierror.Error can be used with that library.
44
+//
45
+// This method is not safe to be called concurrently and is no different
46
+// than accessing the Errors field directly. It is implementd only to
47
+// satisfy the errwrap.Wrapper interface.
48
+func (e *Error) WrappedErrors() []error {
49
+	return e.Errors
50
+}
... ...
@@ -1,4 +1,4 @@
1
-# memberlist
1
+# memberlist [![GoDoc](https://godoc.org/github.com/hashicorp/memberlist?status.png)](https://godoc.org/github.com/hashicorp/memberlist)
2 2
 
3 3
 memberlist is a [Go](http://www.golang.org) library that manages cluster
4 4
 membership and member failure detection using a gossip based protocol.
... ...
@@ -64,7 +64,7 @@ For complete documentation, see the associated [Godoc](http://godoc.org/github.c
64 64
 ## Protocol
65 65
 
66 66
 memberlist is based on ["SWIM: Scalable Weakly-consistent Infection-style Process Group Membership Protocol"](http://www.cs.cornell.edu/~asdas/research/dsn02-swim.pdf),
67
-with a few minor adaptations, mostly to increase propogation speed and
67
+with a few minor adaptations, mostly to increase propagation speed and
68 68
 convergence rate.
69 69
 
70 70
 A high level overview of the memberlist protocol (based on SWIM) is
... ...
@@ -93,15 +93,22 @@ be disabled entirely.
93 93
 
94 94
 Failure detection is done by periodic random probing using a configurable interval.
95 95
 If the node fails to ack within a reasonable time (typically some multiple
96
-of RTT), then an indirect probe is attempted. An indirect probe asks a
97
-configurable number of random nodes to probe the same node, in case there
98
-are network issues causing our own node to fail the probe. If both our
99
-probe and the indirect probes fail within a reasonable time, then the
100
-node is marked "suspicious" and this knowledge is gossiped to the cluster.
101
-A suspicious node is still considered a member of cluster. If the suspect member
102
-of the cluster does not disputes the suspicion within a configurable period of
103
-time, the node is finally considered dead, and this state is then gossiped
104
-to the cluster.
96
+of RTT), then an indirect probe as well as a direct TCP probe are attempted. An
97
+indirect probe asks a configurable number of random nodes to probe the same node,
98
+in case there are network issues causing our own node to fail the probe. The direct
99
+TCP probe is used to help identify the common situation where networking is
100
+misconfigured to allow TCP but not UDP. Without the TCP probe, a UDP-isolated node
101
+would think all other nodes were suspect and could cause churn in the cluster when
102
+it attempts a TCP-based state exchange with another node. It is not desirable to
103
+operate with only TCP connectivity because convergence will be much slower, but it
104
+is enabled so that memberlist can detect this situation and alert operators.
105
+
106
+If both our probe, the indirect probes, and the direct TCP probe fail within a
107
+configurable time, then the node is marked "suspicious" and this knowledge is
108
+gossiped to the cluster. A suspicious node is still considered a member of
109
+cluster. If the suspect member of the cluster does not dispute the suspicion
110
+within a configurable period of time, the node is finally considered dead,
111
+and this state is then gossiped to the cluster.
105 112
 
106 113
 This is a brief and incomplete description of the protocol. For a better idea,
107 114
 please read the
... ...
@@ -111,7 +118,7 @@ in its entirety, along with the memberlist source code.
111 111
 ### Changes from SWIM
112 112
 
113 113
 As mentioned earlier, the memberlist protocol is based on SWIM but includes
114
-minor changes, mostly to increase propogation speed and convergence rates.
114
+minor changes, mostly to increase propagation speed and convergence rates.
115 115
 
116 116
 The changes from SWIM are noted here:
117 117
 
... ...
@@ -127,7 +134,7 @@ The changes from SWIM are noted here:
127 127
   also will periodically send out dedicated gossip messages on their own. This
128 128
   feature lets you have a higher gossip rate (for example once per 200ms)
129 129
   and a slower failure detection rate (such as once per second), resulting
130
-  in overall faster convergence rates and data propogation speeds. This feature
130
+  in overall faster convergence rates and data propagation speeds. This feature
131 131
  can be totally disabled as well, if you wish.
132 132
 
133 133
 * memberlist stores around the state of dead nodes for a set amount of time,
134 134
new file mode 100644
... ...
@@ -0,0 +1,14 @@
0
+package memberlist
1
+
2
+// AliveDelegate is used to involve a client in processing
3
+// a node "alive" message. When a node joins, either through
4
+// a UDP gossip or TCP push/pull, we update the state of
5
+// that node via an alive message. This can be used to filter
6
+// a node out and prevent it from being considered a peer
7
+// using application specific logic.
8
+type AliveDelegate interface {
9
+	// NotifyMerge is invoked when a merge could take place.
10
+	// Provides a list of the nodes known by the peer. If
11
+	// the return value is non-nil, the merge is canceled.
12
+	NotifyAlive(peer *Node) error
13
+}
... ...
@@ -2,6 +2,7 @@ package memberlist
2 2
 
3 3
 import (
4 4
 	"io"
5
+	"log"
5 6
 	"os"
6 7
 	"time"
7 8
 )
... ...
@@ -85,6 +86,11 @@ type Config struct {
85 85
 	ProbeInterval time.Duration
86 86
 	ProbeTimeout  time.Duration
87 87
 
88
+	// DisableTcpPings will turn off the fallback TCP pings that are attempted
89
+	// if the direct UDP ping fails. These get pipelined along with the
90
+	// indirect UDP pings.
91
+	DisableTcpPings bool
92
+
88 93
 	// GossipInterval and GossipNodes are used to configure the gossip
89 94
 	// behavior of memberlist.
90 95
 	//
... ...
@@ -111,6 +117,8 @@ type Config struct {
111 111
 	// the first key used while attempting to decrypt messages. Providing a
112 112
 	// value for this primary key will enable message-level encryption and
113 113
 	// verification, and automatically install the key onto the keyring.
114
+	// The value should be either 16, 24, or 32 bytes to select AES-128,
115
+	// AES-192, or AES-256.
114 116
 	SecretKey []byte
115 117
 
116 118
 	// The keyring holds all of the encryption keys used internally. It is
... ...
@@ -132,16 +140,29 @@ type Config struct {
132 132
 	Events                  EventDelegate
133 133
 	Conflict                ConflictDelegate
134 134
 	Merge                   MergeDelegate
135
+	Ping                    PingDelegate
136
+	Alive                   AliveDelegate
137
+
138
+	// DNSConfigPath points to the system's DNS config file, usually located
139
+	// at /etc/resolv.conf. It can be overridden via config for easier testing.
140
+	DNSConfigPath string
135 141
 
136 142
 	// LogOutput is the writer where logs should be sent. If this is not
137
-	// set, logging will go to stderr by default.
143
+	// set, logging will go to stderr by default. You cannot specify both LogOutput
144
+	// and Logger at the same time.
138 145
 	LogOutput io.Writer
146
+
147
+	// Logger is a custom logger which you provide. If Logger is set, it will use
148
+	// this for the internal logger. If Logger is not set, it will fall back to the
149
+	// behavior for using LogOutput. You cannot specify both LogOutput and Logger
150
+	// at the same time.
151
+	Logger *log.Logger
139 152
 }
140 153
 
141 154
 // DefaultLANConfig returns a sane set of configurations for Memberlist.
142 155
 // It uses the hostname as the node name, and otherwise sets very conservative
143 156
 // values that are sane for most LAN environments. The default configuration
144
-// errs on the side on the side of caution, choosing values that are optimized
157
+// errs on the side of caution, choosing values that are optimized
145 158
 // for higher convergence at the cost of higher bandwidth usage. Regardless,
146 159
 // these values are a good starting point when getting started with memberlist.
147 160
 func DefaultLANConfig() *Config {
... ...
@@ -152,7 +173,7 @@ func DefaultLANConfig() *Config {
152 152
 		BindPort:         7946,
153 153
 		AdvertiseAddr:    "",
154 154
 		AdvertisePort:    7946,
155
-		ProtocolVersion:  ProtocolVersionMax,
155
+		ProtocolVersion:  ProtocolVersion2Compatible,
156 156
 		TCPTimeout:       10 * time.Second,       // Timeout after 10 seconds
157 157
 		IndirectChecks:   3,                      // Use 3 nodes for the indirect ping
158 158
 		RetransmitMult:   4,                      // Retransmit a message 4 * log(N+1) nodes
... ...
@@ -160,6 +181,7 @@ func DefaultLANConfig() *Config {
160 160
 		PushPullInterval: 30 * time.Second,       // Low frequency
161 161
 		ProbeTimeout:     500 * time.Millisecond, // Reasonable RTT time for LAN
162 162
 		ProbeInterval:    1 * time.Second,        // Failure check every second
163
+		DisableTcpPings:  false,                  // TCP pings are safe, even with mixed versions
163 164
 
164 165
 		GossipNodes:    3,                      // Gossip to 3 nodes
165 166
 		GossipInterval: 200 * time.Millisecond, // Gossip more rapidly
... ...
@@ -167,8 +189,9 @@ func DefaultLANConfig() *Config {
167 167
 		EnableCompression: true, // Enable compression by default
168 168
 
169 169
 		SecretKey: nil,
170
+		Keyring:   nil,
170 171
 
171
-		Keyring: nil,
172
+		DNSConfigPath: "/etc/resolv.conf",
172 173
 	}
173 174
 }
174 175
 
... ...
@@ -19,7 +19,8 @@ type Delegate interface {
19 19
 	// It can return a list of buffers to send. Each buffer should assume an
20 20
 	// overhead as provided with a limit on the total byte size allowed.
21 21
 	// The total byte size of the resulting data to send must not exceed
22
-	// the limit.
22
+	// the limit. Care should be taken that this method does not block,
23
+	// since doing so would block the entire UDP packet receive loop.
23 24
 	GetBroadcasts(overhead, limit int) [][]byte
24 25
 
25 26
 	// LocalState is used for a TCP Push/Pull. This is sent to
... ...
@@ -34,6 +34,9 @@ func (k *Keyring) init() {
34 34
 // keyring. If creating a keyring with multiple keys, one key must be designated
35 35
 // primary by passing it as the primaryKey. If the primaryKey does not exist in
36 36
 // the list of secondary keys, it will be automatically added at position 0.
37
+//
38
+// A key should be either 16, 24, or 32 bytes to select AES-128,
39
+// AES-192, or AES-256.
37 40
 func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) {
38 41
 	keyring := &Keyring{}
39 42
 	keyring.init()
... ...
@@ -58,10 +61,12 @@ func NewKeyring(keys [][]byte, primaryKey []byte) (*Keyring, error) {
58 58
 // AddKey will install a new key on the ring. Adding a key to the ring will make
59 59
 // it available for use in decryption. If the key already exists on the ring,
60 60
 // this function will just return noop.
61
+//
62
+// key should be either 16, 24, or 32 bytes to select AES-128,
63
+// AES-192, or AES-256.
61 64
 func (k *Keyring) AddKey(key []byte) error {
62
-	// Encorce 16-byte key size
63
-	if len(key) != 16 {
64
-		return fmt.Errorf("key size must be 16 bytes")
65
+	if l := len(key); l != 16 && l != 24 && l != 32 {
66
+		return fmt.Errorf("key size must be 16, 24 or 32 bytes")
65 67
 	}
66 68
 
67 69
 	// No-op if key is already installed
68 70
new file mode 100644
... ...
@@ -0,0 +1,22 @@
0
+package memberlist
1
+
2
+import (
3
+	"fmt"
4
+	"net"
5
+)
6
+
7
// LogAddress returns a "from=<address>" string for the given address,
// suitable for inclusion in log lines. A nil address is reported as
// "<unknown address>".
func LogAddress(addr net.Addr) string {
	if addr == nil {
		return "from=<unknown address>"
	}

	return fmt.Sprintf("from=%s", addr.String())
}
14
+
15
+func LogConn(conn net.Conn) string {
16
+	if conn == nil {
17
+		return LogAddress(nil)
18
+	}
19
+
20
+	return LogAddress(conn.RemoteAddr())
21
+}
... ...
@@ -20,11 +20,19 @@ import (
20 20
 	"net"
21 21
 	"os"
22 22
 	"strconv"
23
+	"strings"
23 24
 	"sync"
24 25
 	"time"
26
+
27
+	"github.com/hashicorp/go-multierror"
28
+	"github.com/miekg/dns"
25 29
 )
26 30
 
27 31
 type Memberlist struct {
32
+	sequenceNum uint32 // Local sequence number
33
+	incarnation uint32 // Local incarnation number
34
+	numNodes    uint32 // Number of known nodes (estimate)
35
+
28 36
 	config         *Config
29 37
 	shutdown       bool
30 38
 	shutdownCh     chan struct{}
... ...
@@ -35,9 +43,6 @@ type Memberlist struct {
35 35
 	tcpListener *net.TCPListener
36 36
 	handoff     chan msgHandoff
37 37
 
38
-	sequenceNum uint32 // Local sequence number
39
-	incarnation uint32 // Local incarnation number
40
-
41 38
 	nodeLock sync.RWMutex
42 39
 	nodes    []*nodeState          // Known nodes
43 40
 	nodeMap  map[string]*nodeState // Maps Addr.String() -> NodeState
... ...
@@ -52,8 +57,6 @@ type Memberlist struct {
52 52
 
53 53
 	broadcasts *TransmitLimitedQueue
54 54
 
55
-	startStopLock sync.Mutex
56
-
57 55
 	logger *log.Logger
58 56
 }
59 57
 
... ...
@@ -90,6 +93,9 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
90 90
 	if err != nil {
91 91
 		return nil, fmt.Errorf("Failed to start TCP listener. Err: %s", err)
92 92
 	}
93
+	if conf.BindPort == 0 {
94
+		conf.BindPort = tcpLn.Addr().(*net.TCPAddr).Port
95
+	}
93 96
 
94 97
 	udpAddr := &net.UDPAddr{IP: net.ParseIP(conf.BindAddr), Port: conf.BindPort}
95 98
 	udpLn, err := net.ListenUDP("udp", udpAddr)
... ...
@@ -101,10 +107,19 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
101 101
 	// Set the UDP receive window size
102 102
 	setUDPRecvBuf(udpLn)
103 103
 
104
-	if conf.LogOutput == nil {
105
-		conf.LogOutput = os.Stderr
104
+	if conf.LogOutput != nil && conf.Logger != nil {
105
+		return nil, fmt.Errorf("Cannot specify both LogOutput and Logger. Please choose a single log configuration setting.")
106
+	}
107
+
108
+	logDest := conf.LogOutput
109
+	if logDest == nil {
110
+		logDest = os.Stderr
111
+	}
112
+
113
+	logger := conf.Logger
114
+	if logger == nil {
115
+		logger = log.New(logDest, "", log.LstdFlags)
106 116
 	}
107
-	logger := log.New(conf.LogOutput, "", log.LstdFlags)
108 117
 
109 118
 	m := &Memberlist{
110 119
 		config:         conf,
... ...
@@ -118,7 +133,9 @@ func newMemberlist(conf *Config) (*Memberlist, error) {
118 118
 		broadcasts:     &TransmitLimitedQueue{RetransmitMult: conf.RetransmitMult},
119 119
 		logger:         logger,
120 120
 	}
121
-	m.broadcasts.NumNodes = func() int { return len(m.nodes) }
121
+	m.broadcasts.NumNodes = func() int {
122
+		return m.estNumNodes()
123
+	}
122 124
 	go m.tcpListen()
123 125
 	go m.udpListen()
124 126
 	go m.udpHandler()
... ...
@@ -153,79 +170,158 @@ func Create(conf *Config) (*Memberlist, error) {
153 153
 // none could be reached. If an error is returned, the node did not successfully
154 154
 // join the cluster.
155 155
 func (m *Memberlist) Join(existing []string) (int, error) {
156
-	// Attempt to join any of them
157 156
 	numSuccess := 0
158
-	var retErr error
157
+	var errs error
159 158
 	for _, exist := range existing {
160
-		addrs, port, err := m.resolveAddr(exist)
159
+		addrs, err := m.resolveAddr(exist)
161 160
 		if err != nil {
162
-			m.logger.Printf("[WARN] memberlist: Failed to resolve %s: %v", exist, err)
163
-			retErr = err
161
+			err = fmt.Errorf("Failed to resolve %s: %v", exist, err)
162
+			errs = multierror.Append(errs, err)
163
+			m.logger.Printf("[WARN] memberlist: %v", err)
164 164
 			continue
165 165
 		}
166 166
 
167 167
 		for _, addr := range addrs {
168
-			if err := m.pushPullNode(addr, port, true); err != nil {
169
-				retErr = err
168
+			if err := m.pushPullNode(addr.ip, addr.port, true); err != nil {
169
+				err = fmt.Errorf("Failed to join %s: %v", addr.ip, err)
170
+				errs = multierror.Append(errs, err)
171
+				m.logger.Printf("[DEBUG] memberlist: %v", err)
170 172
 				continue
171 173
 			}
172 174
 			numSuccess++
173 175
 		}
174 176
 
175 177
 	}
176
-
177 178
 	if numSuccess > 0 {
178
-		retErr = nil
179
+		errs = nil
179 180
 	}
181
+	return numSuccess, errs
182
+}
180 183
 
181
-	return numSuccess, retErr
184
// ipPort holds information about a node we want to try to join.
type ipPort struct {
	ip   net.IP
	port uint16
}
189
+
190
+// tcpLookupIP is a helper to initiate a TCP-based DNS lookup for the given host.
191
+// The built-in Go resolver will do a UDP lookup first, and will only use TCP if
192
+// the response has the truncate bit set, which isn't common on DNS servers like
193
+// Consul's. By doing the TCP lookup directly, we get the best chance for the
194
+// largest list of hosts to join. Since joins are relatively rare events, it's ok
195
+// to do this rather expensive operation.
196
+func (m *Memberlist) tcpLookupIP(host string, defaultPort uint16) ([]ipPort, error) {
197
+	// Don't attempt any TCP lookups against non-fully qualified domain
198
+	// names, since those will likely come from the resolv.conf file.
199
+	if !strings.Contains(host, ".") {
200
+		return nil, nil
201
+	}
202
+
203
+	// Make sure the domain name is terminated with a dot (we know there's
204
+	// at least one character at this point).
205
+	dn := host
206
+	if dn[len(dn)-1] != '.' {
207
+		dn = dn + "."
208
+	}
209
+
210
+	// See if we can find a server to try.
211
+	cc, err := dns.ClientConfigFromFile(m.config.DNSConfigPath)
212
+	if err != nil {
213
+		return nil, err
214
+	}
215
+	if len(cc.Servers) > 0 {
216
+		// We support host:port in the DNS config, but need to add the
217
+		// default port if one is not supplied.
218
+		server := cc.Servers[0]
219
+		if !hasPort(server) {
220
+			server = net.JoinHostPort(server, cc.Port)
221
+		}
222
+
223
+		// Do the lookup.
224
+		c := new(dns.Client)
225
+		c.Net = "tcp"
226
+		msg := new(dns.Msg)
227
+		msg.SetQuestion(dn, dns.TypeANY)
228
+		in, _, err := c.Exchange(msg, server)
229
+		if err != nil {
230
+			return nil, err
231
+		}
232
+
233
+		// Handle any IPs we get back that we can attempt to join.
234
+		var ips []ipPort
235
+		for _, r := range in.Answer {
236
+			switch rr := r.(type) {
237
+			case (*dns.A):
238
+				ips = append(ips, ipPort{rr.A, defaultPort})
239
+			case (*dns.AAAA):
240
+				ips = append(ips, ipPort{rr.AAAA, defaultPort})
241
+			case (*dns.CNAME):
242
+				m.logger.Printf("[DEBUG] memberlist: Ignoring CNAME RR in TCP-first answer for '%s'", host)
243
+			}
244
+		}
245
+		return ips, nil
246
+	}
247
+
248
+	return nil, nil
182 249
 }
183 250
 
184 251
 // resolveAddr is used to resolve the address into an address,
185 252
 // port, and error. If no port is given, use the default
186
-func (m *Memberlist) resolveAddr(hostStr string) ([][]byte, uint16, error) {
187
-	ips := make([][]byte, 0)
253
+func (m *Memberlist) resolveAddr(hostStr string) ([]ipPort, error) {
254
+	// Normalize the incoming string to host:port so we can apply Go's
255
+	// parser to it.
188 256
 	port := uint16(0)
257
+	if !hasPort(hostStr) {
258
+		hostStr += ":" + strconv.Itoa(m.config.BindPort)
259
+	}
189 260
 	host, sport, err := net.SplitHostPort(hostStr)
190
-	if ae, ok := err.(*net.AddrError); ok && ae.Err == "missing port in address" {
191
-		// error, port missing - we can solve this
192
-		port = uint16(m.config.BindPort)
193
-		host = hostStr
194
-	} else if err != nil {
195
-		// error, but not missing port
196
-		return ips, port, err
197
-	} else if lport, err := strconv.ParseUint(sport, 10, 16); err != nil {
198
-		// error, when parsing port
199
-		return ips, port, err
200
-	} else {
201
-		// no error
202
-		port = uint16(lport)
203
-	}
204
-
205
-	// Get the addresses that hostPort might resolve to
206
-	// ResolveTcpAddr requres ipv6 brackets to separate
207
-	// port numbers whereas ParseIP doesn't, but luckily
208
-	// SplitHostPort takes care of the brackets
209
-	if ip := net.ParseIP(host); ip == nil {
210
-		if pre, err := net.LookupIP(host); err == nil {
211
-			for _, ip := range pre {
212
-				ips = append(ips, ip)
213
-			}
214
-		} else {
215
-			return ips, port, err
216
-		}
217
-	} else {
218
-		ips = append(ips, ip)
261
+	if err != nil {
262
+		return nil, err
263
+	}
264
+
265
+	// This will capture the supplied port, or the default one added above.
266
+	lport, err := strconv.ParseUint(sport, 10, 16)
267
+	if err != nil {
268
+		return nil, err
269
+	}
270
+	port = uint16(lport)
271
+
272
+	// If it looks like an IP address we are done. The SplitHostPort() above
273
+	// will make sure the host part is in good shape for parsing, even for
274
+	// IPv6 addresses.
275
+	if ip := net.ParseIP(host); ip != nil {
276
+		return []ipPort{ipPort{ip, port}}, nil
277
+	}
278
+
279
+	// First try TCP so we have the best chance for the largest list of
280
+	// hosts to join. If this fails it's not fatal since this isn't a standard
281
+	// way to query DNS, and we have a fallback below.
282
+	ips, err := m.tcpLookupIP(host, port)
283
+	if err != nil {
284
+		m.logger.Printf("[DEBUG] memberlist: TCP-first lookup failed for '%s', falling back to UDP: %s", hostStr, err)
285
+	}
286
+	if len(ips) > 0 {
287
+		return ips, nil
219 288
 	}
220 289
 
221
-	return ips, port, nil
290
+	// If TCP didn't yield anything then use the normal Go resolver which
291
+	// will try UDP, then might possibly try TCP again if the UDP response
292
+	// indicates it was truncated.
293
+	ans, err := net.LookupIP(host)
294
+	if err != nil {
295
+		return nil, err
296
+	}
297
+	ips = make([]ipPort, 0, len(ans))
298
+	for _, ip := range ans {
299
+		ips = append(ips, ipPort{ip, port})
300
+	}
301
+	return ips, nil
222 302
 }
223 303
 
224 304
 // setAlive is used to mark this node as being alive. This is the same
225 305
 // as if we received an alive notification our own network channel for
226 306
 // ourself.
227 307
 func (m *Memberlist) setAlive() error {
228
-
229 308
 	var advertiseAddr []byte
230 309
 	var advertisePort int
231 310
 	if m.config.AdvertiseAddr != "" {
... ...
@@ -268,7 +364,7 @@ func (m *Memberlist) setAlive() error {
268 268
 				if ip.To4() == nil {
269 269
 					continue
270 270
 				}
271
-				if !isPrivateIP(ip.String()) {
271
+				if !IsPrivateIP(ip.String()) {
272 272
 					continue
273 273
 				}
274 274
 
... ...
@@ -286,12 +382,14 @@ func (m *Memberlist) setAlive() error {
286 286
 			addr := m.tcpListener.Addr().(*net.TCPAddr)
287 287
 			advertiseAddr = addr.IP
288 288
 		}
289
-		advertisePort = m.config.BindPort
289
+
290
+		// Use the port we are bound to.
291
+		advertisePort = m.tcpListener.Addr().(*net.TCPAddr).Port
290 292
 	}
291 293
 
292 294
 	// Check if this is a public address without encryption
293 295
 	addrStr := net.IP(advertiseAddr).String()
294
-	if !isPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() {
296
+	if !IsPrivateIP(addrStr) && !isLoopbackIP(addrStr) && !m.config.EncryptionEnabled() {
295 297
 		m.logger.Printf("[WARN] memberlist: Binding to public address without encryption!")
296 298
 	}
297 299
 
... ...
@@ -385,7 +483,8 @@ func (m *Memberlist) UpdateNode(timeout time.Duration) error {
385 385
 // user-data message, which a delegate will receive through NotifyMsg
386 386
 // The actual data is transmitted over UDP, which means this is a
387 387
 // best-effort transmission mechanism, and the maximum size of the
388
-// message is the size of a single UDP datagram, after compression
388
+// message is the size of a single UDP datagram, after compression.
389
+// This method is DEPRECATED in favor of SendToUDP.
389 390
 func (m *Memberlist) SendTo(to net.Addr, msg []byte) error {
390 391
 	// Encode as a user message
391 392
 	buf := make([]byte, 1, len(msg)+1)
... ...
@@ -393,7 +492,36 @@ func (m *Memberlist) SendTo(to net.Addr, msg []byte) error {
393 393
 	buf = append(buf, msg...)
394 394
 
395 395
 	// Send the message
396
-	return m.rawSendMsg(to, buf)
396
+	return m.rawSendMsgUDP(to, buf)
397
+}
398
+
399
+// SendToUDP is used to directly send a message to another node, without
400
+// the use of the gossip mechanism. This will encode the message as a
401
+// user-data message, which a delegate will receive through NotifyMsg
402
+// The actual data is transmitted over UDP, which means this is a
403
+// best-effort transmission mechanism, and the maximum size of the
404
+// message is the size of a single UDP datagram, after compression
405
+func (m *Memberlist) SendToUDP(to *Node, msg []byte) error {
406
+	// Encode as a user message
407
+	buf := make([]byte, 1, len(msg)+1)
408
+	buf[0] = byte(userMsg)
409
+	buf = append(buf, msg...)
410
+
411
+	// Send the message
412
+	destAddr := &net.UDPAddr{IP: to.Addr, Port: int(to.Port)}
413
+	return m.rawSendMsgUDP(destAddr, buf)
414
+}
415
+
416
+// SendToTCP is used to directly send a message to another node, without
417
+// the use of the gossip mechanism. This will encode the message as a
418
+// user-data message, which a delegate will receive through NotifyMsg
419
+// The actual data is transmitted over TCP, which means delivery
420
+// is guaranteed if no error is returned. There is no limit
421
+// to the size of the message
422
+func (m *Memberlist) SendToTCP(to *Node, msg []byte) error {
423
+	// Send the message
424
+	destAddr := &net.TCPAddr{IP: to.Addr, Port: int(to.Port)}
425
+	return m.sendTCPUserMsg(destAddr, msg)
397 426
 }
398 427
 
399 428
 // Members returns a list of all known live nodes. The node structures
... ...
@@ -441,10 +569,12 @@ func (m *Memberlist) NumMembers() (alive int) {
441 441
 // This method is safe to call multiple times, but must not be called
442 442
 // after the cluster is already shut down.
443 443
 func (m *Memberlist) Leave(timeout time.Duration) error {
444
-	m.startStopLock.Lock()
445
-	defer m.startStopLock.Unlock()
444
+	m.nodeLock.Lock()
445
+	// We can't defer m.nodeLock.Unlock() because m.deadNode will also try to
446
+	// acquire a lock so we need to Unlock before that.
446 447
 
447 448
 	if m.shutdown {
449
+		m.nodeLock.Unlock()
448 450
 		panic("leave after shutdown")
449 451
 	}
450 452
 
... ...
@@ -452,6 +582,7 @@ func (m *Memberlist) Leave(timeout time.Duration) error {
452 452
 		m.leave = true
453 453
 
454 454
 		state, ok := m.nodeMap[m.config.Name]
455
+		m.nodeLock.Unlock()
455 456
 		if !ok {
456 457
 			m.logger.Printf("[WARN] memberlist: Leave but we're not in the node map.")
457 458
 			return nil
... ...
@@ -475,6 +606,8 @@ func (m *Memberlist) Leave(timeout time.Duration) error {
475 475
 				return fmt.Errorf("timeout waiting for leave broadcast")
476 476
 			}
477 477
 		}
478
+	} else {
479
+		m.nodeLock.Unlock()
478 480
 	}
479 481
 
480 482
 	return nil
... ...
@@ -509,8 +642,8 @@ func (m *Memberlist) ProtocolVersion() uint8 {
509 509
 //
510 510
 // This method is safe to call multiple times.
511 511
 func (m *Memberlist) Shutdown() error {
512
-	m.startStopLock.Lock()
513
-	defer m.startStopLock.Unlock()
512
+	m.nodeLock.Lock()
513
+	defer m.nodeLock.Unlock()
514 514
 
515 515
 	if m.shutdown {
516 516
 		return nil
... ...
@@ -8,6 +8,7 @@ package memberlist
8 8
 // as part of the push-pull anti-entropy.
9 9
 type MergeDelegate interface {
10 10
 	// NotifyMerge is invoked when a merge could take place.
11
-	// Provides a list of the nodes known by the peer.
12
-	NotifyMerge(peers []*Node) (cancel bool)
11
+	// Provides a list of the nodes known by the peer. If
12
+	// the return value is non-nil, the merge is canceled.
13
+	NotifyMerge(peers []*Node) error
13 14
 }
... ...
@@ -18,7 +18,15 @@ import (
18 18
 // range. This range is inclusive.
19 19
 const (
20 20
 	ProtocolVersionMin uint8 = 1
21
-	ProtocolVersionMax       = 2
21
+
22
+	// Version 3 added support for TCP pings but we kept the default
23
+	// protocol version at 2 to ease transition to this new feature.
24
+	// A memberlist speaking version 2 of the protocol will attempt
25
+	// to TCP ping another memberlist who understands version 3 or
26
+	// greater.
27
+	ProtocolVersion2Compatible = 2
28
+
29
+	ProtocolVersionMax = 3
22 30
 )
23 31
 
24 32
 // messageType is an integer ID of a type of message that can be received
... ...
@@ -79,7 +87,8 @@ type indirectPingReq struct {
79 79
 
80 80
 // ack response is sent for a ping
81 81
 type ackResp struct {
82
-	SeqNo uint32
82
+	SeqNo   uint32
83
+	Payload []byte
83 84
 }
84 85
 
85 86
 // suspect is broadcast when we suspect a node is dead
... ...
@@ -119,6 +128,11 @@ type pushPullHeader struct {
119 119
 	Join         bool // Is this a join request or a anti-entropy run
120 120
 }
121 121
 
122
// userMsgHeader is used to encapsulate a userMsg
type userMsgHeader struct {
	UserMsgLen int // Encodes the byte length of user state
}
126
+
122 127
 // pushNodeState is used for pushPullReq when we are
123 128
 // transfering out node states
124 129
 type pushNodeState struct {
... ...
@@ -185,54 +199,65 @@ func (m *Memberlist) tcpListen() {
185 185
 
186 186
 // handleConn handles a single incoming TCP connection
187 187
 func (m *Memberlist) handleConn(conn *net.TCPConn) {
188
-	m.logger.Printf("[DEBUG] memberlist: Responding to push/pull sync with: %s", conn.RemoteAddr())
188
+	m.logger.Printf("[DEBUG] memberlist: TCP connection %s", LogConn(conn))
189
+
189 190
 	defer conn.Close()
190 191
 	metrics.IncrCounter([]string{"memberlist", "tcp", "accept"}, 1)
191 192
 
192
-	join, remoteNodes, userState, err := m.readRemoteState(conn)
193
+	conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
194
+	msgType, bufConn, dec, err := m.readTCP(conn)
193 195
 	if err != nil {
194
-		m.logger.Printf("[ERR] memberlist: Failed to receive remote state: %s", err)
196
+		m.logger.Printf("[ERR] memberlist: failed to receive: %s %s", err, LogConn(conn))
195 197
 		return
196 198
 	}
197 199
 
198
-	if err := m.sendLocalState(conn, join); err != nil {
199
-		m.logger.Printf("[ERR] memberlist: Failed to push local state: %s", err)
200
-	}
200
+	switch msgType {
201
+	case userMsg:
202
+		if err := m.readUserMsg(bufConn, dec); err != nil {
203
+			m.logger.Printf("[ERR] memberlist: Failed to receive user message: %s %s", err, LogConn(conn))
204
+		}
205
+	case pushPullMsg:
206
+		join, remoteNodes, userState, err := m.readRemoteState(bufConn, dec)
207
+		if err != nil {
208
+			m.logger.Printf("[ERR] memberlist: Failed to read remote state: %s %s", err, LogConn(conn))
209
+			return
210
+		}
201 211
 
202
-	if err := m.verifyProtocol(remoteNodes); err != nil {
203
-		m.logger.Printf("[ERR] memberlist: Push/pull verification failed: %s", err)
204
-		return
205
-	}
212
+		if err := m.sendLocalState(conn, join); err != nil {
213
+			m.logger.Printf("[ERR] memberlist: Failed to push local state: %s %s", err, LogConn(conn))
214
+			return
215
+		}
206 216
 
207
-	// Invoke the merge delegate if any
208
-	if join && m.config.Merge != nil {
209
-		nodes := make([]*Node, len(remoteNodes))
210
-		for idx, n := range remoteNodes {
211
-			nodes[idx] = &Node{
212
-				Name: n.Name,
213
-				Addr: n.Addr,
214
-				Port: n.Port,
215
-				Meta: n.Meta,
216
-				PMin: n.Vsn[0],
217
-				PMax: n.Vsn[1],
218
-				PCur: n.Vsn[2],
219
-				DMin: n.Vsn[3],
220
-				DMax: n.Vsn[4],
221
-				DCur: n.Vsn[5],
222
-			}
217
+		if err := m.mergeRemoteState(join, remoteNodes, userState); err != nil {
218
+			m.logger.Printf("[ERR] memberlist: Failed push/pull merge: %s %s", err, LogConn(conn))
219
+			return
223 220
 		}
224
-		if m.config.Merge.NotifyMerge(nodes) {
225
-			m.logger.Printf("[WARN] memberlist: Cluster merge canceled")
221
+	case pingMsg:
222
+		var p ping
223
+		if err := dec.Decode(&p); err != nil {
224
+			m.logger.Printf("[ERR] memberlist: Failed to decode TCP ping: %s %s", err, LogConn(conn))
226 225
 			return
227 226
 		}
228
-	}
229 227
 
230
-	// Merge the membership state
231
-	m.mergeState(remoteNodes)
228
+		if p.Node != "" && p.Node != m.config.Name {
229
+			m.logger.Printf("[WARN] memberlist: Got ping for unexpected node %s %s", p.Node, LogConn(conn))
230
+			return
231
+		}
232 232
 
233
-	// Invoke the delegate for user state
234
-	if m.config.Delegate != nil {
235
-		m.config.Delegate.MergeRemoteState(userState, join)
233
+		ack := ackResp{p.SeqNo, nil}
234
+		out, err := encode(ackRespMsg, &ack)
235
+		if err != nil {
236
+			m.logger.Printf("[ERR] memberlist: Failed to encode TCP ack: %s", err)
237
+			return
238
+		}
239
+
240
+		err = m.rawSendMsgTCP(conn, out.Bytes())
241
+		if err != nil {
242
+			m.logger.Printf("[ERR] memberlist: Failed to send TCP ack: %s %s", err, LogConn(conn))
243
+			return
244
+		}
245
+	default:
246
+		m.logger.Printf("[ERR] memberlist: Received invalid msgType (%d) %s", msgType, LogConn(conn))
236 247
 	}
237 248
 }
238 249
 
... ...
@@ -265,29 +290,30 @@ func (m *Memberlist) udpListen() {
265 265
 			continue
266 266
 		}
267 267
 
268
+		// Capture the reception time of the packet as close to the
269
+		// system calls as possible.
270
+		lastPacket = time.Now()
271
+
268 272
 		// Check the length
269 273
 		if n < 1 {
270
-			m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes). From: %s",
271
-				len(buf), addr)
274
+			m.logger.Printf("[ERR] memberlist: UDP packet too short (%d bytes) %s",
275
+				len(buf), LogAddress(addr))
272 276
 			continue
273 277
 		}
274 278
 
275
-		// Capture the current time
276
-		lastPacket = time.Now()
277
-
278 279
 		// Ingest this packet
279 280
 		metrics.IncrCounter([]string{"memberlist", "udp", "received"}, float32(n))
280
-		m.ingestPacket(buf[:n], addr)
281
+		m.ingestPacket(buf[:n], addr, lastPacket)
281 282
 	}
282 283
 }
283 284
 
284
-func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) {
285
+func (m *Memberlist) ingestPacket(buf []byte, from net.Addr, timestamp time.Time) {
285 286
 	// Check if encryption is enabled
286 287
 	if m.config.EncryptionEnabled() {
287 288
 		// Decrypt the payload
288 289
 		plain, err := decryptPayload(m.config.Keyring.GetKeys(), buf, nil)
289 290
 		if err != nil {
290
-			m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v", err)
291
+			m.logger.Printf("[ERR] memberlist: Decrypt packet failed: %v %s", err, LogAddress(from))
291 292
 			return
292 293
 		}
293 294
 
... ...
@@ -296,10 +322,10 @@ func (m *Memberlist) ingestPacket(buf []byte, from net.Addr) {
296 296
 	}
297 297
 
298 298
 	// Handle the command
299
-	m.handleCommand(buf, from)
299
+	m.handleCommand(buf, from, timestamp)
300 300
 }
301 301
 
302
-func (m *Memberlist) handleCommand(buf []byte, from net.Addr) {
302
+func (m *Memberlist) handleCommand(buf []byte, from net.Addr, timestamp time.Time) {
303 303
 	// Decode the message type
304 304
 	msgType := messageType(buf[0])
305 305
 	buf = buf[1:]
... ...
@@ -307,16 +333,16 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) {
307 307
 	// Switch on the msgType
308 308
 	switch msgType {
309 309
 	case compoundMsg:
310
-		m.handleCompound(buf, from)
310
+		m.handleCompound(buf, from, timestamp)
311 311
 	case compressMsg:
312
-		m.handleCompressed(buf, from)
312
+		m.handleCompressed(buf, from, timestamp)
313 313
 
314 314
 	case pingMsg:
315 315
 		m.handlePing(buf, from)
316 316
 	case indirectPingMsg:
317 317
 		m.handleIndirectPing(buf, from)
318 318
 	case ackRespMsg:
319
-		m.handleAck(buf, from)
319
+		m.handleAck(buf, from, timestamp)
320 320
 
321 321
 	case suspectMsg:
322 322
 		fallthrough
... ...
@@ -328,11 +354,11 @@ func (m *Memberlist) handleCommand(buf []byte, from net.Addr) {
328 328
 		select {
329 329
 		case m.handoff <- msgHandoff{msgType, buf, from}:
330 330
 		default:
331
-			m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d)", msgType)
331
+			m.logger.Printf("[WARN] memberlist: UDP handler queue full, dropping message (%d) %s", msgType, LogAddress(from))
332 332
 		}
333 333
 
334 334
 	default:
335
-		m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. From: %s", msgType, from)
335
+		m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s", msgType, LogAddress(from))
336 336
 	}
337 337
 }
338 338
 
... ...
@@ -357,7 +383,7 @@ func (m *Memberlist) udpHandler() {
357 357
 			case userMsg:
358 358
 				m.handleUser(buf, from)
359 359
 			default:
360
-				m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported. From: %s (handler)", msgType, from)
360
+				m.logger.Printf("[ERR] memberlist: UDP msg type (%d) not supported %s (handler)", msgType, LogAddress(from))
361 361
 			}
362 362
 
363 363
 		case <-m.shutdownCh:
... ...
@@ -366,46 +392,50 @@ func (m *Memberlist) udpHandler() {
366 366
 	}
367 367
 }
368 368
 
369
-func (m *Memberlist) handleCompound(buf []byte, from net.Addr) {
369
+func (m *Memberlist) handleCompound(buf []byte, from net.Addr, timestamp time.Time) {
370 370
 	// Decode the parts
371 371
 	trunc, parts, err := decodeCompoundMessage(buf)
372 372
 	if err != nil {
373
-		m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s", err)
373
+		m.logger.Printf("[ERR] memberlist: Failed to decode compound request: %s %s", err, LogAddress(from))
374 374
 		return
375 375
 	}
376 376
 
377 377
 	// Log any truncation
378 378
 	if trunc > 0 {
379
-		m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages", trunc)
379
+		m.logger.Printf("[WARN] memberlist: Compound request had %d truncated messages %s", trunc, LogAddress(from))
380 380
 	}
381 381
 
382 382
 	// Handle each message
383 383
 	for _, part := range parts {
384
-		m.handleCommand(part, from)
384
+		m.handleCommand(part, from, timestamp)
385 385
 	}
386 386
 }
387 387
 
388 388
 func (m *Memberlist) handlePing(buf []byte, from net.Addr) {
389 389
 	var p ping
390 390
 	if err := decode(buf, &p); err != nil {
391
-		m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s", err)
391
+		m.logger.Printf("[ERR] memberlist: Failed to decode ping request: %s %s", err, LogAddress(from))
392 392
 		return
393 393
 	}
394 394
 	// If node is provided, verify that it is for us
395 395
 	if p.Node != "" && p.Node != m.config.Name {
396
-		m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s'", p.Node)
396
+		m.logger.Printf("[WARN] memberlist: Got ping for unexpected node '%s' %s", p.Node, LogAddress(from))
397 397
 		return
398 398
 	}
399
-	ack := ackResp{p.SeqNo}
399
+	var ack ackResp
400
+	ack.SeqNo = p.SeqNo
401
+	if m.config.Ping != nil {
402
+		ack.Payload = m.config.Ping.AckPayload()
403
+	}
400 404
 	if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil {
401
-		m.logger.Printf("[ERR] memberlist: Failed to send ack: %s", err)
405
+		m.logger.Printf("[ERR] memberlist: Failed to send ack: %s %s", err, LogAddress(from))
402 406
 	}
403 407
 }
404 408
 
405 409
 func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) {
406 410
 	var ind indirectPingReq
407 411
 	if err := decode(buf, &ind); err != nil {
408
-		m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s", err)
412
+		m.logger.Printf("[ERR] memberlist: Failed to decode indirect ping request: %s %s", err, LogAddress(from))
409 413
 		return
410 414
 	}
411 415
 
... ...
@@ -421,33 +451,33 @@ func (m *Memberlist) handleIndirectPing(buf []byte, from net.Addr) {
421 421
 	destAddr := &net.UDPAddr{IP: ind.Target, Port: int(ind.Port)}
422 422
 
423 423
 	// Setup a response handler to relay the ack
424
-	respHandler := func() {
425
-		ack := ackResp{ind.SeqNo}
424
+	respHandler := func(payload []byte, timestamp time.Time) {
425
+		ack := ackResp{ind.SeqNo, nil}
426 426
 		if err := m.encodeAndSendMsg(from, ackRespMsg, &ack); err != nil {
427
-			m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s", err)
427
+			m.logger.Printf("[ERR] memberlist: Failed to forward ack: %s %s", err, LogAddress(from))
428 428
 		}
429 429
 	}
430 430
 	m.setAckHandler(localSeqNo, respHandler, m.config.ProbeTimeout)
431 431
 
432 432
 	// Send the ping
433 433
 	if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil {
434
-		m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err)
434
+		m.logger.Printf("[ERR] memberlist: Failed to send ping: %s %s", err, LogAddress(from))
435 435
 	}
436 436
 }
437 437
 
438
-func (m *Memberlist) handleAck(buf []byte, from net.Addr) {
438
+func (m *Memberlist) handleAck(buf []byte, from net.Addr, timestamp time.Time) {
439 439
 	var ack ackResp
440 440
 	if err := decode(buf, &ack); err != nil {
441
-		m.logger.Printf("[ERR] memberlist: Failed to decode ack response: %s", err)
441
+		m.logger.Printf("[ERR] memberlist: Failed to decode ack response: %s %s", err, LogAddress(from))
442 442
 		return
443 443
 	}
444
-	m.invokeAckHandler(ack.SeqNo)
444
+	m.invokeAckHandler(ack, timestamp)
445 445
 }
446 446
 
447 447
 func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) {
448 448
 	var sus suspect
449 449
 	if err := decode(buf, &sus); err != nil {
450
-		m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s", err)
450
+		m.logger.Printf("[ERR] memberlist: Failed to decode suspect message: %s %s", err, LogAddress(from))
451 451
 		return
452 452
 	}
453 453
 	m.suspectNode(&sus)
... ...
@@ -456,7 +486,7 @@ func (m *Memberlist) handleSuspect(buf []byte, from net.Addr) {
456 456
 func (m *Memberlist) handleAlive(buf []byte, from net.Addr) {
457 457
 	var live alive
458 458
 	if err := decode(buf, &live); err != nil {
459
-		m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s", err)
459
+		m.logger.Printf("[ERR] memberlist: Failed to decode alive message: %s %s", err, LogAddress(from))
460 460
 		return
461 461
 	}
462 462
 
... ...
@@ -472,7 +502,7 @@ func (m *Memberlist) handleAlive(buf []byte, from net.Addr) {
472 472
 func (m *Memberlist) handleDead(buf []byte, from net.Addr) {
473 473
 	var d dead
474 474
 	if err := decode(buf, &d); err != nil {
475
-		m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s", err)
475
+		m.logger.Printf("[ERR] memberlist: Failed to decode dead message: %s %s", err, LogAddress(from))
476 476
 		return
477 477
 	}
478 478
 	m.deadNode(&d)
... ...
@@ -487,16 +517,16 @@ func (m *Memberlist) handleUser(buf []byte, from net.Addr) {
487 487
 }
488 488
 
489 489
 // handleCompressed is used to unpack a compressed message
490
-func (m *Memberlist) handleCompressed(buf []byte, from net.Addr) {
490
+func (m *Memberlist) handleCompressed(buf []byte, from net.Addr, timestamp time.Time) {
491 491
 	// Try to decode the payload
492 492
 	payload, err := decompressPayload(buf)
493 493
 	if err != nil {
494
-		m.logger.Printf("[ERR] memberlist: Failed to decompress payload: %v", err)
494
+		m.logger.Printf("[ERR] memberlist: Failed to decompress payload: %v %s", err, LogAddress(from))
495 495
 		return
496 496
 	}
497 497
 
498 498
 	// Recursively handle the payload
499
-	m.handleCommand(payload, from)
499
+	m.handleCommand(payload, from, timestamp)
500 500
 }
501 501
 
502 502
 // encodeAndSendMsg is used to combine the encoding and sending steps
... ...
@@ -523,7 +553,7 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error {
523 523
 
524 524
 	// Fast path if nothing to piggypack
525 525
 	if len(extra) == 0 {
526
-		return m.rawSendMsg(to, msg)
526
+		return m.rawSendMsgUDP(to, msg)
527 527
 	}
528 528
 
529 529
 	// Join all the messages
... ...
@@ -535,11 +565,11 @@ func (m *Memberlist) sendMsg(to net.Addr, msg []byte) error {
535 535
 	compound := makeCompoundMessage(msgs)
536 536
 
537 537
 	// Send the message
538
-	return m.rawSendMsg(to, compound.Bytes())
538
+	return m.rawSendMsgUDP(to, compound.Bytes())
539 539
 }
540 540
 
541
-// rawSendMsg is used to send a UDP message to another host without modification
542
-func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error {
541
+// rawSendMsgUDP is used to send a UDP message to another host without modification
542
+func (m *Memberlist) rawSendMsgUDP(to net.Addr, msg []byte) error {
543 543
 	// Check if we have compression enabled
544 544
 	if m.config.EnableCompression {
545 545
 		buf, err := compressPayload(msg)
... ...
@@ -571,7 +601,72 @@ func (m *Memberlist) rawSendMsg(to net.Addr, msg []byte) error {
571 571
 	return err
572 572
 }
573 573
 
574
-// sendState is used to initiate a push/pull over TCP with a remote node
574
+// rawSendMsgTCP is used to send a TCP message to another host without modification
575
+func (m *Memberlist) rawSendMsgTCP(conn net.Conn, sendBuf []byte) error {
576
+	// Check if compresion is enabled
577
+	if m.config.EnableCompression {
578
+		compBuf, err := compressPayload(sendBuf)
579
+		if err != nil {
580
+			m.logger.Printf("[ERROR] memberlist: Failed to compress payload: %v", err)
581
+		} else {
582
+			sendBuf = compBuf.Bytes()
583
+		}
584
+	}
585
+
586
+	// Check if encryption is enabled
587
+	if m.config.EncryptionEnabled() {
588
+		crypt, err := m.encryptLocalState(sendBuf)
589
+		if err != nil {
590
+			m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err)
591
+			return err
592
+		}
593
+		sendBuf = crypt
594
+	}
595
+
596
+	// Write out the entire send buffer
597
+	metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)))
598
+
599
+	if n, err := conn.Write(sendBuf); err != nil {
600
+		return err
601
+	} else if n != len(sendBuf) {
602
+		return fmt.Errorf("only %d of %d bytes written", n, len(sendBuf))
603
+	}
604
+
605
+	return nil
606
+}
607
+
608
+// sendTCPUserMsg is used to send a TCP userMsg to another host
609
+func (m *Memberlist) sendTCPUserMsg(to net.Addr, sendBuf []byte) error {
610
+	dialer := net.Dialer{Timeout: m.config.TCPTimeout}
611
+	conn, err := dialer.Dial("tcp", to.String())
612
+	if err != nil {
613
+		return err
614
+	}
615
+	defer conn.Close()
616
+
617
+	bufConn := bytes.NewBuffer(nil)
618
+
619
+	if err := bufConn.WriteByte(byte(userMsg)); err != nil {
620
+		return err
621
+	}
622
+
623
+	// Send our node state
624
+	header := userMsgHeader{UserMsgLen: len(sendBuf)}
625
+	hd := codec.MsgpackHandle{}
626
+	enc := codec.NewEncoder(bufConn, &hd)
627
+
628
+	if err := enc.Encode(&header); err != nil {
629
+		return err
630
+	}
631
+
632
+	if _, err := bufConn.Write(sendBuf); err != nil {
633
+		return err
634
+	}
635
+
636
+	return m.rawSendMsgTCP(conn, bufConn.Bytes())
637
+}
638
+
639
+// sendAndReceiveState is used to initiate a push/pull over TCP with a remote node
575 640
 func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([]pushNodeState, []byte, error) {
576 641
 	// Attempt to connect
577 642
 	dialer := net.Dialer{Timeout: m.config.TCPTimeout}
... ...
@@ -589,15 +684,21 @@ func (m *Memberlist) sendAndReceiveState(addr []byte, port uint16, join bool) ([
589 589
 		return nil, nil, err
590 590
 	}
591 591
 
592
-	// Read remote state
593
-	_, remote, userState, err := m.readRemoteState(conn)
592
+	conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
593
+	msgType, bufConn, dec, err := m.readTCP(conn)
594 594
 	if err != nil {
595
-		err := fmt.Errorf("Reading remote state failed: %v", err)
596 595
 		return nil, nil, err
597 596
 	}
598 597
 
599
-	// Return the remote state
600
-	return remote, userState, nil
598
+	// Quit if not push/pull
599
+	if msgType != pushPullMsg {
600
+		err := fmt.Errorf("received invalid msgType (%d), expected pushPullMsg (%d) %s", msgType, pushPullMsg, LogConn(conn))
601
+		return nil, nil, err
602
+	}
603
+
604
+	// Read remote state
605
+	_, remoteNodes, userState, err := m.readRemoteState(bufConn, dec)
606
+	return remoteNodes, userState, err
601 607
 }
602 608
 
603 609
 // sendLocalState is invoked to send our local state over a tcp connection
... ...
@@ -658,34 +759,7 @@ func (m *Memberlist) sendLocalState(conn net.Conn, join bool) error {
658 658
 	}
659 659
 
660 660
 	// Get the send buffer
661
-	sendBuf := bufConn.Bytes()
662
-
663
-	// Check if compresion is enabled
664
-	if m.config.EnableCompression {
665
-		compBuf, err := compressPayload(bufConn.Bytes())
666
-		if err != nil {
667
-			m.logger.Printf("[ERROR] memberlist: Failed to compress local state: %v", err)
668
-		} else {
669
-			sendBuf = compBuf.Bytes()
670
-		}
671
-	}
672
-
673
-	// Check if encryption is enabled
674
-	if m.config.EncryptionEnabled() {
675
-		crypt, err := m.encryptLocalState(sendBuf)
676
-		if err != nil {
677
-			m.logger.Printf("[ERROR] memberlist: Failed to encrypt local state: %v", err)
678
-			return err
679
-		}
680
-		sendBuf = crypt
681
-	}
682
-
683
-	// Write out the entire send buffer
684
-	metrics.IncrCounter([]string{"memberlist", "tcp", "sent"}, float32(len(sendBuf)))
685
-	if _, err := conn.Write(sendBuf); err != nil {
686
-		return err
687
-	}
688
-	return nil
661
+	return m.rawSendMsgTCP(conn, bufConn.Bytes())
689 662
 }
690 663
 
691 664
 // encryptLocalState is used to help encrypt local state before sending
... ...
@@ -743,38 +817,36 @@ func (m *Memberlist) decryptRemoteState(bufConn io.Reader) ([]byte, error) {
743 743
 	return decryptPayload(keys, cipherBytes, dataBytes)
744 744
 }
745 745
 
746
-// recvRemoteState is used to read the remote state from a connection
747
-func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []byte, error) {
748
-	// Setup a deadline
749
-	conn.SetDeadline(time.Now().Add(m.config.TCPTimeout))
750
-
746
+// readTCP is used to read the start of a TCP stream.
747
+// it decrypts and decompresses the stream if necessary
748
+func (m *Memberlist) readTCP(conn net.Conn) (messageType, io.Reader, *codec.Decoder, error) {
751 749
 	// Created a buffered reader
752 750
 	var bufConn io.Reader = bufio.NewReader(conn)
753 751
 
754 752
 	// Read the message type
755 753
 	buf := [1]byte{0}
756 754
 	if _, err := bufConn.Read(buf[:]); err != nil {
757
-		return false, nil, nil, err
755
+		return 0, nil, nil, err
758 756
 	}
759 757
 	msgType := messageType(buf[0])
760 758
 
761 759
 	// Check if the message is encrypted
762 760
 	if msgType == encryptMsg {
763 761
 		if !m.config.EncryptionEnabled() {
764
-			return false, nil, nil,
762
+			return 0, nil, nil,
765 763
 				fmt.Errorf("Remote state is encrypted and encryption is not configured")
766 764
 		}
767 765
 
768 766
 		plain, err := m.decryptRemoteState(bufConn)
769 767
 		if err != nil {
770
-			return false, nil, nil, err
768
+			return 0, nil, nil, err
771 769
 		}
772 770
 
773 771
 		// Reset message type and bufConn
774 772
 		msgType = messageType(plain[0])
775 773
 		bufConn = bytes.NewReader(plain[1:])
776 774
 	} else if m.config.EncryptionEnabled() {
777
-		return false, nil, nil,
775
+		return 0, nil, nil,
778 776
 			fmt.Errorf("Encryption is configured but remote state is not encrypted")
779 777
 	}
780 778
 
... ...
@@ -786,11 +858,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by
786 786
 	if msgType == compressMsg {
787 787
 		var c compress
788 788
 		if err := dec.Decode(&c); err != nil {
789
-			return false, nil, nil, err
789
+			return 0, nil, nil, err
790 790
 		}
791 791
 		decomp, err := decompressBuffer(&c)
792 792
 		if err != nil {
793
-			return false, nil, nil, err
793
+			return 0, nil, nil, err
794 794
 		}
795 795
 
796 796
 		// Reset the message type
... ...
@@ -803,12 +875,11 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by
803 803
 		dec = codec.NewDecoder(bufConn, &hd)
804 804
 	}
805 805
 
806
-	// Quit if not push/pull
807
-	if msgType != pushPullMsg {
808
-		err := fmt.Errorf("received invalid msgType (%d)", msgType)
809
-		return false, nil, nil, err
810
-	}
806
+	return msgType, bufConn, dec, nil
807
+}
811 808
 
809
+// readRemoteState is used to read the remote state from a connection
810
+func (m *Memberlist) readRemoteState(bufConn io.Reader, dec *codec.Decoder) (bool, []pushNodeState, []byte, error) {
812 811
 	// Read the push/pull header
813 812
 	var header pushPullHeader
814 813
 	if err := dec.Decode(&header); err != nil {
... ...
@@ -821,7 +892,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by
821 821
 	// Try to decode all the states
822 822
 	for i := 0; i < header.Nodes; i++ {
823 823
 		if err := dec.Decode(&remoteNodes[i]); err != nil {
824
-			return false, remoteNodes, nil, err
824
+			return false, nil, nil, err
825 825
 		}
826 826
 	}
827 827
 
... ...
@@ -836,7 +907,7 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by
836 836
 				bytes, header.UserStateLen)
837 837
 		}
838 838
 		if err != nil {
839
-			return false, remoteNodes, nil, err
839
+			return false, nil, nil, err
840 840
 		}
841 841
 	}
842 842
 
... ...
@@ -850,3 +921,119 @@ func (m *Memberlist) readRemoteState(conn net.Conn) (bool, []pushNodeState, []by
850 850
 
851 851
 	return header.Join, remoteNodes, userBuf, nil
852 852
 }
853
+
854
+// mergeRemoteState is used to merge the remote state with our local state
855
+func (m *Memberlist) mergeRemoteState(join bool, remoteNodes []pushNodeState, userBuf []byte) error {
856
+	if err := m.verifyProtocol(remoteNodes); err != nil {
857
+		return err
858
+	}
859
+
860
+	// Invoke the merge delegate if any
861
+	if join && m.config.Merge != nil {
862
+		nodes := make([]*Node, len(remoteNodes))
863
+		for idx, n := range remoteNodes {
864
+			nodes[idx] = &Node{
865
+				Name: n.Name,
866
+				Addr: n.Addr,
867
+				Port: n.Port,
868
+				Meta: n.Meta,
869
+				PMin: n.Vsn[0],
870
+				PMax: n.Vsn[1],
871
+				PCur: n.Vsn[2],
872
+				DMin: n.Vsn[3],
873
+				DMax: n.Vsn[4],
874
+				DCur: n.Vsn[5],
875
+			}
876
+		}
877
+		if err := m.config.Merge.NotifyMerge(nodes); err != nil {
878
+			return err
879
+		}
880
+	}
881
+
882
+	// Merge the membership state
883
+	m.mergeState(remoteNodes)
884
+
885
+	// Invoke the delegate for user state
886
+	if userBuf != nil && m.config.Delegate != nil {
887
+		m.config.Delegate.MergeRemoteState(userBuf, join)
888
+	}
889
+	return nil
890
+}
891
+
892
+// readUserMsg is used to decode a userMsg from a TCP stream
893
+func (m *Memberlist) readUserMsg(bufConn io.Reader, dec *codec.Decoder) error {
894
+	// Read the user message header
895
+	var header userMsgHeader
896
+	if err := dec.Decode(&header); err != nil {
897
+		return err
898
+	}
899
+
900
+	// Read the user message into a buffer
901
+	var userBuf []byte
902
+	if header.UserMsgLen > 0 {
903
+		userBuf = make([]byte, header.UserMsgLen)
904
+		bytes, err := io.ReadAtLeast(bufConn, userBuf, header.UserMsgLen)
905
+		if err == nil && bytes != header.UserMsgLen {
906
+			err = fmt.Errorf(
907
+				"Failed to read full user message (%d / %d)",
908
+				bytes, header.UserMsgLen)
909
+		}
910
+		if err != nil {
911
+			return err
912
+		}
913
+
914
+		d := m.config.Delegate
915
+		if d != nil {
916
+			d.NotifyMsg(userBuf)
917
+		}
918
+	}
919
+
920
+	return nil
921
+}
922
+
923
+// sendPingAndWaitForAck makes a TCP connection to the given address, sends
924
+// a ping, and waits for an ack. All of this is done as a series of blocking
925
+// operations, given the deadline. The bool return parameter is true if we
926
+// we able to round trip a ping to the other node.
927
+func (m *Memberlist) sendPingAndWaitForAck(destAddr net.Addr, ping ping, deadline time.Time) (bool, error) {
928
+	dialer := net.Dialer{Deadline: deadline}
929
+	conn, err := dialer.Dial("tcp", destAddr.String())
930
+	if err != nil {
931
+		// If the node is actually dead we expect this to fail, so we
932
+		// shouldn't spam the logs with it. After this point, errors
933
+		// with the connection are real, unexpected errors and should
934
+		// get propagated up.
935
+		return false, nil
936
+	}
937
+	defer conn.Close()
938
+	conn.SetDeadline(deadline)
939
+
940
+	out, err := encode(pingMsg, &ping)
941
+	if err != nil {
942
+		return false, err
943
+	}
944
+
945
+	if err = m.rawSendMsgTCP(conn, out.Bytes()); err != nil {
946
+		return false, err
947
+	}
948
+
949
+	msgType, _, dec, err := m.readTCP(conn)
950
+	if err != nil {
951
+		return false, err
952
+	}
953
+
954
+	if msgType != ackRespMsg {
955
+		return false, fmt.Errorf("Unexpected msgType (%d) from TCP ping %s", msgType, LogConn(conn))
956
+	}
957
+
958
+	var ack ackResp
959
+	if err = dec.Decode(&ack); err != nil {
960
+		return false, err
961
+	}
962
+
963
+	if ack.SeqNo != ping.SeqNo {
964
+		return false, fmt.Errorf("Sequence number from ack (%d) doesn't match ping (%d) from TCP ping %s", ack.SeqNo, ping.SeqNo, LogConn(conn))
965
+	}
966
+
967
+	return true, nil
968
+}
853 969
new file mode 100644
... ...
@@ -0,0 +1,14 @@
0
+package memberlist
1
+
2
+import "time"
3
+
4
+// PingDelegate is used to notify an observer how long it took for a ping message to
5
+// complete a round trip.  It can also be used for writing arbitrary byte slices
6
+// into ack messages. Note that in order to be meaningful for RTT estimates, this
7
+// delegate does not apply to indirect pings, nor fallback pings sent over TCP.
8
+type PingDelegate interface {
9
+	// AckPayload is invoked when an ack is being sent; the returned bytes will be appended to the ack
10
+	AckPayload() []byte
11
+	// NotifyPing is invoked when an ack for a ping is received
12
+	NotifyPingComplete(other *Node, rtt time.Duration, payload []byte)
13
+}
... ...
@@ -44,10 +44,20 @@ type nodeState struct {
44 44
 
45 45
 // ackHandler is used to register handlers for incoming acks
46 46
 type ackHandler struct {
47
-	handler func()
47
+	handler func([]byte, time.Time)
48 48
 	timer   *time.Timer
49 49
 }
50 50
 
51
+// NoPingResponseError is used to indicate a 'ping' packet was
52
+// successfully issued but no response was received
53
+type NoPingResponseError struct {
54
+	node string
55
+}
56
+
57
+func (f NoPingResponseError) Error() string {
58
+	return fmt.Sprintf("No response from node %s", f.node)
59
+}
60
+
51 61
 // Schedule is used to ensure the Tick is performed periodically. This
52 62
 // function is safe to call multiple times. If the memberlist is already
53 63
 // scheduled, then it won't do anything.
... ...
@@ -128,9 +138,7 @@ func (m *Memberlist) pushPullTrigger(stop <-chan struct{}) {
128 128
 
129 129
 	// Tick using a dynamic timer
130 130
 	for {
131
-		m.nodeLock.RLock()
132
-		tickTime := pushPullScale(interval, len(m.nodes))
133
-		m.nodeLock.RUnlock()
131
+		tickTime := pushPullScale(interval, m.estNumNodes())
134 132
 		select {
135 133
 		case <-time.After(tickTime):
136 134
 			m.pushPull()
... ...
@@ -207,46 +215,55 @@ START:
207 207
 	m.probeNode(&node)
208 208
 }
209 209
 
210
-// probeNode handles a single round of failure checking on a node
210
+// probeNode handles a single round of failure checking on a node.
211 211
 func (m *Memberlist) probeNode(node *nodeState) {
212 212
 	defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now())
213 213
 
214
-	// Send a ping to the node
214
+	// Prepare a ping message and setup an ack handler.
215 215
 	ping := ping{SeqNo: m.nextSeqNo(), Node: node.Name}
216
-	destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)}
217
-
218
-	// Setup an ack handler
219
-	ackCh := make(chan bool, m.config.IndirectChecks+1)
216
+	ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
220 217
 	m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval)
221 218
 
222
-	// Send the ping message
219
+	// Send a ping to the node.
220
+	deadline := time.Now().Add(m.config.ProbeInterval)
221
+	destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)}
223 222
 	if err := m.encodeAndSendMsg(destAddr, pingMsg, &ping); err != nil {
224 223
 		m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err)
225 224
 		return
226 225
 	}
227 226
 
228
-	// Wait for response or round-trip-time
227
+	// Mark the sent time here, which should be after any pre-processing and
228
+	// system calls to do the actual send. This probably under-reports a bit,
229
+	// but it's the best we can do.
230
+	sent := time.Now()
231
+
232
+	// Wait for response or round-trip-time.
229 233
 	select {
230 234
 	case v := <-ackCh:
231
-		if v == true {
235
+		if v.Complete == true {
236
+			if m.config.Ping != nil {
237
+				rtt := v.Timestamp.Sub(sent)
238
+				m.config.Ping.NotifyPingComplete(&node.Node, rtt, v.Payload)
239
+			}
232 240
 			return
233 241
 		}
234 242
 
235 243
 		// As an edge case, if we get a timeout, we need to re-enqueue it
236
-		// here to break out of the select below
237
-		if v == false {
244
+		// here to break out of the select below.
245
+		if v.Complete == false {
238 246
 			ackCh <- v
239 247
 		}
240 248
 	case <-time.After(m.config.ProbeTimeout):
249
+		m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node.Name)
241 250
 	}
242 251
 
243
-	// Get some random live nodes
252
+	// Get some random live nodes.
244 253
 	m.nodeLock.RLock()
245 254
 	excludes := []string{m.config.Name, node.Name}
246 255
 	kNodes := kRandomNodes(m.config.IndirectChecks, excludes, m.nodes)
247 256
 	m.nodeLock.RUnlock()
248 257
 
249
-	// Attempt an indirect ping
258
+	// Attempt an indirect ping.
250 259
 	ind := indirectPingReq{SeqNo: ping.SeqNo, Target: node.Addr, Port: node.Port, Node: node.Name}
251 260
 	for _, peer := range kNodes {
252 261
 		destAddr := &net.UDPAddr{IP: peer.Addr, Port: int(peer.Port)}
... ...
@@ -255,10 +272,49 @@ func (m *Memberlist) probeNode(node *nodeState) {
255 255
 		}
256 256
 	}
257 257
 
258
-	// Wait for the acks or timeout
258
+	// Also make an attempt to contact the node directly over TCP. This
259
+	// helps prevent confused clients who get isolated from UDP traffic
260
+	// but can still speak TCP (which also means they can possibly report
261
+	// misinformation to other nodes via anti-entropy), avoiding flapping in
262
+	// the cluster.
263
+	//
264
+	// This is a little unusual because we will attempt a TCP ping to any
265
+	// member who understands version 3 of the protocol, regardless of
266
+	// which protocol version we are speaking. That's why we've included a
267
+	// config option to turn this off if desired.
268
+	fallbackCh := make(chan bool, 1)
269
+	if (!m.config.DisableTcpPings) && (node.PMax >= 3) {
270
+		destAddr := &net.TCPAddr{IP: node.Addr, Port: int(node.Port)}
271
+		go func() {
272
+			defer close(fallbackCh)
273
+			didContact, err := m.sendPingAndWaitForAck(destAddr, ping, deadline)
274
+			if err != nil {
275
+				m.logger.Printf("[ERR] memberlist: Failed TCP fallback ping: %s", err)
276
+			} else {
277
+				fallbackCh <- didContact
278
+			}
279
+		}()
280
+	} else {
281
+		close(fallbackCh)
282
+	}
283
+
284
+	// Wait for the acks or timeout. Note that we don't check the fallback
285
+	// channel here because we want to issue a warning below if that's the
286
+	// *only* way we hear back from the peer, so we have to let this time
287
+	// out first to allow the normal UDP-based acks to come in.
259 288
 	select {
260 289
 	case v := <-ackCh:
261
-		if v == true {
290
+		if v.Complete == true {
291
+			return
292
+		}
293
+	}
294
+
295
+	// Finally, poll the fallback channel. The timeouts are set such that
296
+	// the channel will have something or be closed without having to wait
297
+	// any additional time here.
298
+	for didContact := range fallbackCh {
299
+		if didContact {
300
+			m.logger.Printf("[WARN] memberlist: Was able to reach %s via TCP but not UDP, network may be misconfigured and not allowing bidirectional UDP", node.Name)
262 301
 			return
263 302
 		}
264 303
 	}
... ...
@@ -269,6 +325,37 @@ func (m *Memberlist) probeNode(node *nodeState) {
269 269
 	m.suspectNode(&s)
270 270
 }
271 271
 
272
+// Ping initiates a ping to the node with the specified name.
273
+func (m *Memberlist) Ping(node string, addr net.Addr) (time.Duration, error) {
274
+	// Prepare a ping message and setup an ack handler.
275
+	ping := ping{SeqNo: m.nextSeqNo(), Node: node}
276
+	ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
277
+	m.setAckChannel(ping.SeqNo, ackCh, m.config.ProbeInterval)
278
+
279
+	// Send a ping to the node.
280
+	if err := m.encodeAndSendMsg(addr, pingMsg, &ping); err != nil {
281
+		return 0, err
282
+	}
283
+
284
+	// Mark the sent time here, which should be after any pre-processing and
285
+	// system calls to do the actual send. This probably under-reports a bit,
286
+	// but it's the best we can do.
287
+	sent := time.Now()
288
+
289
+	// Wait for response or timeout.
290
+	select {
291
+	case v := <-ackCh:
292
+		if v.Complete == true {
293
+			return v.Timestamp.Sub(sent), nil
294
+		}
295
+	case <-time.After(m.config.ProbeTimeout):
296
+		// Timeout, return an error below.
297
+	}
298
+
299
+	m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node)
300
+	return 0, NoPingResponseError{ping.Node}
301
+}
302
+
272 303
 // resetNodes is used when the tick wraps around. It will reap the
273 304
 // dead nodes and shuffle the node list.
274 305
 func (m *Memberlist) resetNodes() {
... ...
@@ -287,6 +374,9 @@ func (m *Memberlist) resetNodes() {
287 287
 	// Trim the nodes to exclude the dead nodes
288 288
 	m.nodes = m.nodes[0:deadIdx]
289 289
 
290
+	// Update numNodes after we've trimmed the dead nodes
291
+	atomic.StoreUint32(&m.numNodes, uint32(deadIdx))
292
+
290 293
 	// Shuffle live nodes
291 294
 	shuffleNodes(m.nodes)
292 295
 }
... ...
@@ -320,7 +410,7 @@ func (m *Memberlist) gossip() {
320 320
 
321 321
 		// Send the compound message
322 322
 		destAddr := &net.UDPAddr{IP: node.Addr, Port: int(node.Port)}
323
-		if err := m.rawSendMsg(destAddr, compound.Bytes()); err != nil {
323
+		if err := m.rawSendMsgUDP(destAddr, compound.Bytes()); err != nil {
324 324
 			m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", destAddr, err)
325 325
 		}
326 326
 	}
... ...
@@ -359,40 +449,9 @@ func (m *Memberlist) pushPullNode(addr []byte, port uint16, join bool) error {
359 359
 		return err
360 360
 	}
361 361
 
362
-	if err := m.verifyProtocol(remote); err != nil {
362
+	if err := m.mergeRemoteState(join, remote, userState); err != nil {
363 363
 		return err
364 364
 	}
365
-
366
-	// Invoke the merge delegate if any
367
-	if join && m.config.Merge != nil {
368
-		nodes := make([]*Node, len(remote))
369
-		for idx, n := range remote {
370
-			nodes[idx] = &Node{
371
-				Name: n.Name,
372
-				Addr: n.Addr,
373
-				Port: n.Port,
374
-				Meta: n.Meta,
375
-				PMin: n.Vsn[0],
376
-				PMax: n.Vsn[1],
377
-				PCur: n.Vsn[2],
378
-				DMin: n.Vsn[3],
379
-				DMax: n.Vsn[4],
380
-				DCur: n.Vsn[5],
381
-			}
382
-		}
383
-		if m.config.Merge.NotifyMerge(nodes) {
384
-			m.logger.Printf("[WARN] memberlist: Cluster merge canceled")
385
-			return fmt.Errorf("Merge canceled")
386
-		}
387
-	}
388
-
389
-	// Merge the state
390
-	m.mergeState(remote)
391
-
392
-	// Invoke the delegate
393
-	if m.config.Delegate != nil {
394
-		m.config.Delegate.MergeRemoteState(userState, join)
395
-	}
396 365
 	return nil
397 366
 }
398 367
 
... ...
@@ -525,14 +584,24 @@ func (m *Memberlist) nextIncarnation() uint32 {
525 525
 	return atomic.AddUint32(&m.incarnation, 1)
526 526
 }
527 527
 
528
-// setAckChannel is used to attach a channel to receive a message when
529
-// an ack with a given sequence number is received. The channel gets sent
530
-// false on timeout
531
-func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Duration) {
528
+// estNumNodes is used to get the current estimate of the number of nodes
529
+func (m *Memberlist) estNumNodes() int {
530
+	return int(atomic.LoadUint32(&m.numNodes))
531
+}
532
+
533
+type ackMessage struct {
534
+	Complete  bool
535
+	Payload   []byte
536
+	Timestamp time.Time
537
+}
538
+
539
+// setAckChannel is used to attach a channel to receive a message when an ack with a given
540
+// sequence number is received. The `complete` field of the message will be false on timeout
541
+func (m *Memberlist) setAckChannel(seqNo uint32, ch chan ackMessage, timeout time.Duration) {
532 542
 	// Create a handler function
533
-	handler := func() {
543
+	handler := func(payload []byte, timestamp time.Time) {
534 544
 		select {
535
-		case ch <- true:
545
+		case ch <- ackMessage{true, payload, timestamp}:
536 546
 		default:
537 547
 		}
538 548
 	}
... ...
@@ -549,7 +618,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura
549 549
 		delete(m.ackHandlers, seqNo)
550 550
 		m.ackLock.Unlock()
551 551
 		select {
552
-		case ch <- false:
552
+		case ch <- ackMessage{false, nil, time.Now()}:
553 553
 		default:
554 554
 		}
555 555
 	})
... ...
@@ -558,7 +627,7 @@ func (m *Memberlist) setAckChannel(seqNo uint32, ch chan bool, timeout time.Dura
558 558
 // setAckHandler is used to attach a handler to be invoked when an
559 559
 // ack with a given sequence number is received. If a timeout is reached,
560 560
 // the handler is deleted
561
-func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Duration) {
561
+func (m *Memberlist) setAckHandler(seqNo uint32, handler func([]byte, time.Time), timeout time.Duration) {
562 562
 	// Add the handler
563 563
 	ah := &ackHandler{handler, nil}
564 564
 	m.ackLock.Lock()
... ...
@@ -574,16 +643,16 @@ func (m *Memberlist) setAckHandler(seqNo uint32, handler func(), timeout time.Du
574 574
 }
575 575
 
576 576
 // Invokes an Ack handler if any is associated, and reaps the handler immediately
577
-func (m *Memberlist) invokeAckHandler(seqNo uint32) {
577
+func (m *Memberlist) invokeAckHandler(ack ackResp, timestamp time.Time) {
578 578
 	m.ackLock.Lock()
579
-	ah, ok := m.ackHandlers[seqNo]
580
-	delete(m.ackHandlers, seqNo)
579
+	ah, ok := m.ackHandlers[ack.SeqNo]
580
+	delete(m.ackHandlers, ack.SeqNo)
581 581
 	m.ackLock.Unlock()
582 582
 	if !ok {
583 583
 		return
584 584
 	}
585 585
 	ah.timer.Stop()
586
-	ah.handler()
586
+	ah.handler(ack.Payload, timestamp)
587 587
 }
588 588
 
589 589
 // aliveNode is invoked by the network layer when we get a message about a
... ...
@@ -601,6 +670,30 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
601 601
 		return
602 602
 	}
603 603
 
604
+	// Invoke the Alive delegate if any. This can be used to filter out
605
+	// alive messages based on custom logic. For example, using a cluster name.
606
+	// Using a merge delegate is not enough, as it is possible for passive
607
+	// cluster merging to still occur.
608
+	if m.config.Alive != nil {
609
+		node := &Node{
610
+			Name: a.Node,
611
+			Addr: a.Addr,
612
+			Port: a.Port,
613
+			Meta: a.Meta,
614
+			PMin: a.Vsn[0],
615
+			PMax: a.Vsn[1],
616
+			PCur: a.Vsn[2],
617
+			DMin: a.Vsn[3],
618
+			DMax: a.Vsn[4],
619
+			DCur: a.Vsn[5],
620
+		}
621
+		if err := m.config.Alive.NotifyAlive(node); err != nil {
622
+			m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s': %s",
623
+				a.Node, err)
624
+			return
625
+		}
626
+	}
627
+
604 628
 	// Check if we've never seen this node before, and if not, then
605 629
 	// store this node in our node map.
606 630
 	if !ok {
... ...
@@ -627,6 +720,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
627 627
 		// Add at the end and swap with the node at the offset
628 628
 		m.nodes = append(m.nodes, state)
629 629
 		m.nodes[offset], m.nodes[n] = m.nodes[n], m.nodes[offset]
630
+
631
+		// Update numNodes after we've added a new node
632
+		atomic.AddUint32(&m.numNodes, 1)
630 633
 	}
631 634
 
632 635
 	// Check if this address is different than the existing node
... ...
@@ -658,9 +754,6 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
658 658
 		return
659 659
 	}
660 660
 
661
-	// Update metrics
662
-	metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1)
663
-
664 661
 	// Store the old state and meta data
665 662
 	oldState := state.State
666 663
 	oldMeta := state.Meta
... ...
@@ -728,6 +821,9 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
728 728
 		}
729 729
 	}
730 730
 
731
+	// Update metrics
732
+	metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1)
733
+
731 734
 	// Notify the delegate of any relevant updates
732 735
 	if m.config.Events != nil {
733 736
 		if oldState == stateDead {
... ...
@@ -799,7 +895,7 @@ func (m *Memberlist) suspectNode(s *suspect) {
799 799
 	state.StateChange = changeTime
800 800
 
801 801
 	// Setup a timeout for this
802
-	timeout := suspicionTimeout(m.config.SuspicionMult, len(m.nodes), m.config.ProbeInterval)
802
+	timeout := suspicionTimeout(m.config.SuspicionMult, m.estNumNodes(), m.config.ProbeInterval)
803 803
 	time.AfterFunc(timeout, func() {
804 804
 		m.nodeLock.Lock()
805 805
 		state, ok := m.nodeMap[s.Node]
... ...
@@ -5,12 +5,14 @@ import (
5 5
 	"compress/lzw"
6 6
 	"encoding/binary"
7 7
 	"fmt"
8
-	"github.com/hashicorp/go-msgpack/codec"
9 8
 	"io"
10 9
 	"math"
11 10
 	"math/rand"
12 11
 	"net"
12
+	"strings"
13 13
 	"time"
14
+
15
+	"github.com/hashicorp/go-msgpack/codec"
14 16
 )
15 17
 
16 18
 // pushPullScale is the minimum number of nodes
... ...
@@ -23,8 +25,11 @@ const pushPullScaleThreshold = 32
23 23
 /*
24 24
  * Contains an entry for each private block:
25 25
  * 10.0.0.0/8
26
+ * 100.64.0.0/10
27
+ * 127.0.0.0/8
28
+ * 169.254.0.0/16
26 29
  * 172.16.0.0/12
27
- * 192.168/16
30
+ * 192.168.0.0/16
28 31
  */
29 32
 var privateBlocks []*net.IPNet
30 33
 
... ...
@@ -40,25 +45,44 @@ func init() {
40 40
 	rand.Seed(time.Now().UnixNano())
41 41
 
42 42
 	// Add each private block
43
-	privateBlocks = make([]*net.IPNet, 3)
43
+	privateBlocks = make([]*net.IPNet, 6)
44
+
44 45
 	_, block, err := net.ParseCIDR("10.0.0.0/8")
45 46
 	if err != nil {
46 47
 		panic(fmt.Sprintf("Bad cidr. Got %v", err))
47 48
 	}
48 49
 	privateBlocks[0] = block
49 50
 
50
-	_, block, err = net.ParseCIDR("172.16.0.0/12")
51
+	_, block, err = net.ParseCIDR("100.64.0.0/10")
51 52
 	if err != nil {
52 53
 		panic(fmt.Sprintf("Bad cidr. Got %v", err))
53 54
 	}
54 55
 	privateBlocks[1] = block
55 56
 
56
-	_, block, err = net.ParseCIDR("192.168.0.0/16")
57
+	_, block, err = net.ParseCIDR("127.0.0.0/8")
57 58
 	if err != nil {
58 59
 		panic(fmt.Sprintf("Bad cidr. Got %v", err))
59 60
 	}
60 61
 	privateBlocks[2] = block
61 62
 
63
+	_, block, err = net.ParseCIDR("169.254.0.0/16")
64
+	if err != nil {
65
+		panic(fmt.Sprintf("Bad cidr. Got %v", err))
66
+	}
67
+	privateBlocks[3] = block
68
+
69
+	_, block, err = net.ParseCIDR("172.16.0.0/12")
70
+	if err != nil {
71
+		panic(fmt.Sprintf("Bad cidr. Got %v", err))
72
+	}
73
+	privateBlocks[4] = block
74
+
75
+	_, block, err = net.ParseCIDR("192.168.0.0/16")
76
+	if err != nil {
77
+		panic(fmt.Sprintf("Bad cidr. Got %v", err))
78
+	}
79
+	privateBlocks[5] = block
80
+
62 81
 	_, block, err = net.ParseCIDR("127.0.0.0/8")
63 82
 	if err != nil {
64 83
 		panic(fmt.Sprintf("Bad cidr. Got %v", err))
... ...
@@ -84,6 +108,42 @@ func encode(msgType messageType, in interface{}) (*bytes.Buffer, error) {
84 84
 	return buf, err
85 85
 }
86 86
 
87
+// GetPrivateIP returns the first private IP address found in a list of
88
+// addresses.
89
+func GetPrivateIP(addresses []net.Addr) (net.IP, error) {
90
+	var candidates []net.IP
91
+
92
+	// Find private IPv4 address
93
+	for _, rawAddr := range addresses {
94
+		var ip net.IP
95
+		switch addr := rawAddr.(type) {
96
+		case *net.IPAddr:
97
+			ip = addr.IP
98
+		case *net.IPNet:
99
+			ip = addr.IP
100
+		default:
101
+			continue
102
+		}
103
+
104
+		if ip.To4() == nil {
105
+			continue
106
+		}
107
+		if !IsPrivateIP(ip.String()) {
108
+			continue
109
+		}
110
+		candidates = append(candidates, ip)
111
+	}
112
+	numIps := len(candidates)
113
+	switch numIps {
114
+	case 0:
115
+		return nil, fmt.Errorf("No private IP address found")
116
+	case 1:
117
+		return candidates[0], nil
118
+	default:
119
+		return nil, fmt.Errorf("Multiple private IPs found. Please configure one.")
120
+	}
121
+}
122
+
87 123
 // Returns a random offset between 0 and n
88 124
 func randomOffset(n int) int {
89 125
 	if n == 0 {
... ...
@@ -107,9 +167,10 @@ func retransmitLimit(retransmitMult, n int) int {
107 107
 	return limit
108 108
 }
109 109
 
110
-// shuffleNodes randomly shuffles the input nodes
110
+// shuffleNodes randomly shuffles the input nodes using the Fisher-Yates shuffle
111 111
 func shuffleNodes(nodes []*nodeState) {
112
-	for i := range nodes {
112
+	n := len(nodes)
113
+	for i := n - 1; i > 0; i-- {
113 114
 		j := rand.Intn(i + 1)
114 115
 		nodes[i], nodes[j] = nodes[j], nodes[i]
115 116
 	}
... ...
@@ -250,7 +311,7 @@ func decodeCompoundMessage(buf []byte) (trunc int, parts [][]byte, err error) {
250 250
 }
251 251
 
252 252
 // Returns if the given IP is in a private block
253
-func isPrivateIP(ip_str string) bool {
253
+func IsPrivateIP(ip_str string) bool {
254 254
 	ip := net.ParseIP(ip_str)
255 255
 	for _, priv := range privateBlocks {
256 256
 		if priv.Contains(ip) {
... ...
@@ -266,6 +327,12 @@ func isLoopbackIP(ip_str string) bool {
266 266
 	return loopbackBlock.Contains(ip)
267 267
 }
268 268
 
269
+// Given a string of the form "host", "host:port", or "[ipv6::address]:port",
270
+// return true if the string includes a port.
271
+func hasPort(s string) bool {
272
+	return strings.LastIndex(s, ":") > strings.LastIndex(s, "]")
273
+}
274
+
269 275
 // compressPayload takes an opaque input buffer, compresses it
270 276
 // and wraps it in a compress{} message that is encoded.
271 277
 func compressPayload(inp []byte) (*bytes.Buffer, error) {
272 278
new file mode 100644
... ...
@@ -0,0 +1,180 @@
0
+package coordinate
1
+
2
+import (
3
+	"fmt"
4
+	"math"
5
+	"sort"
6
+	"sync"
7
+	"time"
8
+)
9
+
10
+// Client manages the estimated network coordinate for a given node, and adjusts
11
+// it as the node observes round trip times and estimated coordinates from other
12
+// nodes. The core algorithm is based on Vivaldi, see the documentation for Config
13
+// for more details.
14
+type Client struct {
15
+	// coord is the current estimate of the client's network coordinate.
16
+	coord *Coordinate
17
+
18
+	// origin is a coordinate sitting at the origin.
19
+	origin *Coordinate
20
+
21
+	// config contains the tuning parameters that govern the performance of
22
+	// the algorithm.
23
+	config *Config
24
+
25
+	// adjustmentIndex is the current index into the adjustmentSamples slice.
26
+	adjustmentIndex uint
27
+
28
+	// adjustment is used to store samples for the adjustment calculation.
29
+	adjustmentSamples []float64
30
+
31
+	// latencyFilterSamples is used to store the last several RTT samples,
32
+	// keyed by node name. We will use the config's LatencyFilterSamples
33
+	// value to determine how many samples we keep, per node.
34
+	latencyFilterSamples map[string][]float64
35
+
36
+	// mutex enables safe concurrent access to the client.
37
+	mutex sync.RWMutex
38
+}
39
+
40
+// NewClient creates a new Client and verifies the configuration is valid.
41
+func NewClient(config *Config) (*Client, error) {
42
+	if !(config.Dimensionality > 0) {
43
+		return nil, fmt.Errorf("dimensionality must be >0")
44
+	}
45
+
46
+	return &Client{
47
+		coord:                NewCoordinate(config),
48
+		origin:               NewCoordinate(config),
49
+		config:               config,
50
+		adjustmentIndex:      0,
51
+		adjustmentSamples:    make([]float64, config.AdjustmentWindowSize),
52
+		latencyFilterSamples: make(map[string][]float64),
53
+	}, nil
54
+}
55
+
56
+// GetCoordinate returns a copy of the coordinate for this client.
57
+func (c *Client) GetCoordinate() *Coordinate {
58
+	c.mutex.RLock()
59
+	defer c.mutex.RUnlock()
60
+
61
+	return c.coord.Clone()
62
+}
63
+
64
+// SetCoordinate forces the client's coordinate to a known state.
65
+func (c *Client) SetCoordinate(coord *Coordinate) {
66
+	c.mutex.Lock()
67
+	defer c.mutex.Unlock()
68
+
69
+	c.coord = coord.Clone()
70
+}
71
+
72
+// ForgetNode removes any client state for the given node.
73
+func (c *Client) ForgetNode(node string) {
74
+	c.mutex.Lock()
75
+	defer c.mutex.Unlock()
76
+
77
+	delete(c.latencyFilterSamples, node)
78
+}
79
+
80
+// latencyFilter applies a simple moving median filter with a new sample for
81
+// a node. This assumes that the mutex has been locked already.
82
+func (c *Client) latencyFilter(node string, rttSeconds float64) float64 {
83
+	samples, ok := c.latencyFilterSamples[node]
84
+	if !ok {
85
+		samples = make([]float64, 0, c.config.LatencyFilterSize)
86
+	}
87
+
88
+	// Add the new sample and trim the list, if needed.
89
+	samples = append(samples, rttSeconds)
90
+	if len(samples) > int(c.config.LatencyFilterSize) {
91
+		samples = samples[1:]
92
+	}
93
+	c.latencyFilterSamples[node] = samples
94
+
95
+	// Sort a copy of the samples and return the median.
96
+	sorted := make([]float64, len(samples))
97
+	copy(sorted, samples)
98
+	sort.Float64s(sorted)
99
+	return sorted[len(sorted)/2]
100
+}
101
+
102
+// updateVivialdi updates the Vivaldi portion of the client's coordinate. This
103
+// assumes that the mutex has been locked already.
104
+func (c *Client) updateVivaldi(other *Coordinate, rttSeconds float64) {
105
+	const zeroThreshold = 1.0e-6
106
+
107
+	dist := c.coord.DistanceTo(other).Seconds()
108
+	if rttSeconds < zeroThreshold {
109
+		rttSeconds = zeroThreshold
110
+	}
111
+	wrongness := math.Abs(dist-rttSeconds) / rttSeconds
112
+
113
+	totalError := c.coord.Error + other.Error
114
+	if totalError < zeroThreshold {
115
+		totalError = zeroThreshold
116
+	}
117
+	weight := c.coord.Error / totalError
118
+
119
+	c.coord.Error = c.config.VivaldiCE*weight*wrongness + c.coord.Error*(1.0-c.config.VivaldiCE*weight)
120
+	if c.coord.Error > c.config.VivaldiErrorMax {
121
+		c.coord.Error = c.config.VivaldiErrorMax
122
+	}
123
+
124
+	delta := c.config.VivaldiCC * weight
125
+	force := delta * (rttSeconds - dist)
126
+	c.coord = c.coord.ApplyForce(c.config, force, other)
127
+}
128
+
129
+// updateAdjustment updates the adjustment portion of the client's coordinate, if
130
+// the feature is enabled. This assumes that the mutex has been locked already.
131
+func (c *Client) updateAdjustment(other *Coordinate, rttSeconds float64) {
132
+	if c.config.AdjustmentWindowSize == 0 {
133
+		return
134
+	}
135
+
136
+	// Note that the existing adjustment factors don't figure in to this
137
+	// calculation so we use the raw distance here.
138
+	dist := c.coord.rawDistanceTo(other)
139
+	c.adjustmentSamples[c.adjustmentIndex] = rttSeconds - dist
140
+	c.adjustmentIndex = (c.adjustmentIndex + 1) % c.config.AdjustmentWindowSize
141
+
142
+	sum := 0.0
143
+	for _, sample := range c.adjustmentSamples {
144
+		sum += sample
145
+	}
146
+	c.coord.Adjustment = sum / (2.0 * float64(c.config.AdjustmentWindowSize))
147
+}
148
+
149
+// updateGravity applies a small amount of gravity to pull coordinates towards
150
+// the center of the coordinate system to combat drift. This assumes that the
151
+// mutex is locked already.
152
+func (c *Client) updateGravity() {
153
+	dist := c.origin.DistanceTo(c.coord).Seconds()
154
+	force := -1.0 * math.Pow(dist/c.config.GravityRho, 2.0)
155
+	c.coord = c.coord.ApplyForce(c.config, force, c.origin)
156
+}
157
+
158
+// Update takes other, a coordinate for another node, and rtt, a round trip
159
+// time observation for a ping to that node, and updates the estimated position of
160
+// the client's coordinate. Returns the updated coordinate.
161
+func (c *Client) Update(node string, other *Coordinate, rtt time.Duration) *Coordinate {
162
+	c.mutex.Lock()
163
+	defer c.mutex.Unlock()
164
+
165
+	rttSeconds := c.latencyFilter(node, rtt.Seconds())
166
+	c.updateVivaldi(other, rttSeconds)
167
+	c.updateAdjustment(other, rttSeconds)
168
+	c.updateGravity()
169
+	return c.coord.Clone()
170
+}
171
+
172
+// DistanceTo returns the estimated RTT from the client's coordinate to other, the
173
+// coordinate for another node.
174
+func (c *Client) DistanceTo(other *Coordinate) time.Duration {
175
+	c.mutex.RLock()
176
+	defer c.mutex.RUnlock()
177
+
178
+	return c.coord.DistanceTo(other)
179
+}
0 180
new file mode 100644
... ...
@@ -0,0 +1,70 @@
0
+package coordinate
1
+
2
+// Config is used to set the parameters of the Vivaldi-based coordinate mapping
3
+// algorithm.
4
+//
5
+// The following references are called out at various points in the documentation
6
+// here:
7
+//
8
+// [1] Dabek, Frank, et al. "Vivaldi: A decentralized network coordinate system."
9
+//     ACM SIGCOMM Computer Communication Review. Vol. 34. No. 4. ACM, 2004.
10
+// [2] Ledlie, Jonathan, Paul Gardner, and Margo I. Seltzer. "Network Coordinates
11
+//     in the Wild." NSDI. Vol. 7. 2007.
12
+// [3] Lee, Sanghwan, et al. "On suitability of Euclidean embedding for
13
+//     host-based network coordinate systems." Networking, IEEE/ACM Transactions
14
+//     on 18.1 (2010): 27-40.
15
+type Config struct {
16
+	// The dimensionality of the coordinate system. As discussed in [2], more
17
+	// dimensions improves the accuracy of the estimates up to a point. Per [2]
18
+	// we chose 4 dimensions plus a non-Euclidean height.
19
+	Dimensionality uint
20
+
21
+	// VivaldiErrorMax is the default error value when a node hasn't yet made
22
+	// any observations. It also serves as an upper limit on the error value in
23
+	// case observations cause the error value to increase without bound.
24
+	VivaldiErrorMax float64
25
+
26
+	// VivaldiCE is a tuning factor that controls the maximum impact an
27
+	// observation can have on a node's confidence. See [1] for more details.
28
+	VivaldiCE float64
29
+
30
+	// VivaldiCC is a tuning factor that controls the maximum impact an
31
+	// observation can have on a node's coordinate. See [1] for more details.
32
+	VivaldiCC float64
33
+
34
+	// AdjustmentWindowSize is a tuning factor that determines how many samples
35
+	// we retain to calculate the adjustment factor as discussed in [3]. Setting
36
+	// this to zero disables this feature.
37
+	AdjustmentWindowSize uint
38
+
39
+	// HeightMin is the minimum value of the height parameter. Since this
40
+	// always must be positive, it will introduce a small amount error, so
41
+	// the chosen value should be relatively small compared to "normal"
42
+	// coordinates.
43
+	HeightMin float64
44
+
45
+	// LatencyFilterSamples is the maximum number of samples that are retained
46
+	// per node, in order to compute a median. The intent is to ride out blips
47
+	// but still keep the delay low, since our time to probe any given node is
48
+	// pretty infrequent. See [2] for more details.
49
+	LatencyFilterSize uint
50
+
51
+	// GravityRho is a tuning factor that sets how much gravity has an effect
52
+	// to try to re-center coordinates. See [2] for more details.
53
+	GravityRho float64
54
+}
55
+
56
+// DefaultConfig returns a Config that has some default values suitable for
57
+// basic testing of the algorithm, but not tuned to any particular type of cluster.
58
+func DefaultConfig() *Config {
59
+	return &Config{
60
+		Dimensionality:       8,
61
+		VivaldiErrorMax:      1.5,
62
+		VivaldiCE:            0.25,
63
+		VivaldiCC:            0.25,
64
+		AdjustmentWindowSize: 20,
65
+		HeightMin:            10.0e-6,
66
+		LatencyFilterSize:    3,
67
+		GravityRho:           150.0,
68
+	}
69
+}
0 70
new file mode 100644
... ...
@@ -0,0 +1,183 @@
0
+package coordinate
1
+
2
+import (
3
+	"math"
4
+	"math/rand"
5
+	"time"
6
+)
7
+
8
+// Coordinate is a specialized structure for holding network coordinates for the
9
+// Vivaldi-based coordinate mapping algorithm. All of the fields should be public
10
+// to enable this to be serialized. All values in here are in units of seconds.
11
+type Coordinate struct {
12
+	// Vec is the Euclidean portion of the coordinate. This is used along
13
+	// with the other fields to provide an overall distance estimate. The
14
+	// units here are seconds.
15
+	Vec []float64
16
+
17
+	// Err reflects the confidence in the given coordinate and is updated
18
+	// dynamically by the Vivaldi Client. This is dimensionless.
19
+	Error float64
20
+
21
+	// Adjustment is a distance offset computed based on a calculation over
22
+	// observations from all other nodes over a fixed window and is updated
23
+	// dynamically by the Vivaldi Client. The units here are seconds.
24
+	Adjustment float64
25
+
26
+	// Height is a distance offset that accounts for non-Euclidean effects
27
+	// which model the access links from nodes to the core Internet. The access
28
+	// links are usually set by bandwidth and congestion, and the core links
29
+	// usually follow distance based on geography.
30
+	Height float64
31
+}
32
+
33
+const (
34
+	// secondsToNanoseconds is used to convert float seconds to nanoseconds.
35
+	secondsToNanoseconds = 1.0e9
36
+
37
+	// zeroThreshold is used to decide if two coordinates are on top of each
38
+	// other.
39
+	zeroThreshold = 1.0e-6
40
+)
41
+
42
+// ErrDimensionalityConflict will be panic-d if you try to perform operations
43
+// with incompatible dimensions.
44
+type DimensionalityConflictError struct{}
45
+
46
+// Adds the error interface.
47
+func (e DimensionalityConflictError) Error() string {
48
+	return "coordinate dimensionality does not match"
49
+}
50
+
51
+// NewCoordinate creates a new coordinate at the origin, using the given config
52
+// to supply key initial values.
53
+func NewCoordinate(config *Config) *Coordinate {
54
+	return &Coordinate{
55
+		Vec:        make([]float64, config.Dimensionality),
56
+		Error:      config.VivaldiErrorMax,
57
+		Adjustment: 0.0,
58
+		Height:     config.HeightMin,
59
+	}
60
+}
61
+
62
+// Clone creates an independent copy of this coordinate.
63
+func (c *Coordinate) Clone() *Coordinate {
64
+	vec := make([]float64, len(c.Vec))
65
+	copy(vec, c.Vec)
66
+	return &Coordinate{
67
+		Vec:        vec,
68
+		Error:      c.Error,
69
+		Adjustment: c.Adjustment,
70
+		Height:     c.Height,
71
+	}
72
+}
73
+
74
+// IsCompatibleWith checks to see if the two coordinates are compatible
75
+// dimensionally. If this returns true then you are guaranteed to not get
76
+// any runtime errors operating on them.
77
+func (c *Coordinate) IsCompatibleWith(other *Coordinate) bool {
78
+	return len(c.Vec) == len(other.Vec)
79
+}
80
+
81
+// ApplyForce returns the result of applying the force from the direction of the
82
+// other coordinate.
83
+func (c *Coordinate) ApplyForce(config *Config, force float64, other *Coordinate) *Coordinate {
84
+	if !c.IsCompatibleWith(other) {
85
+		panic(DimensionalityConflictError{})
86
+	}
87
+
88
+	ret := c.Clone()
89
+	unit, mag := unitVectorAt(c.Vec, other.Vec)
90
+	ret.Vec = add(ret.Vec, mul(unit, force))
91
+	if mag > zeroThreshold {
92
+		ret.Height = (ret.Height+other.Height)*force/mag + ret.Height
93
+		ret.Height = math.Max(ret.Height, config.HeightMin)
94
+	}
95
+	return ret
96
+}
97
+
98
+// DistanceTo returns the distance between this coordinate and the other
99
+// coordinate, including adjustments.
100
+func (c *Coordinate) DistanceTo(other *Coordinate) time.Duration {
101
+	if !c.IsCompatibleWith(other) {
102
+		panic(DimensionalityConflictError{})
103
+	}
104
+
105
+	dist := c.rawDistanceTo(other)
106
+	adjustedDist := dist + c.Adjustment + other.Adjustment
107
+	if adjustedDist > 0.0 {
108
+		dist = adjustedDist
109
+	}
110
+	return time.Duration(dist * secondsToNanoseconds)
111
+}
112
+
113
+// rawDistanceTo returns the Vivaldi distance between this coordinate and the
114
+// other coordinate in seconds, not including adjustments. This assumes the
115
+// dimensions have already been checked to be compatible.
116
+func (c *Coordinate) rawDistanceTo(other *Coordinate) float64 {
117
+	return magnitude(diff(c.Vec, other.Vec)) + c.Height + other.Height
118
+}
119
+
120
+// add returns the sum of vec1 and vec2. This assumes the dimensions have
121
+// already been checked to be compatible.
122
+func add(vec1 []float64, vec2 []float64) []float64 {
123
+	ret := make([]float64, len(vec1))
124
+	for i, _ := range ret {
125
+		ret[i] = vec1[i] + vec2[i]
126
+	}
127
+	return ret
128
+}
129
+
130
+// diff returns the difference between the vec1 and vec2. This assumes the
131
+// dimensions have already been checked to be compatible.
132
+func diff(vec1 []float64, vec2 []float64) []float64 {
133
+	ret := make([]float64, len(vec1))
134
+	for i, _ := range ret {
135
+		ret[i] = vec1[i] - vec2[i]
136
+	}
137
+	return ret
138
+}
139
+
140
+// mul returns vec multiplied by a scalar factor.
141
+func mul(vec []float64, factor float64) []float64 {
142
+	ret := make([]float64, len(vec))
143
+	for i, _ := range vec {
144
+		ret[i] = vec[i] * factor
145
+	}
146
+	return ret
147
+}
148
+
149
+// magnitude computes the magnitude of the vec.
150
+func magnitude(vec []float64) float64 {
151
+	sum := 0.0
152
+	for i, _ := range vec {
153
+		sum += vec[i] * vec[i]
154
+	}
155
+	return math.Sqrt(sum)
156
+}
157
+
158
+// unitVectorAt returns a unit vector pointing at vec1 from vec2. If the two
159
+// positions are the same then a random unit vector is returned. We also return
160
+// the distance between the points for use in the later height calculation.
161
+func unitVectorAt(vec1 []float64, vec2 []float64) ([]float64, float64) {
162
+	ret := diff(vec1, vec2)
163
+
164
+	// If the coordinates aren't on top of each other we can normalize.
165
+	if mag := magnitude(ret); mag > zeroThreshold {
166
+		return mul(ret, 1.0/mag), mag
167
+	}
168
+
169
+	// Otherwise, just return a random unit vector.
170
+	for i, _ := range ret {
171
+		ret[i] = rand.Float64() - 0.5
172
+	}
173
+	if mag := magnitude(ret); mag > zeroThreshold {
174
+		return mul(ret, 1.0/mag), 0.0
175
+	}
176
+
177
+	// And finally just give up and make a unit vector along the first
178
+	// dimension. This should be exceedingly rare.
179
+	ret = make([]float64, len(ret))
180
+	ret[0] = 1.0
181
+	return ret, 0.0
182
+}
0 183
new file mode 100644
... ...
@@ -0,0 +1,187 @@
0
+package coordinate
1
+
2
+import (
3
+	"fmt"
4
+	"math"
5
+	"math/rand"
6
+	"time"
7
+)
8
+
9
+// GenerateClients returns a slice with nodes number of clients, all with the
10
+// given config.
11
+func GenerateClients(nodes int, config *Config) ([]*Client, error) {
12
+	clients := make([]*Client, nodes)
13
+	for i, _ := range clients {
14
+		client, err := NewClient(config)
15
+		if err != nil {
16
+			return nil, err
17
+		}
18
+
19
+		clients[i] = client
20
+	}
21
+	return clients, nil
22
+}
23
+
24
+// GenerateLine returns a truth matrix as if all the nodes are in a straight linke
25
+// with the given spacing between them.
26
+func GenerateLine(nodes int, spacing time.Duration) [][]time.Duration {
27
+	truth := make([][]time.Duration, nodes)
28
+	for i := range truth {
29
+		truth[i] = make([]time.Duration, nodes)
30
+	}
31
+
32
+	for i := 0; i < nodes; i++ {
33
+		for j := i + 1; j < nodes; j++ {
34
+			rtt := time.Duration(j-i) * spacing
35
+			truth[i][j], truth[j][i] = rtt, rtt
36
+		}
37
+	}
38
+	return truth
39
+}
40
+
41
+// GenerateGrid returns a truth matrix as if all the nodes are in a two dimensional
42
+// grid with the given spacing between them.
43
+func GenerateGrid(nodes int, spacing time.Duration) [][]time.Duration {
44
+	truth := make([][]time.Duration, nodes)
45
+	for i := range truth {
46
+		truth[i] = make([]time.Duration, nodes)
47
+	}
48
+
49
+	n := int(math.Sqrt(float64(nodes)))
50
+	for i := 0; i < nodes; i++ {
51
+		for j := i + 1; j < nodes; j++ {
52
+			x1, y1 := float64(i%n), float64(i/n)
53
+			x2, y2 := float64(j%n), float64(j/n)
54
+			dx, dy := x2-x1, y2-y1
55
+			dist := math.Sqrt(dx*dx + dy*dy)
56
+			rtt := time.Duration(dist * float64(spacing))
57
+			truth[i][j], truth[j][i] = rtt, rtt
58
+		}
59
+	}
60
+	return truth
61
+}
62
+
63
+// GenerateSplit returns a truth matrix as if half the nodes are close together in
64
+// one location and half the nodes are close together in another. The lan factor
65
+// is used to separate the nodes locally and the wan factor represents the split
66
+// between the two sides.
67
+func GenerateSplit(nodes int, lan time.Duration, wan time.Duration) [][]time.Duration {
68
+	truth := make([][]time.Duration, nodes)
69
+	for i := range truth {
70
+		truth[i] = make([]time.Duration, nodes)
71
+	}
72
+
73
+	split := nodes / 2
74
+	for i := 0; i < nodes; i++ {
75
+		for j := i + 1; j < nodes; j++ {
76
+			rtt := lan
77
+			if (i <= split && j > split) || (i > split && j <= split) {
78
+				rtt += wan
79
+			}
80
+			truth[i][j], truth[j][i] = rtt, rtt
81
+		}
82
+	}
83
+	return truth
84
+}
85
+
86
+// GenerateCircle returns a truth matrix for a set of nodes, evenly distributed
87
+// around a circle with the given radius. The first node is at the "center" of the
88
+// circle because it's equidistant from all the other nodes, but we place it at
89
+// double the radius, so it should show up above all the other nodes in height.
90
+func GenerateCircle(nodes int, radius time.Duration) [][]time.Duration {
91
+	truth := make([][]time.Duration, nodes)
92
+	for i := range truth {
93
+		truth[i] = make([]time.Duration, nodes)
94
+	}
95
+
96
+	for i := 0; i < nodes; i++ {
97
+		for j := i + 1; j < nodes; j++ {
98
+			var rtt time.Duration
99
+			if i == 0 {
100
+				rtt = 2 * radius
101
+			} else {
102
+				t1 := 2.0 * math.Pi * float64(i) / float64(nodes)
103
+				x1, y1 := math.Cos(t1), math.Sin(t1)
104
+				t2 := 2.0 * math.Pi * float64(j) / float64(nodes)
105
+				x2, y2 := math.Cos(t2), math.Sin(t2)
106
+				dx, dy := x2-x1, y2-y1
107
+				dist := math.Sqrt(dx*dx + dy*dy)
108
+				rtt = time.Duration(dist * float64(radius))
109
+			}
110
+			truth[i][j], truth[j][i] = rtt, rtt
111
+		}
112
+	}
113
+	return truth
114
+}
115
+
116
+// GenerateRandom returns a truth matrix for a set of nodes with normally
117
+// distributed delays, with the given mean and deviation. The RNG is re-seeded
118
+// so you always get the same matrix for a given size.
119
+func GenerateRandom(nodes int, mean time.Duration, deviation time.Duration) [][]time.Duration {
120
+	rand.Seed(1)
121
+
122
+	truth := make([][]time.Duration, nodes)
123
+	for i := range truth {
124
+		truth[i] = make([]time.Duration, nodes)
125
+	}
126
+
127
+	for i := 0; i < nodes; i++ {
128
+		for j := i + 1; j < nodes; j++ {
129
+			rttSeconds := rand.NormFloat64()*deviation.Seconds() + mean.Seconds()
130
+			rtt := time.Duration(rttSeconds * secondsToNanoseconds)
131
+			truth[i][j], truth[j][i] = rtt, rtt
132
+		}
133
+	}
134
+	return truth
135
+}
136
+
137
+// Simulate runs the given number of cycles using the given list of clients and
138
+// truth matrix. On each cycle, each client will pick a random node and observe
139
+// the truth RTT, updating its coordinate estimate. The RNG is re-seeded for
140
+// each simulation run to get deterministic results (for this algorithm and the
141
+// underlying algorithm which will use random numbers for position vectors when
142
+// starting out with everything at the origin).
143
+func Simulate(clients []*Client, truth [][]time.Duration, cycles int) {
144
+	rand.Seed(1)
145
+
146
+	nodes := len(clients)
147
+	for cycle := 0; cycle < cycles; cycle++ {
148
+		for i, _ := range clients {
149
+			if j := rand.Intn(nodes); j != i {
150
+				c := clients[j].GetCoordinate()
151
+				rtt := truth[i][j]
152
+				node := fmt.Sprintf("node_%d", j)
153
+				clients[i].Update(node, c, rtt)
154
+			}
155
+		}
156
+	}
157
+}
158
+
159
+// Stats is returned from the Evaluate function with a summary of the algorithm
160
+// performance.
161
+type Stats struct {
162
+	ErrorMax float64
163
+	ErrorAvg float64
164
+}
165
+
166
+// Evaluate uses the coordinates of the given clients to calculate estimated
167
+// distances and compares them with the given truth matrix, returning summary
168
+// stats.
169
+func Evaluate(clients []*Client, truth [][]time.Duration) (stats Stats) {
170
+	nodes := len(clients)
171
+	count := 0
172
+	for i := 0; i < nodes; i++ {
173
+		for j := i + 1; j < nodes; j++ {
174
+			est := clients[i].DistanceTo(clients[j].GetCoordinate()).Seconds()
175
+			actual := truth[i][j].Seconds()
176
+			error := math.Abs(est-actual) / actual
177
+			stats.ErrorMax = math.Max(stats.ErrorMax, error)
178
+			stats.ErrorAvg += error
179
+			count += 1
180
+		}
181
+	}
182
+
183
+	stats.ErrorAvg /= float64(count)
184
+	fmt.Printf("Error avg=%9.6f max=%9.6f\n", stats.ErrorAvg, stats.ErrorMax)
185
+	return
186
+}
... ...
@@ -149,6 +149,14 @@ type Config struct {
149 149
 	//
150 150
 	QueryTimeoutMult int
151 151
 
152
+	// QueryResponseSizeLimit and QuerySizeLimit limit the inbound and
153
+	// outbound payload sizes for queries, respectively. These must fit
154
+	// in a UDP packet with some additional overhead, so tuning these
155
+	// past the default values of 1024 will depend on your network
156
+	// configuration.
157
+	QueryResponseSizeLimit int
158
+	QuerySizeLimit         int
159
+
152 160
 	// MemberlistConfig is the memberlist configuration that Serf will
153 161
 	// use to do the underlying membership management and gossip. Some
154 162
 	// fields in the MemberlistConfig will be overwritten by Serf no
... ...
@@ -189,6 +197,12 @@ type Config struct {
189 189
 	// node stays while the other node will leave the cluster and exit.
190 190
 	EnableNameConflictResolution bool
191 191
 
192
+	// DisableCoordinates controls if Serf will maintain an estimate of this
193
+	// node's network coordinate internally. A network coordinate is useful
194
+	// for estimating the network distance (i.e. round trip time) between
195
+	// two nodes. Enabling this option adds some overhead to ping messages.
196
+	DisableCoordinates bool
197
+
192 198
 	// KeyringFile provides the location of a writable file where Serf can
193 199
 	// persist changes to the encryption keyring.
194 200
 	KeyringFile string
... ...
@@ -229,6 +243,9 @@ func DefaultConfig() *Config {
229 229
 		TombstoneTimeout:             24 * time.Hour,
230 230
 		MemberlistConfig:             memberlist.DefaultLANConfig(),
231 231
 		QueryTimeoutMult:             16,
232
+		QueryResponseSizeLimit:       1024,
233
+		QuerySizeLimit:               1024,
232 234
 		EnableNameConflictResolution: true,
235
+		DisableCoordinates:           false,
233 236
 	}
234 237
 }
... ...
@@ -2,6 +2,7 @@ package serf
2 2
 
3 3
 import (
4 4
 	"fmt"
5
+
5 6
 	"github.com/armon/go-metrics"
6 7
 )
7 8
 
... ...
@@ -170,6 +171,12 @@ func (d *delegate) LocalState(join bool) []byte {
170 170
 }
171 171
 
172 172
 func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
173
+	// Ensure we have a message
174
+	if len(buf) == 0 {
175
+		d.serf.logger.Printf("[ERR] serf: Remote state is zero bytes")
176
+		return
177
+	}
178
+
173 179
 	// Check the message type
174 180
 	if messageType(buf[0]) != messagePushPullType {
175 181
 		d.serf.logger.Printf("[ERR] serf: Remote state has bad type prefix: %v", buf[0])
... ...
@@ -152,8 +152,8 @@ func (q *Query) Respond(buf []byte) error {
152 152
 	}
153 153
 
154 154
 	// Check the size limit
155
-	if len(raw) > QueryResponseSizeLimit {
156
-		return fmt.Errorf("response exceeds limit of %d bytes", QueryResponseSizeLimit)
155
+	if len(raw) > q.serf.config.QueryResponseSizeLimit {
156
+		return fmt.Errorf("response exceeds limit of %d bytes", q.serf.config.QueryResponseSizeLimit)
157 157
 	}
158 158
 
159 159
 	// Send the response
... ...
@@ -7,29 +7,38 @@ import (
7 7
 )
8 8
 
9 9
 type MergeDelegate interface {
10
-	NotifyMerge([]*Member) (cancel bool)
10
+	NotifyMerge([]*Member) error
11 11
 }
12 12
 
13 13
 type mergeDelegate struct {
14 14
 	serf *Serf
15 15
 }
16 16
 
17
-func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) (cancel bool) {
17
+func (m *mergeDelegate) NotifyMerge(nodes []*memberlist.Node) error {
18 18
 	members := make([]*Member, len(nodes))
19 19
 	for idx, n := range nodes {
20
-		members[idx] = &Member{
21
-			Name:        n.Name,
22
-			Addr:        net.IP(n.Addr),
23
-			Port:        n.Port,
24
-			Tags:        m.serf.decodeTags(n.Meta),
25
-			Status:      StatusNone,
26
-			ProtocolMin: n.PMin,
27
-			ProtocolMax: n.PMax,
28
-			ProtocolCur: n.PCur,
29
-			DelegateMin: n.DMin,
30
-			DelegateMax: n.DMax,
31
-			DelegateCur: n.DCur,
32
-		}
20
+		members[idx] = m.nodeToMember(n)
33 21
 	}
34 22
 	return m.serf.config.Merge.NotifyMerge(members)
35 23
 }
24
+
25
+func (m *mergeDelegate) NotifyAlive(peer *memberlist.Node) error {
26
+	member := m.nodeToMember(peer)
27
+	return m.serf.config.Merge.NotifyMerge([]*Member{member})
28
+}
29
+
30
+func (m *mergeDelegate) nodeToMember(n *memberlist.Node) *Member {
31
+	return &Member{
32
+		Name:        n.Name,
33
+		Addr:        net.IP(n.Addr),
34
+		Port:        n.Port,
35
+		Tags:        m.serf.decodeTags(n.Meta),
36
+		Status:      StatusNone,
37
+		ProtocolMin: n.PMin,
38
+		ProtocolMax: n.PMax,
39
+		ProtocolCur: n.PCur,
40
+		DelegateMin: n.DMin,
41
+		DelegateMax: n.DMax,
42
+		DelegateCur: n.DCur,
43
+	}
44
+}
36 45
new file mode 100644
... ...
@@ -0,0 +1,89 @@
0
+package serf
1
+
2
+import (
3
+	"bytes"
4
+	"log"
5
+	"time"
6
+
7
+	"github.com/armon/go-metrics"
8
+	"github.com/hashicorp/go-msgpack/codec"
9
+	"github.com/hashicorp/memberlist"
10
+	"github.com/hashicorp/serf/coordinate"
11
+)
12
+
13
+// pingDelegate is notified when memberlist successfully completes a direct ping
14
+// of a peer node. We use this to update our estimated network coordinate, as
15
+// well as cache the coordinate of the peer.
16
+type pingDelegate struct {
17
+	serf *Serf
18
+}
19
+
20
+const (
21
+	// PingVersion is an internal version for the ping message, above the normal
22
+	// versioning we get from the protocol version. This enables small updates
23
+	// to the ping message without a full protocol bump.
24
+	PingVersion = 1
25
+)
26
+
27
+// AckPayload is called to produce a payload to send back in response to a ping
28
+// request.
29
+func (p *pingDelegate) AckPayload() []byte {
30
+	var buf bytes.Buffer
31
+
32
+	// The first byte is the version number, forming a simple header.
33
+	version := []byte{PingVersion}
34
+	buf.Write(version)
35
+
36
+	// The rest of the message is the serialized coordinate.
37
+	enc := codec.NewEncoder(&buf, &codec.MsgpackHandle{})
38
+	if err := enc.Encode(p.serf.coordClient.GetCoordinate()); err != nil {
39
+		log.Printf("[ERR] serf: Failed to encode coordinate: %v\n", err)
40
+	}
41
+	return buf.Bytes()
42
+}
43
+
44
+// NotifyPingComplete is called when this node successfully completes a direct ping
45
+// of a peer node.
46
+func (p *pingDelegate) NotifyPingComplete(other *memberlist.Node, rtt time.Duration, payload []byte) {
47
+	if payload == nil || len(payload) == 0 {
48
+		return
49
+	}
50
+
51
+	// Verify ping version in the header.
52
+	version := payload[0]
53
+	if version != PingVersion {
54
+		log.Printf("[ERR] serf: Unsupported ping version: %v", version)
55
+		return
56
+	}
57
+
58
+	// Process the remainder of the message as a coordinate.
59
+	r := bytes.NewReader(payload[1:])
60
+	dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
61
+	var coord coordinate.Coordinate
62
+	if err := dec.Decode(&coord); err != nil {
63
+		log.Printf("[ERR] serf: Failed to decode coordinate from ping: %v", err)
64
+	}
65
+
66
+	// Apply the update. Since this is a coordinate coming from some place
67
+	// else we harden this and look for dimensionality problems proactively.
68
+	before := p.serf.coordClient.GetCoordinate()
69
+	if before.IsCompatibleWith(&coord) {
70
+		after := p.serf.coordClient.Update(other.Name, &coord, rtt)
71
+
72
+		// Publish some metrics to give us an idea of how much we are
73
+		// adjusting each time we update.
74
+		d := float32(before.DistanceTo(after).Seconds() * 1.0e3)
75
+		metrics.AddSample([]string{"serf", "coordinate", "adjustment-ms"}, d)
76
+
77
+		// Cache the coordinate for the other node, and add our own
78
+		// to the cache as well since it just got updated. This lets
79
+		// users call GetCachedCoordinate with our node name, which is
80
+		// more friendly.
81
+		p.serf.coordCacheLock.Lock()
82
+		p.serf.coordCache[other.Name] = &coord
83
+		p.serf.coordCache[p.serf.config.NodeName] = p.serf.coordClient.GetCoordinate()
84
+		p.serf.coordCacheLock.Unlock()
85
+	} else {
86
+		log.Printf("[ERR] serf: Rejected bad coordinate: %v\n", coord)
87
+	}
88
+}
... ...
@@ -17,6 +17,7 @@ import (
17 17
 	"github.com/armon/go-metrics"
18 18
 	"github.com/hashicorp/go-msgpack/codec"
19 19
 	"github.com/hashicorp/memberlist"
20
+	"github.com/hashicorp/serf/coordinate"
20 21
 )
21 22
 
22 23
 // These are the protocol versions that Serf can _understand_. These are
... ...
@@ -91,6 +92,10 @@ type Serf struct {
91 91
 
92 92
 	snapshotter *Snapshotter
93 93
 	keyManager  *KeyManager
94
+
95
+	coordClient    *coordinate.Client
96
+	coordCache     map[string]*coordinate.Coordinate
97
+	coordCacheLock sync.RWMutex
94 98
 }
95 99
 
96 100
 // SerfState is the state of the Serf instance.
... ...
@@ -209,10 +214,8 @@ type queries struct {
209 209
 }
210 210
 
211 211
 const (
212
-	UserEventSizeLimit     = 512        // Maximum byte size for event name and payload
213
-	QuerySizeLimit         = 1024       // Maximum byte size for query
214
-	QueryResponseSizeLimit = 1024       // Maximum bytes size for response
215
-	snapshotSizeLimit      = 128 * 1024 // Maximum 128 KB snapshot
212
+	UserEventSizeLimit = 512        // Maximum byte size for event name and payload
213
+	snapshotSizeLimit  = 128 * 1024 // Maximum 128 KB snapshot
216 214
 )
217 215
 
218 216
 // Create creates a new Serf instance, starting all the background tasks
... ...
@@ -274,15 +277,25 @@ func Create(conf *Config) (*Serf, error) {
274 274
 	}
275 275
 	conf.EventCh = outCh
276 276
 
277
+	// Set up network coordinate client.
278
+	if !conf.DisableCoordinates {
279
+		serf.coordClient, err = coordinate.NewClient(coordinate.DefaultConfig())
280
+		if err != nil {
281
+			return nil, fmt.Errorf("Failed to create coordinate client: %v", err)
282
+		}
283
+	}
284
+
277 285
 	// Try access the snapshot
278 286
 	var oldClock, oldEventClock, oldQueryClock LamportTime
279 287
 	var prev []*PreviousNode
280 288
 	if conf.SnapshotPath != "" {
281
-		eventCh, snap, err := NewSnapshotter(conf.SnapshotPath,
289
+		eventCh, snap, err := NewSnapshotter(
290
+			conf.SnapshotPath,
282 291
 			snapshotSizeLimit,
283 292
 			conf.RejoinAfterLeave,
284 293
 			serf.logger,
285 294
 			&serf.clock,
295
+			serf.coordClient,
286 296
 			conf.EventCh,
287 297
 			serf.shutdownCh)
288 298
 		if err != nil {
... ...
@@ -298,6 +311,13 @@ func Create(conf *Config) (*Serf, error) {
298 298
 		serf.queryMinTime = oldQueryClock + 1
299 299
 	}
300 300
 
301
+	// Set up the coordinate cache. We do this after we read the snapshot to
302
+	// make sure we get a good initial value from there, if we got one.
303
+	if !conf.DisableCoordinates {
304
+		serf.coordCache = make(map[string]*coordinate.Coordinate)
305
+		serf.coordCache[conf.NodeName] = serf.coordClient.GetCoordinate()
306
+	}
307
+
301 308
 	// Setup the various broadcast queues, which we use to send our own
302 309
 	// custom broadcasts along the gossip channel.
303 310
 	serf.broadcasts = &memberlist.TransmitLimitedQueue{
... ...
@@ -347,17 +367,22 @@ func Create(conf *Config) (*Serf, error) {
347 347
 	conf.MemberlistConfig.DelegateProtocolMax = ProtocolVersionMax
348 348
 	conf.MemberlistConfig.Name = conf.NodeName
349 349
 	conf.MemberlistConfig.ProtocolVersion = ProtocolVersionMap[conf.ProtocolVersion]
350
+	if !conf.DisableCoordinates {
351
+		conf.MemberlistConfig.Ping = &pingDelegate{serf: serf}
352
+	}
350 353
 
351 354
 	// Setup a merge delegate if necessary
352 355
 	if conf.Merge != nil {
353
-		conf.MemberlistConfig.Merge = &mergeDelegate{serf: serf}
356
+		md := &mergeDelegate{serf: serf}
357
+		conf.MemberlistConfig.Merge = md
358
+		conf.MemberlistConfig.Alive = md
354 359
 	}
355 360
 
356 361
 	// Create the underlying memberlist that will manage membership
357 362
 	// and failure detection for the Serf instance.
358 363
 	memberlist, err := memberlist.Create(conf.MemberlistConfig)
359 364
 	if err != nil {
360
-		return nil, err
365
+		return nil, fmt.Errorf("Failed to create memberlist: %v", err)
361 366
 	}
362 367
 
363 368
 	serf.memberlist = memberlist
... ...
@@ -486,8 +511,8 @@ func (s *Serf) Query(name string, payload []byte, params *QueryParam) (*QueryRes
486 486
 	}
487 487
 
488 488
 	// Check the size
489
-	if len(raw) > QuerySizeLimit {
490
-		return nil, fmt.Errorf("query exceeds limit of %d bytes", QuerySizeLimit)
489
+	if len(raw) > s.config.QuerySizeLimit {
490
+		return nil, fmt.Errorf("query exceeds limit of %d bytes", s.config.QuerySizeLimit)
491 491
 	}
492 492
 
493 493
 	// Register QueryResponse to track acks and responses
... ...
@@ -950,6 +975,19 @@ func (s *Serf) handleNodeUpdate(n *memberlist.Node) {
950 950
 	member.Port = n.Port
951 951
 	member.Tags = s.decodeTags(n.Meta)
952 952
 
953
+	// Snag the latest versions. NOTE - the current memberlist code will NOT
954
+	// fire an update event if the metadata (for Serf, tags) stays the same
955
+	// and only the protocol versions change. If we wake any Serf-level
956
+	// protocol changes where we want to get this event under those
957
+	// circumstances, we will need to update memberlist to do a check of
958
+	// versions as well as the metadata.
959
+	member.ProtocolMin = n.PMin
960
+	member.ProtocolMax = n.PMax
961
+	member.ProtocolCur = n.PCur
962
+	member.DelegateMin = n.DMin
963
+	member.DelegateMax = n.DMax
964
+	member.DelegateCur = n.DCur
965
+
953 966
 	// Update some metrics
954 967
 	metrics.IncrCounter([]string{"serf", "member", "update"}, 1)
955 968
 
... ...
@@ -1016,6 +1054,17 @@ func (s *Serf) handleNodeLeaveIntent(leaveMsg *messageLeave) bool {
1016 1016
 		s.failedMembers = removeOldMember(s.failedMembers, member.Name)
1017 1017
 		s.leftMembers = append(s.leftMembers, member)
1018 1018
 
1019
+		// We must push a message indicating the node has now
1020
+		// left to allow higher-level applications to handle the
1021
+		// graceful leave.
1022
+		s.logger.Printf("[INFO] serf: EventMemberLeave (forced): %s %s",
1023
+			member.Member.Name, member.Member.Addr)
1024
+		if s.config.EventCh != nil {
1025
+			s.config.EventCh <- MemberEvent{
1026
+				Type:    EventMemberLeave,
1027
+				Members: []Member{member.Member},
1028
+			}
1029
+		}
1019 1030
 		return true
1020 1031
 	default:
1021 1032
 		return false
... ...
@@ -1384,6 +1433,16 @@ func (s *Serf) reap(old []*memberState, timeout time.Duration) []*memberState {
1384 1384
 		// Delete from members
1385 1385
 		delete(s.members, m.Name)
1386 1386
 
1387
+		// Tell the coordinate client the node has gone away and delete
1388
+		// its cached coordinates.
1389
+		if !s.config.DisableCoordinates {
1390
+			s.coordClient.ForgetNode(m.Name)
1391
+
1392
+			s.coordCacheLock.Lock()
1393
+			delete(s.coordCache, m.Name)
1394
+			s.coordCacheLock.Unlock()
1395
+		}
1396
+
1387 1397
 		// Send an event along
1388 1398
 		s.logger.Printf("[INFO] serf: EventMemberReap: %s", m.Name)
1389 1399
 		if s.config.EventCh != nil {
... ...
@@ -1596,3 +1655,38 @@ func (s *Serf) writeKeyringFile() error {
1596 1596
 	// Success!
1597 1597
 	return nil
1598 1598
 }
1599
+
1600
+// GetCoordinate returns the network coordinate of the local node.
1601
+func (s *Serf) GetCoordinate() (*coordinate.Coordinate, error) {
1602
+	if !s.config.DisableCoordinates {
1603
+		return s.coordClient.GetCoordinate(), nil
1604
+	}
1605
+
1606
+	return nil, fmt.Errorf("Coordinates are disabled")
1607
+}
1608
+
1609
+// GetCachedCoordinate returns the network coordinate for the node with the given
1610
+// name. This will only be valid if DisableCoordinates is set to false.
1611
+func (s *Serf) GetCachedCoordinate(name string) (coord *coordinate.Coordinate, ok bool) {
1612
+	if !s.config.DisableCoordinates {
1613
+		s.coordCacheLock.RLock()
1614
+		defer s.coordCacheLock.RUnlock()
1615
+		if coord, ok = s.coordCache[name]; ok {
1616
+			return coord, true
1617
+		}
1618
+
1619
+		return nil, false
1620
+	}
1621
+
1622
+	return nil, false
1623
+}
1624
+
1625
+// NumNodes returns the number of nodes in the serf cluster, regardless of
1626
+// their health or status.
1627
+func (s *Serf) NumNodes() (numNodes int) {
1628
+	s.memberLock.RLock()
1629
+	numNodes = len(s.members)
1630
+	s.memberLock.RUnlock()
1631
+
1632
+	return numNodes
1633
+}
... ...
@@ -2,6 +2,7 @@ package serf
2 2
 
3 3
 import (
4 4
 	"bufio"
5
+	"encoding/json"
5 6
 	"fmt"
6 7
 	"log"
7 8
 	"math/rand"
... ...
@@ -12,6 +13,7 @@ import (
12 12
 	"time"
13 13
 
14 14
 	"github.com/armon/go-metrics"
15
+	"github.com/hashicorp/serf/coordinate"
15 16
 )
16 17
 
17 18
 /*
... ...
@@ -27,6 +29,7 @@ old events.
27 27
 
28 28
 const flushInterval = 500 * time.Millisecond
29 29
 const clockUpdateInterval = 500 * time.Millisecond
30
+const coordinateUpdateInterval = 60 * time.Second
30 31
 const tmpExt = ".compact"
31 32
 
32 33
 // Snapshotter is responsible for ingesting events and persisting
... ...
@@ -34,6 +37,7 @@ const tmpExt = ".compact"
34 34
 type Snapshotter struct {
35 35
 	aliveNodes       map[string]string
36 36
 	clock            *LamportClock
37
+	coordClient      *coordinate.Client
37 38
 	fh               *os.File
38 39
 	buffered         *bufio.Writer
39 40
 	inCh             <-chan Event
... ...
@@ -74,6 +78,7 @@ func NewSnapshotter(path string,
74 74
 	rejoinAfterLeave bool,
75 75
 	logger *log.Logger,
76 76
 	clock *LamportClock,
77
+	coordClient *coordinate.Client,
77 78
 	outCh chan<- Event,
78 79
 	shutdownCh <-chan struct{}) (chan<- Event, *Snapshotter, error) {
79 80
 	inCh := make(chan Event, 1024)
... ...
@@ -96,6 +101,7 @@ func NewSnapshotter(path string,
96 96
 	snap := &Snapshotter{
97 97
 		aliveNodes:       make(map[string]string),
98 98
 		clock:            clock,
99
+		coordClient:      coordClient,
99 100
 		fh:               fh,
100 101
 		buffered:         bufio.NewWriter(fh),
101 102
 		inCh:             inCh,
... ...
@@ -171,6 +177,12 @@ func (s *Snapshotter) Leave() {
171 171
 
172 172
 // stream is a long running routine that is used to handle events
173 173
 func (s *Snapshotter) stream() {
174
+	clockTicker := time.NewTicker(clockUpdateInterval)
175
+	defer clockTicker.Stop()
176
+
177
+	coordinateTicker := time.NewTicker(coordinateUpdateInterval)
178
+	defer coordinateTicker.Stop()
179
+
174 180
 	for {
175 181
 		select {
176 182
 		case <-s.leaveCh:
... ...
@@ -209,9 +221,12 @@ func (s *Snapshotter) stream() {
209 209
 				s.logger.Printf("[ERR] serf: Unknown event to snapshot: %#v", e)
210 210
 			}
211 211
 
212
-		case <-time.After(clockUpdateInterval):
212
+		case <-clockTicker.C:
213 213
 			s.updateClock()
214 214
 
215
+		case <-coordinateTicker.C:
216
+			s.updateCoordinate()
217
+
215 218
 		case <-s.shutdownCh:
216 219
 			if err := s.buffered.Flush(); err != nil {
217 220
 				s.logger.Printf("[ERR] serf: failed to flush snapshot: %v", err)
... ...
@@ -258,6 +273,20 @@ func (s *Snapshotter) updateClock() {
258 258
 	}
259 259
 }
260 260
 
261
+// updateCoordinate is called periodically to write out the current local
262
+// coordinate. It's safe to call this if coordinates aren't enabled (nil
263
+// client) and it will be a no-op.
264
+func (s *Snapshotter) updateCoordinate() {
265
+	if s.coordClient != nil {
266
+		encoded, err := json.Marshal(s.coordClient.GetCoordinate())
267
+		if err != nil {
268
+			s.logger.Printf("[ERR] serf: Failed to encode coordinate: %v", err)
269
+		} else {
270
+			s.tryAppend(fmt.Sprintf("coordinate: %s\n", encoded))
271
+		}
272
+	}
273
+}
274
+
261 275
 // processUserEvent is used to handle a single user event
262 276
 func (s *Snapshotter) processUserEvent(e UserEvent) {
263 277
 	// Ignore old clocks
... ...
@@ -362,6 +391,23 @@ func (s *Snapshotter) compact() error {
362 362
 	}
363 363
 	offset += int64(n)
364 364
 
365
+	// Write out the coordinate.
366
+	if s.coordClient != nil {
367
+		encoded, err := json.Marshal(s.coordClient.GetCoordinate())
368
+		if err != nil {
369
+			fh.Close()
370
+			return err
371
+		}
372
+
373
+		line = fmt.Sprintf("coordinate: %s\n", encoded)
374
+		n, err = buf.WriteString(line)
375
+		if err != nil {
376
+			fh.Close()
377
+			return err
378
+		}
379
+		offset += int64(n)
380
+	}
381
+
365 382
 	// Flush the new snapshot
366 383
 	err = buf.Flush()
367 384
 	fh.Close()
... ...
@@ -473,6 +519,20 @@ func (s *Snapshotter) replay() error {
473 473
 			}
474 474
 			s.lastQueryClock = LamportTime(timeInt)
475 475
 
476
+		} else if strings.HasPrefix(line, "coordinate: ") {
477
+			if s.coordClient == nil {
478
+				s.logger.Printf("[WARN] serf: Ignoring snapshot coordinates since they are disabled")
479
+				continue
480
+			}
481
+
482
+			coordStr := strings.TrimPrefix(line, "coordinate: ")
483
+			var coord coordinate.Coordinate
484
+			err := json.Unmarshal([]byte(coordStr), &coord)
485
+			if err != nil {
486
+				s.logger.Printf("[WARN] serf: Failed to decode coordinate: %v", err)
487
+				continue
488
+			}
489
+			s.coordClient.SetCoordinate(&coord)
476 490
 		} else if line == "leave" {
477 491
 			// Ignore a leave if we plan on re-joining
478 492
 			if s.rejoinAfterLeave {
479 493
deleted file mode 100644
... ...
@@ -1,10 +0,0 @@
1
-# Proprietary License
2
-
3
-This license is temporary while a more official one is drafted. However,
4
-this should make it clear:
5
-
6
-* The text contents of this website are MPL 2.0 licensed.
7
-
8
-* The design contents of this website are proprietary and may not be reproduced
9
-  or reused in any way other than to run the Serf website locally. The license
10
-  for the design is owned solely by HashiCorp, Inc.
11 1
new file mode 100644
... ...
@@ -0,0 +1,10 @@
0
+# Proprietary License
1
+
2
+This license is temporary while a more official one is drafted. However,
3
+this should make it clear:
4
+
5
+* The text contents of this website are MPL 2.0 licensed.
6
+
7
+* The design contents of this website are proprietary and may not be reproduced
8
+  or reused in any way other than to run the Serf website locally. The license
9
+  for the design is owned solely by HashiCorp, Inc.