GitList

Browse code

Merge pull request #2134 from dani-docker/esc-532

Adding a recovery mechanism for a split gossip cluster

Flavio Crisciani authored on 2018/04/24 05:14:27
Showing 3 changed files

libnetwork/networkdb/cluster.go index 198cace..bd48fb9 100644
libnetwork/networkdb/delegate.go index 1a57de3..9a379fe 100644
libnetwork/networkdb/networkdb.go index d1fa3b8..c433913 100644

@@ -2,6 +2,7 @@ package networkdb
                      import (
                      	"bytes"
                     +	"context"
                      	"crypto/rand"
                      	"encoding/hex"
                      	"fmt"
@@ -17,10 +18,12 @@ import (
+                     )
                      const (
                     -	reapPeriod       = 5 * time.Second
                     -	retryInterval    = 1 * time.Second
                     -	nodeReapInterval = 24 * time.Hour
                     -	nodeReapPeriod   = 2 * time.Hour
                     +	reapPeriod            = 5 * time.Second
                     +	rejoinClusterDuration = 10 * time.Second
                     +	rejoinInterval        = 60 * time.Second
                     +	retryInterval         = 1 * time.Second
                     +	nodeReapInterval      = 24 * time.Hour
                     +	nodeReapPeriod        = 2 * time.Hour
+                     )
                      type logWriter struct{}
@@ -154,7 +157,7 @@ func (nDB *NetworkDB) clusterInit() error {
                      		return fmt.Errorf("failed to create memberlist: %v", err)
+                     	}
                     -	nDB.stopCh = make(chan struct{})
                     +	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
                      	nDB.memberlist = mlist
                      	for _, trigger := range []struct {
@@ -166,16 +169,17 @@ func (nDB *NetworkDB) clusterInit() error {
                      		{config.PushPullInterval, nDB.bulkSyncTables},
                      		{retryInterval, nDB.reconnectNode},
                      		{nodeReapPeriod, nDB.reapDeadNode},
                     +		{rejoinInterval, nDB.rejoinClusterBootStrap},
                      	} {
                      		t := time.NewTicker(trigger.interval)
                     -		go nDB.triggerFunc(trigger.interval, t.C, nDB.stopCh, trigger.fn)
                     +		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
                      		nDB.tickers = append(nDB.tickers, t)
+                     	}
                      	return nil
+                     }
                     -func (nDB *NetworkDB) retryJoin(members []string, stop <-chan struct{}) {
                     +func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
                      	t := time.NewTicker(retryInterval)
                      	defer t.Stop()
@@ -191,7 +195,7 @@ func (nDB *NetworkDB) retryJoin(members []string, stop <-chan struct{}) {
                      				continue
+                     			}
                      			return
                     -		case <-stop:
                     +		case <-ctx.Done():
                      			return
+                     		}
+                     	}
@@ -202,8 +206,8 @@ func (nDB *NetworkDB) clusterJoin(members []string) error {
                      	mlist := nDB.memberlist
                      	if _, err := mlist.Join(members); err != nil {
                     -		// In case of failure, keep retrying join until it succeeds or the cluster is shutdown.
                     -		go nDB.retryJoin(members, nDB.stopCh)
                     +		// In case of failure, we no longer need to explicitly call retryJoin.
                     +		// rejoinClusterBootStrap, which runs every minute, will retryJoin for 10sec
                      		return fmt.Errorf("could not join node to memberlist: %v", err)
+                     	}
@@ -225,7 +229,8 @@ func (nDB *NetworkDB) clusterLeave() error {
                      		return err
+                     	}
                     -	close(nDB.stopCh)
                     +	// cancel the context
                     +	nDB.cancelCtx()
                      	for _, t := range nDB.tickers {
                      		t.Stop()
@@ -234,19 +239,19 @@ func (nDB *NetworkDB) clusterLeave() error {
                      	return mlist.Shutdown()
+                     }
                     -func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, stop <-chan struct{}, f func()) {
                     +func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
                      	// Use a random stagger to avoid syncronizing
                      	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger))
                      	select {
                      	case <-time.After(randStagger):
                     -	case <-stop:
                     +	case <-nDB.ctx.Done():
                      		return
+                     	}
                      	for {
                      		select {
                      		case <-C:
                      			f()
                     -		case <-stop:
                     +		case <-nDB.ctx.Done():
                      			return
+                     		}
+                     	}
@@ -270,6 +275,35 @@ func (nDB *NetworkDB) reapDeadNode() {
+                     	}
+                     }
                     +// rejoinClusterBootStrap is called periodically to check if all bootStrap nodes are active in the cluster,
                     +// if not, call the cluster join to merge 2 separate clusters that are formed when all managers
                     +// stopped/started at the same time
                     +func (nDB *NetworkDB) rejoinClusterBootStrap() {
                     +	nDB.RLock()
                     +	if len(nDB.bootStrapIP) == 0 {
                     +		nDB.RUnlock()
                     +		return
                     +	}
+                    +
                     +	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
                     +	for _, bootIP := range nDB.bootStrapIP {
                     +		for _, node := range nDB.nodes {
                     +			if node.Addr.Equal(bootIP) {
                     +				// One of the bootstrap nodes is part of the cluster, return
                     +				nDB.RUnlock()
                     +				return
                     +			}
                     +		}
                     +		bootStrapIPs = append(bootStrapIPs, bootIP.String())
                     +	}
                     +	nDB.RUnlock()
                     +	// None of the bootStrap nodes are in the cluster, call memberlist join
                     +	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
                     +	ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
                     +	defer cancel()
                     +	nDB.retryJoin(ctx, bootStrapIPs)
                     +}
+                    +
                      func (nDB *NetworkDB) reconnectNode() {
                      	nDB.RLock()
                      	if len(nDB.failedNodes) == 0 {

libnetwork/networkdb/delegate.go

History View file @ 65e8971

@@ -38,16 +38,11 @@ func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
                      	// If we are here means that the event is fresher and the node is known. Update the laport time
                      	n.ltime = nEvent.LTime
                     -	// If it is a node leave event for a manager and this is the only manager we
                     -	// know of we want the reconnect logic to kick in. In a single manager
                     -	// cluster manager's gossip can't be bootstrapped unless some other node
                     -	// connects to it.
                     -	if len(nDB.bootStrapIP) == 1 && nEvent.Type == NodeEventTypeLeave {
                     -		for _, ip := range nDB.bootStrapIP {
                     -			if ip.Equal(n.Addr) {
                     -				return true
                     -			}
                     -		}
                     +	// If the node is not known from memberlist we cannot process save any state of it else if it actually
                     +	// dies we won't receive any notification and we will remain stuck with it
                     +	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
                     +		logrus.Error("node: %s is unknown to memberlist", nEvent.NodeName)
                     +		return false
+                     	}
                      	switch nEvent.Type {

libnetwork/networkdb/networkdb.go

History View file @ 65e8971

@@ -3,6 +3,7 @@ package networkdb
                      //go:generate protoc -I.:../vendor/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork/networkdb,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. networkdb.proto
                      import (
                     +	"context"
                      	"fmt"
                      	"net"
                      	"os"
@@ -77,9 +78,10 @@ type NetworkDB struct {
                      	// Broadcast queue for node event gossip.
                      	nodeBroadcasts *memberlist.TransmitLimitedQueue
                     -	// A central stop channel to stop all go routines running on
                     +	// A central context to stop all go routines running on
                      	// behalf of the NetworkDB instance.
                     -	stopCh chan struct{}
                     +	ctx       context.Context
                     +	cancelCtx context.CancelFunc
                      	// A central broadcaster for all local watchers watching table
                      	// events.