GitList

Browse code

Avoid alignment of reapNetwork and tableEntries

Make sure that the network is garbage collected after
its entries. Deleting the entries requires that the network
is still present.

Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>

Flavio Crisciani authored on 2017/09/23 02:23:07
Showing 3 changed files

libnetwork/networkdb/cluster.go index a823148..af6f5d9 100644
libnetwork/networkdb/delegate.go index bcddc90..28919cf 100644
libnetwork/networkdb/networkdb.go index caa3cfc..afdf32e 100644

libnetwork/networkdb/cluster.go

History View file @ a4e64d0

@@ -17,11 +17,15 @@ import (
+                     )
                      const (
                     -	reapInterval     = 30 * time.Minute
                     -	reapPeriod       = 5 * time.Second
                     -	retryInterval    = 1 * time.Second
                     -	nodeReapInterval = 24 * time.Hour
                     -	nodeReapPeriod   = 2 * time.Hour
                     +	// The garbage collection logic for entries leverages the presence of the network.
                     +	// For this reason the expiration time of the network is set slightly higher than the entry expiration so that
                     +	// there are at least 5 extra cycles to make sure that all the entries are properly deleted before deleting the network.
                     +	reapEntryInterval   = 30 * time.Minute
                     +	reapNetworkInterval = reapEntryInterval + 5*reapPeriod
                     +	reapPeriod          = 5 * time.Second
                     +	retryInterval       = 1 * time.Second
                     +	nodeReapInterval    = 24 * time.Hour
                     +	nodeReapPeriod      = 2 * time.Hour
+                     )
                      type logWriter struct{}
@@ -300,8 +304,9 @@ func (nDB *NetworkDB) reconnectNode() {
                      // the reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a readlock. This
                      // is safe as long as no other concurrent path touches the reapTime field.
                      func (nDB *NetworkDB) reapState() {
                     -	nDB.reapNetworks()
                     +	// reapTableEntries leverages the presence of the network, so garbage collect entries first
                      	nDB.reapTableEntries()
                     +	nDB.reapNetworks()
+                     }
                      func (nDB *NetworkDB) reapNetworks() {
@@ -414,8 +419,8 @@ func (nDB *NetworkDB) gossip() {
                      		// Collect stats and print the queue info, note this code is here also to have a view of the queues empty
                      		network.qMessagesSent += len(msgs)
                      		if printStats {
                     -			logrus.Infof("NetworkDB stats - net:%s Entries:%d Queue qLen:%d netPeers:%d netMsg/s:%d",
                     -				nid, network.entriesNumber, broadcastQ.NumQueued(), broadcastQ.NumNodes(),
                     +			logrus.Infof("NetworkDB stats - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
                     +				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
                      				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
                      			network.qMessagesSent = 0
+                     		}

libnetwork/networkdb/delegate.go

History View file @ a4e64d0

@@ -165,7 +165,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
                      		n.ltime = nEvent.LTime
                      		n.leaving = nEvent.Type == NetworkEventTypeLeave
                      		if n.leaving {
                     -			n.reapTime = reapInterval
                     +			n.reapTime = reapNetworkInterval
                      			// The remote node is leaving the network, but not the gossip cluster.
                      			// Mark all its entries in deleted state, this will guarantee that
@@ -242,7 +242,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
                      	// field. If that is not the case, this can be a BUG
                      	if e.deleting && e.reapTime == 0 {
                      		logrus.Warnf("handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?", tEvent)
                     -		e.reapTime = reapInterval
                     +		e.reapTime = reapEntryInterval
+                     	}
                      	nDB.Lock()

libnetwork/networkdb/networkdb.go

History View file @ a4e64d0

@@ -405,7 +405,7 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
                      		node:     nDB.config.NodeName,
                      		value:    value,
                      		deleting: true,
                     -		reapTime: reapInterval,
                     +		reapTime: reapEntryInterval,
+                     	}
                      	if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
@@ -478,7 +478,7 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
                      				node:     oldEntry.node,
                      				value:    oldEntry.value,
                      				deleting: true,
                     -				reapTime: reapInterval,
                     +				reapTime: reapEntryInterval,
+                     			}
                      			// we arrived at this point in 2 cases:
@@ -619,8 +619,9 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
                      		return fmt.Errorf("could not find network %s while trying to leave", nid)
+                     	}
                     +	logrus.Debugf("%s: leaving network %s", nDB.config.NodeName, nid)
                      	n.ltime = ltime
                     -	n.reapTime = reapInterval
                     +	n.reapTime = reapNetworkInterval
                      	n.leaving = true
                      	return nil
+                     }