Make sure that the network is garbage collected after
the entries. Deleting entries requires that the network
is still present.
Signed-off-by: Flavio Crisciani <flavio.crisciani@docker.com>
| ... | ... |
@@ -17,11 +17,15 @@ import ( |
| 17 | 17 |
) |
| 18 | 18 |
|
| 19 | 19 |
const ( |
| 20 |
- reapInterval = 30 * time.Minute |
|
| 21 |
- reapPeriod = 5 * time.Second |
|
| 22 |
- retryInterval = 1 * time.Second |
|
| 23 |
- nodeReapInterval = 24 * time.Hour |
|
| 24 |
- nodeReapPeriod = 2 * time.Hour |
|
| 20 |
+ // The garbage collection logic for entries relies on the presence of the network. |
|
| 21 |
+ // For this reason the expiration time of the network is put slightly higher than the entry expiration so that |
|
| 22 |
+ // there are at least 5 extra cycles to make sure that all the entries are properly deleted before deleting the network. |
|
| 23 |
+ reapEntryInterval = 30 * time.Minute |
|
| 24 |
+ reapNetworkInterval = reapEntryInterval + 5*reapPeriod |
|
| 25 |
+ reapPeriod = 5 * time.Second |
|
| 26 |
+ retryInterval = 1 * time.Second |
|
| 27 |
+ nodeReapInterval = 24 * time.Hour |
|
| 28 |
+ nodeReapPeriod = 2 * time.Hour |
|
| 25 | 29 |
) |
| 26 | 30 |
|
| 27 | 31 |
type logWriter struct{}
|
| ... | ... |
@@ -300,8 +304,9 @@ func (nDB *NetworkDB) reconnectNode() {
|
| 300 | 300 |
// the reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a readlock. This |
| 301 | 301 |
// is safe as long as no other concurrent path touches the reapTime field. |
| 302 | 302 |
func (nDB *NetworkDB) reapState() {
|
| 303 |
- nDB.reapNetworks() |
|
| 303 |
+ // reapTableEntries relies on the presence of the network, so garbage collect the entries first |
|
| 304 | 304 |
nDB.reapTableEntries() |
| 305 |
+ nDB.reapNetworks() |
|
| 305 | 306 |
} |
| 306 | 307 |
|
| 307 | 308 |
func (nDB *NetworkDB) reapNetworks() {
|
| ... | ... |
@@ -414,8 +419,8 @@ func (nDB *NetworkDB) gossip() {
|
| 414 | 414 |
// Collect stats and print the queue info, note this code is here also to have a view of the queues empty |
| 415 | 415 |
network.qMessagesSent += len(msgs) |
| 416 | 416 |
if printStats {
|
| 417 |
- logrus.Infof("NetworkDB stats - net:%s Entries:%d Queue qLen:%d netPeers:%d netMsg/s:%d",
|
|
| 418 |
- nid, network.entriesNumber, broadcastQ.NumQueued(), broadcastQ.NumNodes(), |
|
| 417 |
+ logrus.Infof("NetworkDB stats - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
|
|
| 418 |
+ nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(), |
|
| 419 | 419 |
network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second))) |
| 420 | 420 |
network.qMessagesSent = 0 |
| 421 | 421 |
} |
| ... | ... |
@@ -165,7 +165,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
|
| 165 | 165 |
n.ltime = nEvent.LTime |
| 166 | 166 |
n.leaving = nEvent.Type == NetworkEventTypeLeave |
| 167 | 167 |
if n.leaving {
|
| 168 |
- n.reapTime = reapInterval |
|
| 168 |
+ n.reapTime = reapNetworkInterval |
|
| 169 | 169 |
|
| 170 | 170 |
// The remote node is leaving the network, but not the gossip cluster. |
| 171 | 171 |
// Mark all its entries in deleted state, this will guarantee that |
| ... | ... |
@@ -242,7 +242,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool {
|
| 242 | 242 |
// field. If that is not the case, this can be a BUG |
| 243 | 243 |
if e.deleting && e.reapTime == 0 {
|
| 244 | 244 |
logrus.Warnf("handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?", tEvent)
|
| 245 |
- e.reapTime = reapInterval |
|
| 245 |
+ e.reapTime = reapEntryInterval |
|
| 246 | 246 |
} |
| 247 | 247 |
|
| 248 | 248 |
nDB.Lock() |
| ... | ... |
@@ -405,7 +405,7 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error {
|
| 405 | 405 |
node: nDB.config.NodeName, |
| 406 | 406 |
value: value, |
| 407 | 407 |
deleting: true, |
| 408 |
- reapTime: reapInterval, |
|
| 408 |
+ reapTime: reapEntryInterval, |
|
| 409 | 409 |
} |
| 410 | 410 |
|
| 411 | 411 |
if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil {
|
| ... | ... |
@@ -478,7 +478,7 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) {
|
| 478 | 478 |
node: oldEntry.node, |
| 479 | 479 |
value: oldEntry.value, |
| 480 | 480 |
deleting: true, |
| 481 |
- reapTime: reapInterval, |
|
| 481 |
+ reapTime: reapEntryInterval, |
|
| 482 | 482 |
} |
| 483 | 483 |
|
| 484 | 484 |
// we arrived at this point in 2 cases: |
| ... | ... |
@@ -619,8 +619,9 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error {
|
| 619 | 619 |
return fmt.Errorf("could not find network %s while trying to leave", nid)
|
| 620 | 620 |
} |
| 621 | 621 |
|
| 622 |
+ logrus.Debugf("%s: leaving network %s", nDB.config.NodeName, nid)
|
|
| 622 | 623 |
n.ltime = ltime |
| 623 |
- n.reapTime = reapInterval |
|
| 624 |
+ n.reapTime = reapNetworkInterval |
|
| 624 | 625 |
n.leaving = true |
| 625 | 626 |
return nil |
| 626 | 627 |
} |