
Make sure timers are stopped after use.

`time.After` allocates a new timer on each call and keeps it running until the
specified duration has elapsed; there is no way to stop it early. This can wind
up leaving lots of unnecessary timers running in the background, consuming
resources.
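
For example (a minimal sketch, not code from this change), a `select` that uses
`time.After` and then returns early still leaves that timer allocated and
running for its full duration:

```go
package main

import (
	"fmt"
	"time"
)

func waitLeaky(done <-chan struct{}) {
	select {
	case <-done:
		// Returns right away, but the timer created by time.After
		// stays allocated and keeps running for the full hour.
		fmt.Println("done")
	case <-time.After(time.Hour):
		fmt.Println("timed out")
	}
}

func main() {
	done := make(chan struct{})
	close(done)
	waitLeaky(done)
}
```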

Instead of `time.After`, use `time.NewTimer` so the timer can actually be
stopped once it is no longer needed.
In some of these cases it's not a big deal since the duration is really
short, but in others it is much worse.
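
As a rough sketch of the two replacement shapes used throughout this change
(the function and package names here are illustrative, not taken from the
diff): a one-shot wait whose timer is stopped on return, and a loop that
re-arms a single timer with `Reset` instead of allocating a new one on every
iteration:

```go
package timerutil

import (
	"errors"
	"time"
)

// One-shot wait: the timer is released as soon as the function returns.
func waitOnce(done <-chan struct{}, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-done:
		return nil
	case <-timer.C:
		return errors.New("timeout")
	}
}

// Wait in a loop: reuse one timer, draining it once up front so the first
// Reset cannot race with a tick that already fired.
func poll(stop <-chan struct{}, interval time.Duration, tick func()) {
	timer := time.NewTimer(interval)
	if !timer.Stop() {
		<-timer.C
	}
	defer timer.Stop()

	for {
		timer.Reset(interval)
		select {
		case <-stop:
			return
		case <-timer.C:
			tick()
		}
	}
}
```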

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2019/01/10 03:24:03
Showing 15 changed files
... ...
@@ -165,7 +165,9 @@ func (s *systemRouter) getEvents(ctx context.Context, w http.ResponseWriter, r *
 
 		if !onlyPastEvents {
 			dur := until.Sub(now)
-			timeout = time.After(dur)
+			timer := time.NewTimer(dur)
+			defer timer.Stop()
+			timeout = timer.C
 		}
 	}
 
... ...
@@ -378,10 +378,14 @@ func shutdownDaemon(d *daemon.Daemon) {
 		logrus.Debug("Clean shutdown succeeded")
 		return
 	}
+
+	timeout := time.NewTimer(time.Duration(shutdownTimeout) * time.Second)
+	defer timeout.Stop()
+
 	select {
 	case <-ch:
 		logrus.Debug("Clean shutdown succeeded")
-	case <-time.After(time.Duration(shutdownTimeout) * time.Second):
+	case <-timeout.C:
 		logrus.Error("Force shutdown daemon")
 	}
 }
... ...
@@ -33,8 +33,11 @@ func (container *Container) Reset(lock bool) {
 				container.LogCopier.Wait()
 				close(exit)
 			}()
+
+			timer := time.NewTimer(loggerCloseTimeout)
+			defer timer.Stop()
 			select {
-			case <-time.After(loggerCloseTimeout):
+			case <-timer.C:
 				logrus.Warn("Logger didn't exit in time: logs may be truncated")
 			case <-exit:
 			}
... ...
@@ -186,8 +186,11 @@ func (c *Cluster) Start() error {
 	}
 	c.nr = nr
 
+	timer := time.NewTimer(swarmConnectTimeout)
+	defer timer.Stop()
+
 	select {
-	case <-time.After(swarmConnectTimeout):
+	case <-timer.C:
 		logrus.Error("swarm component could not be started before timeout was reached")
 	case err := <-nr.Ready():
 		if err != nil {
... ...
@@ -194,8 +194,11 @@ func (c *Cluster) Join(req types.JoinRequest) error {
 	c.nr = nr
 	c.mu.Unlock()
 
+	timeout := time.NewTimer(swarmConnectTimeout)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(swarmConnectTimeout):
+	case <-timeout.C:
 		return errSwarmJoinTimeoutReached
 	case err := <-nr.Ready():
 		if err != nil {
... ...
@@ -42,6 +42,7 @@ import (
 	"github.com/moby/buildkit/util/resolver"
 	"github.com/moby/buildkit/util/tracing"
 	"github.com/sirupsen/logrus"
+
 	// register graph drivers
 	_ "github.com/docker/docker/daemon/graphdriver/register"
 	"github.com/docker/docker/daemon/stats"
... ...
@@ -479,12 +480,14 @@ func (daemon *Daemon) restore() error {
 			// ignore errors here as this is a best effort to wait for children to be
 			//   running before we try to start the container
 			children := daemon.children(c)
-			timeout := time.After(5 * time.Second)
+			timeout := time.NewTimer(5 * time.Second)
+			defer timeout.Stop()
+
 			for _, child := range children {
 				if notifier, exists := restartContainers[child]; exists {
 					select {
 					case <-notifier:
-					case <-timeout:
+					case <-timeout.C:
 					}
 				}
 			}
... ...
@@ -602,6 +605,7 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
 	if daemon.discoveryWatcher == nil {
 		return
 	}
+
 	// Make sure if the container has a network that requires discovery that the discovery service is available before starting
 	for netName := range c.NetworkSettings.Networks {
 		// If we get `ErrNoSuchNetwork` here, we can assume that it is due to discovery not being ready
... ...
@@ -610,13 +614,19 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
 			if _, ok := err.(libnetwork.ErrNoSuchNetwork); !ok {
 				continue
 			}
+
 			// use a longish timeout here due to some slowdowns in libnetwork if the k/v store is on anything other than --net=host
 			// FIXME: why is this slow???
+			dur := 60 * time.Second
+			timer := time.NewTimer(dur)
+
 			logrus.Debugf("Container %s waiting for network to be ready", c.Name)
 			select {
 			case <-daemon.discoveryWatcher.ReadyCh():
-			case <-time.After(60 * time.Second):
+			case <-timer.C:
 			}
+			timer.Stop()
+
 			return
 		}
 	}
... ...
@@ -666,10 +676,14 @@ func (daemon *Daemon) DaemonLeavesCluster() {
 	// This is called also on graceful daemon shutdown. We need to
 	// wait, because the ingress release has to happen before the
 	// network controller is stopped.
+
 	if done, err := daemon.ReleaseIngress(); err == nil {
+		timeout := time.NewTimer(5 * time.Second)
+		defer timeout.Stop()
+
 		select {
 		case <-done:
-		case <-time.After(5 * time.Second):
+		case <-timeout.C:
 			logrus.Warn("timeout while waiting for ingress network removal")
 		}
 	} else {
... ...
@@ -148,12 +148,14 @@ func (d *daemonDiscoveryReloader) initHeartbeat(address string) error {
 	// Setup a short ticker until the first heartbeat has succeeded
 	t := time.NewTicker(500 * time.Millisecond)
 	defer t.Stop()
+
 	// timeout makes sure that after a period of time we stop being so aggressive trying to reach the discovery service
-	timeout := time.After(60 * time.Second)
+	timeout := time.NewTimer(60 * time.Second)
+	defer timeout.Stop()
 
 	for {
 		select {
-		case <-timeout:
+		case <-timeout.C:
 			return errors.New("timeout waiting for initial discovery")
 		case <-d.term:
 			return errors.New("terminated")
... ...
@@ -22,7 +22,7 @@ import (
 )
 
 // Seconds to wait after sending TERM before trying KILL
-const termProcessTimeout = 10
+const termProcessTimeout = 10 * time.Second
 
 func (d *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
... ...
@@ -265,9 +265,13 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
 	case <-ctx.Done():
 		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
 		d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"]))
+
+		timeout := time.NewTimer(termProcessTimeout)
+		defer timeout.Stop()
+
 		select {
-		case <-time.After(termProcessTimeout * time.Second):
-			logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
+		case <-timeout.C:
+			logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout)
 			d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"]))
 		case <-attachErr:
 			// TERM signal worked
... ...
@@ -187,12 +187,18 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
 	probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout)
 	probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
+
+	intervalTimer := time.NewTimer(probeInterval)
+	defer intervalTimer.Stop()
+
 	for {
+		intervalTimer.Reset(probeInterval)
+
 		select {
 		case <-stop:
 			logrus.Debugf("Stop healthcheck monitoring for container %s (received while idle)", c.ID)
 			return
-		case <-time.After(probeInterval):
+		case <-intervalTimer.C:
 			logrus.Debugf("Running health check for container %s ...", c.ID)
 			startTime := time.Now()
 			ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout)
... ...
@@ -38,13 +38,16 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
 	if err != nil {
 		return err
 	}
+
 	// TODO: the timeout is hardcoded here, it would be more flexible to make it
 	// a parameter in resize request context, which would need API changes.
-	timeout := 10 * time.Second
+	timeout := time.NewTimer(10 * time.Second)
+	defer timeout.Stop()
+
 	select {
 	case <-ec.Started:
 		return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
-	case <-time.After(timeout):
+	case <-timeout.C:
 		return fmt.Errorf("timeout waiting for exec session ready")
 	}
 }
... ...
@@ -89,8 +89,11 @@ func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Da
 
 	go r.monitorDaemon(ctx)
 
+	timeout := time.NewTimer(startupTimeout)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(startupTimeout):
+	case <-timeout.C:
 		return nil, errors.New("timeout waiting for containerd to start")
 	case err := <-r.daemonStartCh:
 		if err != nil {
... ...
@@ -101,8 +104,11 @@ func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Da
 	return r, nil
 }
 func (r *remote) WaitTimeout(d time.Duration) error {
+	timeout := time.NewTimer(d)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(d):
+	case <-timeout.C:
 		return errors.New("timeout waiting for containerd to stop")
 	case <-r.daemonStopCh:
 	}
... ...
@@ -230,7 +236,8 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		transientFailureCount = 0
 		client                *containerd.Client
 		err                   error
-		delay                 <-chan time.Time
+		delay                 time.Duration
+		timer                 = time.NewTimer(0)
 		started               bool
 	)
 
... ...
@@ -245,19 +252,25 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		r.platformCleanup()
 
 		close(r.daemonStopCh)
+		timer.Stop()
 	}()
 
+	// ensure no races on sending to timer.C even though there is a 0 duration.
+	if !timer.Stop() {
+		<-timer.C
+	}
+
 	for {
-		if delay != nil {
-			select {
-			case <-ctx.Done():
-				r.logger.Info("stopping healthcheck following graceful shutdown")
-				if client != nil {
-					client.Close()
-				}
-				return
-			case <-delay:
+		timer.Reset(delay)
+
+		select {
+		case <-ctx.Done():
+			r.logger.Info("stopping healthcheck following graceful shutdown")
+			if client != nil {
+				client.Close()
 			}
+			return
+		case <-timer.C:
 		}
 
 		if r.daemonPid == -1 {
... ...
@@ -277,14 +290,14 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 					return
 				}
 				r.logger.WithError(err).Error("failed restarting containerd")
-				delay = time.After(50 * time.Millisecond)
+				delay = 50 * time.Millisecond
 				continue
 			}
 
 			client, err = containerd.New(r.GRPC.Address, containerd.WithTimeout(60*time.Second))
 			if err != nil {
 				r.logger.WithError(err).Error("failed connecting to containerd")
-				delay = time.After(100 * time.Millisecond)
+				delay = 100 * time.Millisecond
 				continue
 			}
 		}
... ...
@@ -300,7 +313,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 				}
 
 				transientFailureCount = 0
-				delay = time.After(500 * time.Millisecond)
+				delay = 500 * time.Millisecond
 				continue
 			}
 
... ...
@@ -308,7 +321,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 
 			transientFailureCount++
 			if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
-				delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond)
+				delay = time.Duration(transientFailureCount) * 200 * time.Millisecond
 				continue
 			}
 		}
... ...
@@ -321,7 +334,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		client.Close()
 		client = nil
 		r.daemonPid = -1
-		delay = nil
+		delay = 0
 		transientFailureCount = 0
 	}
 }
... ...
@@ -146,9 +146,18 @@ func (w *filePoller) sendErr(e error, chClose <-chan struct{}) error {
 // upon finding changes to a file or errors, sendEvent/sendErr is called
 func (w *filePoller) watch(f *os.File, lastFi os.FileInfo, chClose chan struct{}) {
 	defer f.Close()
+
+	timer := time.NewTimer(watchWaitTime)
+	if !timer.Stop() {
+		<-timer.C
+	}
+	defer timer.Stop()
+
 	for {
+		timer.Reset(watchWaitTime)
+
 		select {
-		case <-time.After(watchWaitTime):
+		case <-timer.C:
 		case <-chClose:
 			logrus.Debugf("watch for %s closed", f.Name())
 			return
... ...
@@ -107,9 +107,12 @@ func (p *Publisher) sendTopic(sub subscriber, topic topicFunc, v interface{}, wg
 
 	// send under a select as to not block if the receiver is unavailable
 	if p.timeout > 0 {
+		timeout := time.NewTimer(p.timeout)
+		defer timeout.Stop()
+
 		select {
 		case sub <- v:
-		case <-time.After(p.timeout):
+		case <-timeout.C:
 		}
 		return
 	}
... ...
@@ -146,6 +146,8 @@ func (pm *Manager) restore(p *v2.Plugin, c *controller) error {
 	return nil
 }
 
+const shutdownTimeout = 10 * time.Second
+
 func shutdownPlugin(p *v2.Plugin, ec chan bool, executor Executor) {
 	pluginID := p.GetID()
 
... ...
@@ -153,19 +155,26 @@ func shutdownPlugin(p *v2.Plugin, ec chan bool, executor Executor) {
 	if err != nil {
 		logrus.Errorf("Sending SIGTERM to plugin failed with error: %v", err)
 	} else {
+
+		timeout := time.NewTimer(shutdownTimeout)
+		defer timeout.Stop()
+
 		select {
 		case <-ec:
 			logrus.Debug("Clean shutdown of plugin")
-		case <-time.After(time.Second * 10):
+		case <-timeout.C:
 			logrus.Debug("Force shutdown plugin")
 			if err := executor.Signal(pluginID, int(unix.SIGKILL)); err != nil {
 				logrus.Errorf("Sending SIGKILL to plugin failed with error: %v", err)
 			}
+
+			timeout.Reset(shutdownTimeout)
+
 			select {
 			case <-ec:
 				logrus.Debug("SIGKILL plugin shutdown")
-			case <-time.After(time.Second * 10):
-				logrus.Debug("Force shutdown plugin FAILED")
+			case <-timeout.C:
+				logrus.WithField("plugin", p.Name).Warn("Force shutdown plugin FAILED")
 			}
 		}
 	}
... ...
@@ -107,11 +107,14 @@ func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped
 
 	ch := make(chan error)
 	go func() {
+		timeout := time.NewTimer(rm.timeout)
+		defer timeout.Stop()
+
 		select {
 		case <-rm.cancel:
 			ch <- ErrRestartCanceled
 			close(ch)
-		case <-time.After(rm.timeout):
+		case <-timeout.C:
 			rm.Lock()
 			close(ch)
 			rm.active = false