Merge pull request #38522 from cpuguy83/fix_timers

Make sure timers are stopped after use.

Sebastiaan van Stijn authored on 2019/06/07 20:16:46
Showing 15 changed files
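The change applied across all 15 files is the same: time.After allocates a Timer that is not reclaimed until it fires, even when the surrounding select has long since returned, so each call site switches to an explicit time.NewTimer that is stopped once it is no longer needed. A minimal sketch of the resulting shape, with hypothetical waitForResult/result names that are not taken from the diff:

package main

import (
	"errors"
	"fmt"
	"time"
)

// waitForResult waits for a value or a deadline. Stopping the timer on return
// lets the runtime reclaim it immediately instead of keeping it alive until
// the duration elapses, which is what time.After would do.
func waitForResult(result <-chan string, d time.Duration) (string, error) {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case r := <-result:
		return r, nil
	case <-timer.C:
		return "", errors.New("timed out waiting for result")
	}
}

func main() {
	result := make(chan string, 1)
	result <- "done"
	fmt.Println(waitForResult(result, 50*time.Millisecond))
}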
... ...
@@ -174,7 +174,9 @@ func (s *systemRouter) getEvents(ctx context.Context, w http.ResponseWriter, r *
 
 		if !onlyPastEvents {
 			dur := until.Sub(now)
-			timeout = time.After(dur)
+			timer := time.NewTimer(dur)
+			defer timer.Stop()
+			timeout = timer.C
 		}
 	}
 
... ...
@@ -400,10 +400,14 @@ func shutdownDaemon(d *daemon.Daemon) {
 		logrus.Debug("Clean shutdown succeeded")
 		return
 	}
+
+	timeout := time.NewTimer(time.Duration(shutdownTimeout) * time.Second)
+	defer timeout.Stop()
+
 	select {
 	case <-ch:
 		logrus.Debug("Clean shutdown succeeded")
-	case <-time.After(time.Duration(shutdownTimeout) * time.Second):
+	case <-timeout.C:
 		logrus.Error("Force shutdown daemon")
 	}
 }
... ...
@@ -33,8 +33,11 @@ func (container *Container) Reset(lock bool) {
 				container.LogCopier.Wait()
 				close(exit)
 			}()
+
+			timer := time.NewTimer(loggerCloseTimeout)
+			defer timer.Stop()
 			select {
-			case <-time.After(loggerCloseTimeout):
+			case <-timer.C:
 				logrus.Warn("Logger didn't exit in time: logs may be truncated")
 			case <-exit:
 			}
... ...
@@ -186,8 +186,11 @@ func (c *Cluster) Start() error {
 	}
 	c.nr = nr
 
+	timer := time.NewTimer(swarmConnectTimeout)
+	defer timer.Stop()
+
 	select {
-	case <-time.After(swarmConnectTimeout):
+	case <-timer.C:
 		logrus.Error("swarm component could not be started before timeout was reached")
 	case err := <-nr.Ready():
 		if err != nil {
... ...
@@ -194,8 +194,11 @@ func (c *Cluster) Join(req types.JoinRequest) error {
 	c.nr = nr
 	c.mu.Unlock()
 
+	timeout := time.NewTimer(swarmConnectTimeout)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(swarmConnectTimeout):
+	case <-timeout.C:
 		return errSwarmJoinTimeoutReached
 	case err := <-nr.Ready():
 		if err != nil {
... ...
@@ -486,12 +486,14 @@ func (daemon *Daemon) restore() error {
 			// ignore errors here as this is a best effort to wait for children to be
 			//   running before we try to start the container
 			children := daemon.children(c)
-			timeout := time.After(5 * time.Second)
+			timeout := time.NewTimer(5 * time.Second)
+			defer timeout.Stop()
+
 			for _, child := range children {
 				if notifier, exists := restartContainers[child]; exists {
 					select {
 					case <-notifier:
-					case <-timeout:
+					case <-timeout.C:
 					}
 				}
 			}
... ...
@@ -609,6 +611,7 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
 	if daemon.discoveryWatcher == nil {
 		return
 	}
+
 	// Make sure if the container has a network that requires discovery that the discovery service is available before starting
 	for netName := range c.NetworkSettings.Networks {
 		// If we get `ErrNoSuchNetwork` here, we can assume that it is due to discovery not being ready
... ...
@@ -617,13 +620,19 @@ func (daemon *Daemon) waitForNetworks(c *container.Container) {
 			if _, ok := err.(libnetwork.ErrNoSuchNetwork); !ok {
 				continue
 			}
+
 			// use a longish timeout here due to some slowdowns in libnetwork if the k/v store is on anything other than --net=host
 			// FIXME: why is this slow???
+			dur := 60 * time.Second
+			timer := time.NewTimer(dur)
+
 			logrus.Debugf("Container %s waiting for network to be ready", c.Name)
 			select {
 			case <-daemon.discoveryWatcher.ReadyCh():
-			case <-time.After(60 * time.Second):
+			case <-timer.C:
 			}
+			timer.Stop()
+
 			return
 		}
 	}
... ...
@@ -673,10 +682,14 @@ func (daemon *Daemon) DaemonLeavesCluster() {
 	// This is called also on graceful daemon shutdown. We need to
 	// wait, because the ingress release has to happen before the
 	// network controller is stopped.
+
 	if done, err := daemon.ReleaseIngress(); err == nil {
+		timeout := time.NewTimer(5 * time.Second)
+		defer timeout.Stop()
+
 		select {
 		case <-done:
-		case <-time.After(5 * time.Second):
+		case <-timeout.C:
 			logrus.Warn("timeout while waiting for ingress network removal")
 		}
 	} else {
... ...
@@ -148,12 +148,14 @@ func (d *daemonDiscoveryReloader) initHeartbeat(address string) error {
 	// Setup a short ticker until the first heartbeat has succeeded
 	t := time.NewTicker(500 * time.Millisecond)
 	defer t.Stop()
+
 	// timeout makes sure that after a period of time we stop being so aggressive trying to reach the discovery service
-	timeout := time.After(60 * time.Second)
+	timeout := time.NewTimer(60 * time.Second)
+	defer timeout.Stop()
 
 	for {
 		select {
-		case <-timeout:
+		case <-timeout.C:
 			return errors.New("timeout waiting for initial discovery")
 		case <-d.term:
 			return errors.New("terminated")
... ...
@@ -23,7 +23,7 @@ import (
 )
 
 // Seconds to wait after sending TERM before trying KILL
-const termProcessTimeout = 10
+const termProcessTimeout = 10 * time.Second
 
 func (d *Daemon) registerExecCommand(container *container.Container, config *exec.Config) {
 	// Storing execs in container in order to kill them gracefully whenever the container is stopped or removed.
... ...
@@ -277,9 +277,13 @@ func (d *Daemon) ContainerExecStart(ctx context.Context, name string, stdin io.R
 	case <-ctx.Done():
 		logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID)
 		d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"]))
+
+		timeout := time.NewTimer(termProcessTimeout)
+		defer timeout.Stop()
+
 		select {
-		case <-time.After(termProcessTimeout * time.Second):
-			logrus.Infof("Container %v, process %v failed to exit within %d seconds of signal TERM - using the force", c.ID, name, termProcessTimeout)
+		case <-timeout.C:
+			logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout)
 			d.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"]))
 		case <-attachErr:
 			// TERM signal worked
... ...
@@ -187,12 +187,18 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 func monitor(d *Daemon, c *container.Container, stop chan struct{}, probe probe) {
 	probeTimeout := timeoutWithDefault(c.Config.Healthcheck.Timeout, defaultProbeTimeout)
 	probeInterval := timeoutWithDefault(c.Config.Healthcheck.Interval, defaultProbeInterval)
+
+	intervalTimer := time.NewTimer(probeInterval)
+	defer intervalTimer.Stop()
+
 	for {
+		intervalTimer.Reset(probeInterval)
+
 		select {
 		case <-stop:
			logrus.Debugf("Stop healthcheck monitoring for container %s (received while idle)", c.ID)
 			return
-		case <-time.After(probeInterval):
+		case <-intervalTimer.C:
 			logrus.Debugf("Running health check for container %s ...", c.ID)
 			startTime := time.Now()
 			ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout)
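The health-check hunk above also moves the timer out of the loop: one timer is allocated up front and Reset re-arms it at the top of each iteration, rather than calling time.After on every pass. That is safe in this shape because every iteration either receives from the timer channel or returns. A rough sketch of the pattern, with hypothetical pollLoop/tick names that are not from the diff:

package main

import "time"

// pollLoop reuses a single timer across iterations instead of allocating a
// new one with time.After each time around. Reset at the top of the loop is
// safe here because the previous iteration either drained timer.C or exited.
func pollLoop(interval time.Duration, stop <-chan struct{}, tick func()) {
	timer := time.NewTimer(interval)
	defer timer.Stop()

	for {
		timer.Reset(interval)

		select {
		case <-stop:
			return
		case <-timer.C:
			tick()
		}
	}
}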
... ...
@@ -38,13 +38,16 @@ func (daemon *Daemon) ContainerExecResize(name string, height, width int) error
 	if err != nil {
 		return err
 	}
+
 	// TODO: the timeout is hardcoded here, it would be more flexible to make it
 	// a parameter in resize request context, which would need API changes.
-	timeout := 10 * time.Second
+	timeout := time.NewTimer(10 * time.Second)
+	defer timeout.Stop()
+
 	select {
 	case <-ec.Started:
 		return daemon.containerd.ResizeTerminal(context.Background(), ec.ContainerID, ec.ID, width, height)
-	case <-time.After(timeout):
+	case <-timeout.C:
 		return fmt.Errorf("timeout waiting for exec session ready")
 	}
 }
... ...
@@ -89,8 +89,11 @@ func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Da
 
 	go r.monitorDaemon(ctx)
 
+	timeout := time.NewTimer(startupTimeout)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(startupTimeout):
+	case <-timeout.C:
 		return nil, errors.New("timeout waiting for containerd to start")
 	case err := <-r.daemonStartCh:
 		if err != nil {
... ...
@@ -101,8 +104,11 @@ func Start(ctx context.Context, rootDir, stateDir string, opts ...DaemonOpt) (Da
 	return r, nil
 }
 func (r *remote) WaitTimeout(d time.Duration) error {
+	timeout := time.NewTimer(d)
+	defer timeout.Stop()
+
 	select {
-	case <-time.After(d):
+	case <-timeout.C:
 		return errors.New("timeout waiting for containerd to stop")
 	case <-r.daemonStopCh:
 	}
... ...
@@ -230,7 +236,8 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		transientFailureCount = 0
 		client                *containerd.Client
 		err                   error
-		delay                 <-chan time.Time
+		delay                 time.Duration
+		timer                 = time.NewTimer(0)
 		started               bool
 	)
 
... ...
@@ -245,19 +252,25 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		r.platformCleanup()
 
 		close(r.daemonStopCh)
+		timer.Stop()
 	}()
 
+	// ensure no races on sending to timer.C even though there is a 0 duration.
+	if !timer.Stop() {
+		<-timer.C
+	}
+
 	for {
-		if delay != nil {
-			select {
-			case <-ctx.Done():
-				r.logger.Info("stopping healthcheck following graceful shutdown")
-				if client != nil {
-					client.Close()
-				}
-				return
-			case <-delay:
+		timer.Reset(delay)
+
+		select {
+		case <-ctx.Done():
+			r.logger.Info("stopping healthcheck following graceful shutdown")
+			if client != nil {
+				client.Close()
 			}
+			return
+		case <-timer.C:
 		}
 
 		if r.daemonPid == -1 {
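The monitorDaemon changes above create the reusable timer with a zero duration and then immediately stop and drain it, because Timer.Reset is only documented as safe on a timer that is stopped or expired and whose channel has been drained; the drain guarantees the later Reset calls in the loop start from a clean state. A small sketch of that stop-and-drain step, using an illustrative helper name that does not appear in the diff:

package main

import "time"

// newDrainedTimer returns a timer that is stopped and whose channel is empty,
// so callers can Reset it later without racing against a stale value in C.
func newDrainedTimer() *time.Timer {
	t := time.NewTimer(0)
	if !t.Stop() {
		// The zero-duration timer may already have fired; drain the channel
		// so the next Reset starts from a clean state.
		<-t.C
	}
	return t
}

The same stop-and-drain appears in the filePoller hunk further below before its timer is first reused.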
... ...
@@ -277,14 +290,14 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 					return
 				}
 				r.logger.WithError(err).Error("failed restarting containerd")
-				delay = time.After(50 * time.Millisecond)
+				delay = 50 * time.Millisecond
 				continue
 			}
 
 			client, err = containerd.New(r.GRPC.Address, containerd.WithTimeout(60*time.Second))
 			if err != nil {
 				r.logger.WithError(err).Error("failed connecting to containerd")
-				delay = time.After(100 * time.Millisecond)
+				delay = 100 * time.Millisecond
 				continue
 			}
 		}
... ...
@@ -300,7 +313,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 				}
 
 				transientFailureCount = 0
-				delay = time.After(500 * time.Millisecond)
+				delay = 500 * time.Millisecond
 				continue
 			}
 
... ...
@@ -308,7 +321,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 
 			transientFailureCount++
 			if transientFailureCount < maxConnectionRetryCount || system.IsProcessAlive(r.daemonPid) {
-				delay = time.After(time.Duration(transientFailureCount) * 200 * time.Millisecond)
+				delay = time.Duration(transientFailureCount) * 200 * time.Millisecond
 				continue
 			}
 			client.Close()
... ...
@@ -321,7 +334,7 @@ func (r *remote) monitorDaemon(ctx context.Context) {
 		}
 
 		r.daemonPid = -1
-		delay = nil
+		delay = 0
 		transientFailureCount = 0
 	}
 }
... ...
@@ -146,9 +146,18 @@ func (w *filePoller) sendErr(e error, chClose <-chan struct{}) error {
 // upon finding changes to a file or errors, sendEvent/sendErr is called
 func (w *filePoller) watch(f *os.File, lastFi os.FileInfo, chClose chan struct{}) {
 	defer f.Close()
+
+	timer := time.NewTimer(watchWaitTime)
+	if !timer.Stop() {
+		<-timer.C
+	}
+	defer timer.Stop()
+
 	for {
+		timer.Reset(watchWaitTime)
+
 		select {
-		case <-time.After(watchWaitTime):
+		case <-timer.C:
 		case <-chClose:
 			logrus.Debugf("watch for %s closed", f.Name())
 			return
... ...
@@ -107,9 +107,12 @@ func (p *Publisher) sendTopic(sub subscriber, topic topicFunc, v interface{}, wg
 
 	// send under a select as to not block if the receiver is unavailable
 	if p.timeout > 0 {
+		timeout := time.NewTimer(p.timeout)
+		defer timeout.Stop()
+
 		select {
 		case sub <- v:
-		case <-time.After(p.timeout):
+		case <-timeout.C:
 		}
 		return
 	}
... ...
@@ -146,6 +146,8 @@ func (pm *Manager) restore(p *v2.Plugin, c *controller) error {
 	return nil
 }
 
+const shutdownTimeout = 10 * time.Second
+
 func shutdownPlugin(p *v2.Plugin, ec chan bool, executor Executor) {
 	pluginID := p.GetID()
 
... ...
@@ -153,19 +155,26 @@ func shutdownPlugin(p *v2.Plugin, ec chan bool, executor Executor) {
 	if err != nil {
 		logrus.Errorf("Sending SIGTERM to plugin failed with error: %v", err)
 	} else {
+
+		timeout := time.NewTimer(shutdownTimeout)
+		defer timeout.Stop()
+
 		select {
 		case <-ec:
 			logrus.Debug("Clean shutdown of plugin")
-		case <-time.After(time.Second * 10):
+		case <-timeout.C:
 			logrus.Debug("Force shutdown plugin")
 			if err := executor.Signal(pluginID, int(unix.SIGKILL)); err != nil {
 				logrus.Errorf("Sending SIGKILL to plugin failed with error: %v", err)
 			}
+
+			timeout.Reset(shutdownTimeout)
+
 			select {
 			case <-ec:
 				logrus.Debug("SIGKILL plugin shutdown")
-			case <-time.After(time.Second * 10):
-				logrus.Debug("Force shutdown plugin FAILED")
+			case <-timeout.C:
+				logrus.WithField("plugin", p.Name).Warn("Force shutdown plugin FAILED")
 			}
 		}
 	}
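In the plugin shutdown above the same timer serves both waits: once the first <-timeout.C fires, its channel is drained, so Reset can re-arm it for the post-SIGKILL wait without allocating a second timer. A condensed sketch of that two-stage wait, with hypothetical twoStageWait/escalate names:

package main

import "time"

// twoStageWait waits for done, escalates once on timeout, then reuses the
// same timer for a second wait. Reset is safe here because the first receive
// from timeout.C leaves the channel empty.
func twoStageWait(done <-chan bool, d time.Duration, escalate func()) {
	timeout := time.NewTimer(d)
	defer timeout.Stop()

	select {
	case <-done:
		return
	case <-timeout.C:
		escalate()
		timeout.Reset(d)
		select {
		case <-done:
		case <-timeout.C:
		}
	}
}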
... ...
@@ -107,11 +107,14 @@ func (rm *restartManager) ShouldRestart(exitCode uint32, hasBeenManuallyStopped
 
 	ch := make(chan error)
 	go func() {
+		timeout := time.NewTimer(rm.timeout)
+		defer timeout.Stop()
+
 		select {
 		case <-rm.cancel:
 			ch <- ErrRestartCanceled
 			close(ch)
-		case <-time.After(rm.timeout):
+		case <-timeout.C:
 			rm.Lock()
 			close(ch)
 			rm.active = false