
container: protect the health status with mutex

Adds a mutex to protect the health status, as well. When running the unit
tests with the race detector, we can see that the Status field is written
without holding any lock. Guarding reads and writes of the status with a
mutex addresses the issue.

Signed-off-by: Stephen J Day <stephen.day@docker.com>

Stephen J Day authored on 2017/11/16 12:28:36
Showing 4 changed files
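The hunks below add locked accessors but never show the struct body itself. A
minimal sketch of what the type plausibly looks like after this commit,
inferred from the accesses in the diff (s.mu, s.stop, s.Health.Status); the
exact field layout and import path are assumptions, not part of the commit:

	package container

	import (
		"sync"

		"github.com/docker/docker/api/types" // assumed import path
	)

	// Health tracks a container's health-check state.
	type Health struct {
		types.Health // embedded API type carrying Status, FailingStreak, Log

		stop chan struct{} // monitor channel, see Open/CloseMonitorChannel
		mu   sync.Mutex    // guards reads and writes of types.Health.Status
	}

One consequence of this shape: the new Status method on *Health shadows the
Status field promoted from the embedded types.Health (the method wins selector
resolution at depth zero), which is why the direct field accesses in the diff
change from s.Status to the explicit s.Health.Status.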
... ...
@@ -16,19 +16,42 @@ type Health struct {
 
 // String returns a human-readable description of the health-check state
 func (s *Health) String() string {
-	// This happens when the monitor has yet to be setup.
-	if s.Status == "" {
-		return types.Unhealthy
-	}
+	status := s.Status()
 
-	switch s.Status {
+	switch status {
 	case types.Starting:
 		return "health: starting"
 	default: // Healthy and Unhealthy are clear on their own
-		return s.Status
+		return s.Health.Status
 	}
 }
 
+// Status returns the current health status.
+//
+// Note that this takes a lock and the value may change after being read.
+func (s *Health) Status() string {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// This happens when the monitor has yet to be setup.
+	if s.Health.Status == "" {
+		return types.Unhealthy
+	}
+
+	return s.Health.Status
+}
+
+// SetStatus writes the current status to the underlying health structure,
+// obeying the locking semantics.
+//
+// Status may be set directly if another lock is used.
+func (s *Health) SetStatus(new string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.Health.Status = new
+}
+
 // OpenMonitorChannel creates and returns a new monitor channel. If there
 // already is one, it returns nil.
 func (s *Health) OpenMonitorChannel() chan struct{} {
... ...
@@ -53,7 +76,7 @@ func (s *Health) CloseMonitorChannel() {
 		close(s.stop)
 		s.stop = nil
 		// unhealthy when the monitor has stopped for compatibility reasons
-		s.Status = types.Unhealthy
+		s.Health.Status = types.Unhealthy
 		logrus.Debug("CloseMonitorChannel done")
 	}
 }
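The commit message refers to the race detector; below is a minimal,
hypothetical test (not one of the four files changed here) showing the kind
of concurrent access that go test -race would flag before this change. The
package choice and import path are assumptions:

	package container

	import (
		"testing"

		"github.com/docker/docker/api/types" // assumed import path
	)

	func TestHealthStatusConcurrentAccess(t *testing.T) {
		h := &Health{}
		done := make(chan struct{})

		// Writer: flips the status from a background goroutine.
		go func() {
			defer close(done)
			for i := 0; i < 1000; i++ {
				h.SetStatus(types.Healthy) // locked write
			}
		}()

		// Reader: concurrently reads the status on the test goroutine.
		// Before this commit both sides touched h.Status directly and
		// raced; now both go through the mutex.
		for i := 0; i < 1000; i++ {
			_ = h.Status() // locked read
		}
		<-done
	}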
... ...
@@ -129,7 +129,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 	}
 
 	h := c.State.Health
-	oldStatus := h.Status
+	oldStatus := h.Status()
 
 	if len(h.Log) >= maxLogEntries {
 		h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result)
... ...
@@ -139,14 +139,14 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 
 	if result.ExitCode == exitStatusHealthy {
 		h.FailingStreak = 0
-		h.Status = types.Healthy
+		h.SetStatus(types.Healthy)
 	} else { // Failure (including invalid exit code)
 		shouldIncrementStreak := true
 
 		// If the container is starting (i.e. we never had a successful health check)
 		// then we check if we are within the start period of the container in which
 		// case we do not increment the failure streak.
-		if h.Status == types.Starting {
+		if h.Status() == types.Starting {
 			startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod)
 			timeSinceStart := result.Start.Sub(c.State.StartedAt)
 
... ...
@@ -160,7 +160,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 			h.FailingStreak++
 
 			if h.FailingStreak >= retries {
-				h.Status = types.Unhealthy
+				h.SetStatus(types.Unhealthy)
 			}
 		}
 		// Else we're starting or healthy. Stay in that state.
... ...
@@ -173,8 +173,9 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch
 		logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
 	}
 
-	if oldStatus != h.Status {
-		d.LogContainerEvent(c, "health_status: "+h.Status)
+	current := h.Status()
+	if oldStatus != current {
+		d.LogContainerEvent(c, "health_status: "+current)
 	}
 }
 
... ...
@@ -293,11 +294,11 @@ func (d *Daemon) initHealthMonitor(c *container.Container) {
 	d.stopHealthchecks(c)
 
 	if h := c.State.Health; h != nil {
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		h.FailingStreak = 0
 	} else {
 		h := &container.Health{}
-		h.Status = types.Starting
+		h.SetStatus(types.Starting)
 		c.State.Health = h
 	}
 
... ...
@@ -14,7 +14,7 @@ import (
 func reset(c *container.Container) {
 	c.State = &container.State{}
 	c.State.Health = &container.Health{}
-	c.State.Health.Status = types.Starting
+	c.State.Health.SetStatus(types.Starting)
 }
 
 func TestNoneHealthcheck(t *testing.T) {
... ...
@@ -111,8 +111,8 @@ func TestHealthStates(t *testing.T) {
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
 	handleResult(c.State.StartedAt.Add(40*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	if c.State.Health.FailingStreak != 2 {
 		t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
... ...
@@ -133,15 +133,15 @@ func TestHealthStates(t *testing.T) {
 	c.Config.Healthcheck.StartPeriod = 30 * time.Second
 
 	handleResult(c.State.StartedAt.Add(20*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
 	}
 	if c.State.Health.FailingStreak != 0 {
 		t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
 	}
 	handleResult(c.State.StartedAt.Add(50*time.Second), 1)
-	if c.State.Health.Status != types.Starting {
-		t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
+	if status := c.State.Health.Status(); status != types.Starting {
+		t.Errorf("Expecting starting, but got %#v\n", status)
	}
 	if c.State.Health.FailingStreak != 1 {
 		t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak)
... ...
@@ -139,7 +139,7 @@ func (daemon *Daemon) getInspectData(container *container.Container) (*types.Con
 	var containerHealth *types.Health
 	if container.State.Health != nil {
 		containerHealth = &types.Health{
-			Status:        container.State.Health.Status,
+			Status:        container.State.Health.Status(),
 			FailingStreak: container.State.Health.FailingStreak,
 			Log:           append([]*types.HealthcheckResult{}, container.State.Health.Log...),
 		}
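
To check the fix the way the commit message describes, the affected packages
can be run under the race detector; the package paths here are assumptions
based on the files shown above:

	go test -race ./container ./daemon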