Add a mutex to protect the status field as well. When running the unit
tests under the race detector, we can see that the Status field is
written without holding this lock. Adding a mutex around reads and
writes of the status addresses the issue.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
| ... | ... |
@@ -16,19 +16,42 @@ type Health struct {
|
| 16 | 16 |
|
| 17 | 17 |
// String returns a human-readable description of the health-check state |
| 18 | 18 |
func (s *Health) String() string {
|
| 19 |
- // This happens when the monitor has yet to be setup. |
|
| 20 |
- if s.Status == "" {
|
|
| 21 |
- return types.Unhealthy |
|
| 22 |
- } |
|
| 19 |
+ status := s.Status() |
|
| 23 | 20 |
|
| 24 |
- switch s.Status {
|
|
| 21 |
+ switch status {
|
|
| 25 | 22 |
case types.Starting: |
| 26 | 23 |
return "health: starting" |
| 27 | 24 |
default: // Healthy and Unhealthy are clear on their own |
| 28 |
- return s.Status |
|
| 25 |
+ return s.Health.Status |
|
| 29 | 26 |
} |
| 30 | 27 |
} |
| 31 | 28 |
|
| 29 |
+// Status returns the current health status. |
|
| 30 |
+// |
|
| 31 |
+// Note that this takes a lock and the value may change after being read. |
|
| 32 |
+func (s *Health) Status() string {
|
|
| 33 |
+ s.mu.Lock() |
|
| 34 |
+ defer s.mu.Unlock() |
|
| 35 |
+ |
|
| 36 |
+ // This happens when the monitor has yet to be set up. |
|
| 37 |
+ if s.Health.Status == "" {
|
|
| 38 |
+ return types.Unhealthy |
|
| 39 |
+ } |
|
| 40 |
+ |
|
| 41 |
+ return s.Health.Status |
|
| 42 |
+} |
|
| 43 |
+ |
|
| 44 |
+// SetStatus writes the current status to the underlying health structure, |
|
| 45 |
+// obeying the locking semantics. |
|
| 46 |
+// |
|
| 47 |
+// Status may be set directly if another lock is used. |
|
| 48 |
+func (s *Health) SetStatus(new string) {
|
|
| 49 |
+ s.mu.Lock() |
|
| 50 |
+ defer s.mu.Unlock() |
|
| 51 |
+ |
|
| 52 |
+ s.Health.Status = new |
|
| 53 |
+} |
|
| 54 |
+ |
|
| 32 | 55 |
// OpenMonitorChannel creates and returns a new monitor channel. If there |
| 33 | 56 |
// already is one, it returns nil. |
| 34 | 57 |
func (s *Health) OpenMonitorChannel() chan struct{} {
|
| ... | ... |
@@ -53,7 +76,7 @@ func (s *Health) CloseMonitorChannel() {
|
| 53 | 53 |
close(s.stop) |
| 54 | 54 |
s.stop = nil |
| 55 | 55 |
// unhealthy when the monitor has stopped for compatibility reasons |
| 56 |
- s.Status = types.Unhealthy |
|
| 56 |
+ s.Health.Status = types.Unhealthy |
|
| 57 | 57 |
logrus.Debug("CloseMonitorChannel done")
|
| 58 | 58 |
} |
| 59 | 59 |
} |
| ... | ... |
@@ -129,7 +129,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
| 129 | 129 |
} |
| 130 | 130 |
|
| 131 | 131 |
h := c.State.Health |
| 132 |
- oldStatus := h.Status |
|
| 132 |
+ oldStatus := h.Status() |
|
| 133 | 133 |
|
| 134 | 134 |
if len(h.Log) >= maxLogEntries {
|
| 135 | 135 |
h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result) |
| ... | ... |
@@ -139,14 +139,14 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
| 139 | 139 |
|
| 140 | 140 |
if result.ExitCode == exitStatusHealthy {
|
| 141 | 141 |
h.FailingStreak = 0 |
| 142 |
- h.Status = types.Healthy |
|
| 142 |
+ h.SetStatus(types.Healthy) |
|
| 143 | 143 |
} else { // Failure (including invalid exit code)
|
| 144 | 144 |
shouldIncrementStreak := true |
| 145 | 145 |
|
| 146 | 146 |
// If the container is starting (i.e. we never had a successful health check) |
| 147 | 147 |
// then we check if we are within the start period of the container in which |
| 148 | 148 |
// case we do not increment the failure streak. |
| 149 |
- if h.Status == types.Starting {
|
|
| 149 |
+ if h.Status() == types.Starting {
|
|
| 150 | 150 |
startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod) |
| 151 | 151 |
timeSinceStart := result.Start.Sub(c.State.StartedAt) |
| 152 | 152 |
|
| ... | ... |
@@ -160,7 +160,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
| 160 | 160 |
h.FailingStreak++ |
| 161 | 161 |
|
| 162 | 162 |
if h.FailingStreak >= retries {
|
| 163 |
- h.Status = types.Unhealthy |
|
| 163 |
+ h.SetStatus(types.Unhealthy) |
|
| 164 | 164 |
} |
| 165 | 165 |
} |
| 166 | 166 |
// Else we're starting or healthy. Stay in that state. |
| ... | ... |
@@ -173,8 +173,9 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
| 173 | 173 |
logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err)
|
| 174 | 174 |
} |
| 175 | 175 |
|
| 176 |
- if oldStatus != h.Status {
|
|
| 177 |
- d.LogContainerEvent(c, "health_status: "+h.Status) |
|
| 176 |
+ current := h.Status() |
|
| 177 |
+ if oldStatus != current {
|
|
| 178 |
+ d.LogContainerEvent(c, "health_status: "+current) |
|
| 178 | 179 |
} |
| 179 | 180 |
} |
| 180 | 181 |
|
| ... | ... |
@@ -293,11 +294,11 @@ func (d *Daemon) initHealthMonitor(c *container.Container) {
|
| 293 | 293 |
d.stopHealthchecks(c) |
| 294 | 294 |
|
| 295 | 295 |
if h := c.State.Health; h != nil {
|
| 296 |
- h.Status = types.Starting |
|
| 296 |
+ h.SetStatus(types.Starting) |
|
| 297 | 297 |
h.FailingStreak = 0 |
| 298 | 298 |
} else {
|
| 299 | 299 |
h := &container.Health{}
|
| 300 |
- h.Status = types.Starting |
|
| 300 |
+ h.SetStatus(types.Starting) |
|
| 301 | 301 |
c.State.Health = h |
| 302 | 302 |
} |
| 303 | 303 |
|
| ... | ... |
@@ -14,7 +14,7 @@ import ( |
| 14 | 14 |
func reset(c *container.Container) {
|
| 15 | 15 |
c.State = &container.State{}
|
| 16 | 16 |
c.State.Health = &container.Health{}
|
| 17 |
- c.State.Health.Status = types.Starting |
|
| 17 |
+ c.State.Health.SetStatus(types.Starting) |
|
| 18 | 18 |
} |
| 19 | 19 |
|
| 20 | 20 |
func TestNoneHealthcheck(t *testing.T) {
|
| ... | ... |
@@ -111,8 +111,8 @@ func TestHealthStates(t *testing.T) {
|
| 111 | 111 |
|
| 112 | 112 |
handleResult(c.State.StartedAt.Add(20*time.Second), 1) |
| 113 | 113 |
handleResult(c.State.StartedAt.Add(40*time.Second), 1) |
| 114 |
- if c.State.Health.Status != types.Starting {
|
|
| 115 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
|
|
| 114 |
+ if status := c.State.Health.Status(); status != types.Starting {
|
|
| 115 |
+ t.Errorf("Expecting starting, but got %#v\n", status)
|
|
| 116 | 116 |
} |
| 117 | 117 |
if c.State.Health.FailingStreak != 2 {
|
| 118 | 118 |
t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak)
|
| ... | ... |
@@ -133,15 +133,15 @@ func TestHealthStates(t *testing.T) {
|
| 133 | 133 |
c.Config.Healthcheck.StartPeriod = 30 * time.Second |
| 134 | 134 |
|
| 135 | 135 |
handleResult(c.State.StartedAt.Add(20*time.Second), 1) |
| 136 |
- if c.State.Health.Status != types.Starting {
|
|
| 137 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
|
|
| 136 |
+ if status := c.State.Health.Status(); status != types.Starting {
|
|
| 137 |
+ t.Errorf("Expecting starting, but got %#v\n", status)
|
|
| 138 | 138 |
} |
| 139 | 139 |
if c.State.Health.FailingStreak != 0 {
|
| 140 | 140 |
t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak)
|
| 141 | 141 |
} |
| 142 | 142 |
handleResult(c.State.StartedAt.Add(50*time.Second), 1) |
| 143 |
- if c.State.Health.Status != types.Starting {
|
|
| 144 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status)
|
|
| 143 |
+ if status := c.State.Health.Status(); status != types.Starting {
|
|
| 144 |
+ t.Errorf("Expecting starting, but got %#v\n", status)
|
|
| 145 | 145 |
} |
| 146 | 146 |
if c.State.Health.FailingStreak != 1 {
|
| 147 | 147 |
t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak)
|
| ... | ... |
@@ -139,7 +139,7 @@ func (daemon *Daemon) getInspectData(container *container.Container) (*types.Con |
| 139 | 139 |
var containerHealth *types.Health |
| 140 | 140 |
if container.State.Health != nil {
|
| 141 | 141 |
containerHealth = &types.Health{
|
| 142 |
- Status: container.State.Health.Status, |
|
| 142 |
+ Status: container.State.Health.Status(), |
|
| 143 | 143 |
FailingStreak: container.State.Health.FailingStreak, |
| 144 | 144 |
Log: append([]*types.HealthcheckResult{}, container.State.Health.Log...),
|
| 145 | 145 |
} |