Add a mutex to protect the Status field as well. When running the unit
tests under the race detector, we can see that the Status field is
written without holding this lock. Guarding both reads and writes of
Status with a mutex addresses the issue.
Signed-off-by: Stephen J Day <stephen.day@docker.com>
... | ... |
@@ -16,19 +16,42 @@ type Health struct { |
16 | 16 |
|
17 | 17 |
// String returns a human-readable description of the health-check state |
18 | 18 |
func (s *Health) String() string { |
19 |
- // This happens when the monitor has yet to be setup. |
|
20 |
- if s.Status == "" { |
|
21 |
- return types.Unhealthy |
|
22 |
- } |
|
19 |
+ status := s.Status() |
|
23 | 20 |
|
24 |
- switch s.Status { |
|
21 |
+ switch status { |
|
25 | 22 |
case types.Starting: |
26 | 23 |
return "health: starting" |
27 | 24 |
default: // Healthy and Unhealthy are clear on their own |
28 |
- return s.Status |
|
25 |
+ return s.Health.Status |
|
29 | 26 |
} |
30 | 27 |
} |
31 | 28 |
|
29 |
+// Status returns the current health status. |
|
30 |
+// |
|
31 |
+// Note that this takes a lock and the value may change after being read. |
|
32 |
+func (s *Health) Status() string { |
|
33 |
+ s.mu.Lock() |
|
34 |
+ defer s.mu.Unlock() |
|
35 |
+ |
|
36 |
+ // This happens when the monitor has yet to be set up. |
|
37 |
+ if s.Health.Status == "" { |
|
38 |
+ return types.Unhealthy |
|
39 |
+ } |
|
40 |
+ |
|
41 |
+ return s.Health.Status |
|
42 |
+} |
|
43 |
+ |
|
44 |
+// SetStatus writes the current status to the underlying health structure, |
|
45 |
+// obeying the locking semantics. |
|
46 |
+// |
|
47 |
+// Status may be set directly if another lock is used. |
|
48 |
+func (s *Health) SetStatus(new string) { |
|
49 |
+ s.mu.Lock() |
|
50 |
+ defer s.mu.Unlock() |
|
51 |
+ |
|
52 |
+ s.Health.Status = new |
|
53 |
+} |
|
54 |
+ |
|
32 | 55 |
// OpenMonitorChannel creates and returns a new monitor channel. If there |
33 | 56 |
// already is one, it returns nil. |
34 | 57 |
func (s *Health) OpenMonitorChannel() chan struct{} { |
... | ... |
@@ -53,7 +76,7 @@ func (s *Health) CloseMonitorChannel() { |
53 | 53 |
close(s.stop) |
54 | 54 |
s.stop = nil |
55 | 55 |
// unhealthy when the monitor has stopped for compatibility reasons |
56 |
- s.Status = types.Unhealthy |
|
56 |
+ s.Health.Status = types.Unhealthy |
|
57 | 57 |
logrus.Debug("CloseMonitorChannel done") |
58 | 58 |
} |
59 | 59 |
} |
... | ... |
@@ -129,7 +129,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
129 | 129 |
} |
130 | 130 |
|
131 | 131 |
h := c.State.Health |
132 |
- oldStatus := h.Status |
|
132 |
+ oldStatus := h.Status() |
|
133 | 133 |
|
134 | 134 |
if len(h.Log) >= maxLogEntries { |
135 | 135 |
h.Log = append(h.Log[len(h.Log)+1-maxLogEntries:], result) |
... | ... |
@@ -139,14 +139,14 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
139 | 139 |
|
140 | 140 |
if result.ExitCode == exitStatusHealthy { |
141 | 141 |
h.FailingStreak = 0 |
142 |
- h.Status = types.Healthy |
|
142 |
+ h.SetStatus(types.Healthy) |
|
143 | 143 |
} else { // Failure (including invalid exit code) |
144 | 144 |
shouldIncrementStreak := true |
145 | 145 |
|
146 | 146 |
// If the container is starting (i.e. we never had a successful health check) |
147 | 147 |
// then we check if we are within the start period of the container in which |
148 | 148 |
// case we do not increment the failure streak. |
149 |
- if h.Status == types.Starting { |
|
149 |
+ if h.Status() == types.Starting { |
|
150 | 150 |
startPeriod := timeoutWithDefault(c.Config.Healthcheck.StartPeriod, defaultStartPeriod) |
151 | 151 |
timeSinceStart := result.Start.Sub(c.State.StartedAt) |
152 | 152 |
|
... | ... |
@@ -160,7 +160,7 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
160 | 160 |
h.FailingStreak++ |
161 | 161 |
|
162 | 162 |
if h.FailingStreak >= retries { |
163 |
- h.Status = types.Unhealthy |
|
163 |
+ h.SetStatus(types.Unhealthy) |
|
164 | 164 |
} |
165 | 165 |
} |
166 | 166 |
// Else we're starting or healthy. Stay in that state. |
... | ... |
@@ -173,8 +173,9 @@ func handleProbeResult(d *Daemon, c *container.Container, result *types.Healthch |
173 | 173 |
logrus.Errorf("Error replicating health state for container %s: %v", c.ID, err) |
174 | 174 |
} |
175 | 175 |
|
176 |
- if oldStatus != h.Status { |
|
177 |
- d.LogContainerEvent(c, "health_status: "+h.Status) |
|
176 |
+ current := h.Status() |
|
177 |
+ if oldStatus != current { |
|
178 |
+ d.LogContainerEvent(c, "health_status: "+current) |
|
178 | 179 |
} |
179 | 180 |
} |
180 | 181 |
|
... | ... |
@@ -293,11 +294,11 @@ func (d *Daemon) initHealthMonitor(c *container.Container) { |
293 | 293 |
d.stopHealthchecks(c) |
294 | 294 |
|
295 | 295 |
if h := c.State.Health; h != nil { |
296 |
- h.Status = types.Starting |
|
296 |
+ h.SetStatus(types.Starting) |
|
297 | 297 |
h.FailingStreak = 0 |
298 | 298 |
} else { |
299 | 299 |
h := &container.Health{} |
300 |
- h.Status = types.Starting |
|
300 |
+ h.SetStatus(types.Starting) |
|
301 | 301 |
c.State.Health = h |
302 | 302 |
} |
303 | 303 |
|
... | ... |
@@ -14,7 +14,7 @@ import ( |
14 | 14 |
func reset(c *container.Container) { |
15 | 15 |
c.State = &container.State{} |
16 | 16 |
c.State.Health = &container.Health{} |
17 |
- c.State.Health.Status = types.Starting |
|
17 |
+ c.State.Health.SetStatus(types.Starting) |
|
18 | 18 |
} |
19 | 19 |
|
20 | 20 |
func TestNoneHealthcheck(t *testing.T) { |
... | ... |
@@ -111,8 +111,8 @@ func TestHealthStates(t *testing.T) { |
111 | 111 |
|
112 | 112 |
handleResult(c.State.StartedAt.Add(20*time.Second), 1) |
113 | 113 |
handleResult(c.State.StartedAt.Add(40*time.Second), 1) |
114 |
- if c.State.Health.Status != types.Starting { |
|
115 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status) |
|
114 |
+ if status := c.State.Health.Status(); status != types.Starting { |
|
115 |
+ t.Errorf("Expecting starting, but got %#v\n", status) |
|
116 | 116 |
} |
117 | 117 |
if c.State.Health.FailingStreak != 2 { |
118 | 118 |
t.Errorf("Expecting FailingStreak=2, but got %d\n", c.State.Health.FailingStreak) |
... | ... |
@@ -133,15 +133,15 @@ func TestHealthStates(t *testing.T) { |
133 | 133 |
c.Config.Healthcheck.StartPeriod = 30 * time.Second |
134 | 134 |
|
135 | 135 |
handleResult(c.State.StartedAt.Add(20*time.Second), 1) |
136 |
- if c.State.Health.Status != types.Starting { |
|
137 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status) |
|
136 |
+ if status := c.State.Health.Status(); status != types.Starting { |
|
137 |
+ t.Errorf("Expecting starting, but got %#v\n", status) |
|
138 | 138 |
} |
139 | 139 |
if c.State.Health.FailingStreak != 0 { |
140 | 140 |
t.Errorf("Expecting FailingStreak=0, but got %d\n", c.State.Health.FailingStreak) |
141 | 141 |
} |
142 | 142 |
handleResult(c.State.StartedAt.Add(50*time.Second), 1) |
143 |
- if c.State.Health.Status != types.Starting { |
|
144 |
- t.Errorf("Expecting starting, but got %#v\n", c.State.Health.Status) |
|
143 |
+ if status := c.State.Health.Status(); status != types.Starting { |
|
144 |
+ t.Errorf("Expecting starting, but got %#v\n", status) |
|
145 | 145 |
} |
146 | 146 |
if c.State.Health.FailingStreak != 1 { |
147 | 147 |
t.Errorf("Expecting FailingStreak=1, but got %d\n", c.State.Health.FailingStreak) |
... | ... |
@@ -139,7 +139,7 @@ func (daemon *Daemon) getInspectData(container *container.Container) (*types.Con |
139 | 139 |
var containerHealth *types.Health |
140 | 140 |
if container.State.Health != nil { |
141 | 141 |
containerHealth = &types.Health{ |
142 |
- Status: container.State.Health.Status, |
|
142 |
+ Status: container.State.Health.Status(), |
|
143 | 143 |
FailingStreak: container.State.Health.FailingStreak, |
144 | 144 |
Log: append([]*types.HealthcheckResult{}, container.State.Health.Log...), |
145 | 145 |
} |