Browse code

Merge Container and State mutexes

Resolved all deadlocks and fixed race between kill and
monitor.resetContainer
Fixes #7600

Signed-off-by: Alexandr Morozov <lk4d4math@gmail.com>

Alexandr Morozov authored on 2014/08/28 20:39:27
Showing 4 changed files
... ...
@@ -10,7 +10,6 @@ import (
10 10
 	"path"
11 11
 	"path/filepath"
12 12
 	"strings"
13
-	"sync"
14 13
 	"syscall"
15 14
 	"time"
16 15
 
... ...
@@ -43,7 +42,7 @@ var (
43 43
 )
44 44
 
45 45
 type Container struct {
46
-	sync.Mutex
46
+	*State
47 47
 	root   string // Path to the "home" of the container, including metadata.
48 48
 	basefs string // Path to the graphdriver mountpoint
49 49
 
... ...
@@ -55,7 +54,6 @@ type Container struct {
55 55
 	Args []string
56 56
 
57 57
 	Config *runconfig.Config
58
-	State  *State
59 58
 	Image  string
60 59
 
61 60
 	NetworkSettings *NetworkSettings
... ...
@@ -276,7 +274,7 @@ func (container *Container) Start() (err error) {
276 276
 	container.Lock()
277 277
 	defer container.Unlock()
278 278
 
279
-	if container.State.IsRunning() {
279
+	if container.State.Running {
280 280
 		return nil
281 281
 	}
282 282
 
... ...
@@ -526,11 +524,11 @@ func (container *Container) KillSig(sig int) error {
526 526
 	defer container.Unlock()
527 527
 
528 528
 	// We could unpause the container for them rather than returning this error
529
-	if container.State.IsPaused() {
529
+	if container.State.Paused {
530 530
 		return fmt.Errorf("Container %s is paused. Unpause the container before stopping", container.ID)
531 531
 	}
532 532
 
533
-	if !container.State.IsRunning() {
533
+	if !container.State.Running {
534 534
 		return nil
535 535
 	}
536 536
 
... ...
@@ -541,7 +539,7 @@ func (container *Container) KillSig(sig int) error {
541 541
 	// if the container is currently restarting we do not need to send the signal
542 542
 	// to the process.  Telling the monitor that it should exit on it's next event
543 543
 	// loop is enough
544
-	if container.State.IsRestarting() {
544
+	if container.State.Restarting {
545 545
 		return nil
546 546
 	}
547 547
 
... ...
@@ -70,7 +70,7 @@ func (daemon *Daemon) Containers(job *engine.Job) engine.Status {
70 70
 	writeCont := func(container *Container) error {
71 71
 		container.Lock()
72 72
 		defer container.Unlock()
73
-		if !container.State.IsRunning() && !all && n <= 0 && since == "" && before == "" {
73
+		if !container.State.Running && !all && n <= 0 && since == "" && before == "" {
74 74
 			return nil
75 75
 		}
76 76
 		if before != "" && !foundBefore {
... ...
@@ -87,7 +87,7 @@ func (daemon *Daemon) Containers(job *engine.Job) engine.Status {
87 87
 				return errLast
88 88
 			}
89 89
 		}
90
-		if len(filt_exited) > 0 && !container.State.IsRunning() {
90
+		if len(filt_exited) > 0 && !container.State.Running {
91 91
 			should_skip := true
92 92
 			for _, code := range filt_exited {
93 93
 				if code == container.State.GetExitCode() {
... ...
@@ -110,7 +110,7 @@ func (m *containerMonitor) Start() error {
110 110
 	defer func() {
111 111
 		if afterRun {
112 112
 			m.container.Lock()
113
-			m.container.State.SetStopped(exitStatus)
113
+			m.container.State.setStopped(exitStatus)
114 114
 			defer m.container.Unlock()
115 115
 		}
116 116
 		m.Close()
... ...
@@ -123,7 +123,7 @@ func (m *containerMonitor) Start() error {
123 123
 		m.container.RestartCount++
124 124
 
125 125
 		if err := m.container.startLoggingToDisk(); err != nil {
126
-			m.resetContainer()
126
+			m.resetContainer(false)
127 127
 
128 128
 			return err
129 129
 		}
... ...
@@ -138,7 +138,7 @@ func (m *containerMonitor) Start() error {
138 138
 			// if we receive an internal error from the initial start of a container then lets
139 139
 			// return it instead of entering the restart loop
140 140
 			if m.container.RestartCount == 0 {
141
-				m.resetContainer()
141
+				m.resetContainer(false)
142 142
 
143 143
 				return err
144 144
 			}
... ...
@@ -154,7 +154,7 @@ func (m *containerMonitor) Start() error {
154 154
 		if m.shouldRestart(exitStatus) {
155 155
 			m.container.State.SetRestarting(exitStatus)
156 156
 			m.container.LogEvent("die")
157
-			m.resetContainer()
157
+			m.resetContainer(true)
158 158
 
159 159
 			// sleep with a small time increment between each restart to help avoid issues cased by quickly
160 160
 			// restarting the container because of some types of errors ( networking cut out, etc... )
... ...
@@ -168,7 +168,7 @@ func (m *containerMonitor) Start() error {
168 168
 			continue
169 169
 		}
170 170
 		m.container.LogEvent("die")
171
-		m.resetContainer()
171
+		m.resetContainer(true)
172 172
 		return err
173 173
 	}
174 174
 }
... ...
@@ -243,7 +243,7 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
243 243
 		}
244 244
 	}
245 245
 
246
-	m.container.State.SetRunning(command.Pid())
246
+	m.container.State.setRunning(command.Pid())
247 247
 
248 248
 	// signal that the process has started
249 249
 	// close channel only if not closed
... ...
@@ -260,8 +260,13 @@ func (m *containerMonitor) callback(command *execdriver.Command) {
260 260
 
261 261
 // resetContainer resets the container's IO and ensures that the command is able to be executed again
262 262
 // by copying the data into a new struct
263
-func (m *containerMonitor) resetContainer() {
263
+// if lock is true, then container locked during reset
264
+func (m *containerMonitor) resetContainer(lock bool) {
264 265
 	container := m.container
266
+	if lock {
267
+		container.Lock()
268
+		defer container.Unlock()
269
+	}
265 270
 
266 271
 	if container.Config.OpenStdin {
267 272
 		if err := container.stdin.Close(); err != nil {
... ...
@@ -1,7 +1,6 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
-	"encoding/json"
5 4
 	"fmt"
6 5
 	"sync"
7 6
 	"time"
... ...
@@ -10,7 +9,7 @@ import (
10 10
 )
11 11
 
12 12
 type State struct {
13
-	sync.RWMutex
13
+	sync.Mutex
14 14
 	Running    bool
15 15
 	Paused     bool
16 16
 	Restarting bool
... ...
@@ -29,9 +28,6 @@ func NewState() *State {
29 29
 
30 30
 // String returns a human-readable description of the state
31 31
 func (s *State) String() string {
32
-	s.RLock()
33
-	defer s.RUnlock()
34
-
35 32
 	if s.Running {
36 33
 		if s.Paused {
37 34
 			return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
... ...
@@ -50,16 +46,6 @@ func (s *State) String() string {
50 50
 	return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
51 51
 }
52 52
 
53
-type jState State
54
-
55
-// MarshalJSON for state is needed to avoid race conditions on inspect
56
-func (s *State) MarshalJSON() ([]byte, error) {
57
-	s.RLock()
58
-	b, err := json.Marshal(jState(*s))
59
-	s.RUnlock()
60
-	return b, err
61
-}
62
-
63 53
 func wait(waitChan <-chan struct{}, timeout time.Duration) error {
64 54
 	if timeout < 0 {
65 55
 		<-waitChan
... ...
@@ -77,14 +63,14 @@ func wait(waitChan <-chan struct{}, timeout time.Duration) error {
77 77
 // immediatly. If you want wait forever you must supply negative timeout.
78 78
 // Returns pid, that was passed to SetRunning
79 79
 func (s *State) WaitRunning(timeout time.Duration) (int, error) {
80
-	s.RLock()
81
-	if s.IsRunning() {
80
+	s.Lock()
81
+	if s.Running {
82 82
 		pid := s.Pid
83
-		s.RUnlock()
83
+		s.Unlock()
84 84
 		return pid, nil
85 85
 	}
86 86
 	waitChan := s.waitChan
87
-	s.RUnlock()
87
+	s.Unlock()
88 88
 	if err := wait(waitChan, timeout); err != nil {
89 89
 		return -1, err
90 90
 	}
... ...
@@ -95,14 +81,14 @@ func (s *State) WaitRunning(timeout time.Duration) (int, error) {
95 95
 // immediatly. If you want wait forever you must supply negative timeout.
96 96
 // Returns exit code, that was passed to SetStopped
97 97
 func (s *State) WaitStop(timeout time.Duration) (int, error) {
98
-	s.RLock()
98
+	s.Lock()
99 99
 	if !s.Running {
100 100
 		exitCode := s.ExitCode
101
-		s.RUnlock()
101
+		s.Unlock()
102 102
 		return exitCode, nil
103 103
 	}
104 104
 	waitChan := s.waitChan
105
-	s.RUnlock()
105
+	s.Unlock()
106 106
 	if err := wait(waitChan, timeout); err != nil {
107 107
 		return -1, err
108 108
 	}
... ...
@@ -110,28 +96,33 @@ func (s *State) WaitStop(timeout time.Duration) (int, error) {
110 110
 }
111 111
 
112 112
 func (s *State) IsRunning() bool {
113
-	s.RLock()
113
+	s.Lock()
114 114
 	res := s.Running
115
-	s.RUnlock()
115
+	s.Unlock()
116 116
 	return res
117 117
 }
118 118
 
119 119
 func (s *State) GetPid() int {
120
-	s.RLock()
120
+	s.Lock()
121 121
 	res := s.Pid
122
-	s.RUnlock()
122
+	s.Unlock()
123 123
 	return res
124 124
 }
125 125
 
126 126
 func (s *State) GetExitCode() int {
127
-	s.RLock()
127
+	s.Lock()
128 128
 	res := s.ExitCode
129
-	s.RUnlock()
129
+	s.Unlock()
130 130
 	return res
131 131
 }
132 132
 
133 133
 func (s *State) SetRunning(pid int) {
134 134
 	s.Lock()
135
+	s.setRunning(pid)
136
+	s.Unlock()
137
+}
138
+
139
+func (s *State) setRunning(pid int) {
135 140
 	s.Running = true
136 141
 	s.Paused = false
137 142
 	s.Restarting = false
... ...
@@ -140,11 +131,15 @@ func (s *State) SetRunning(pid int) {
140 140
 	s.StartedAt = time.Now().UTC()
141 141
 	close(s.waitChan) // fire waiters for start
142 142
 	s.waitChan = make(chan struct{})
143
-	s.Unlock()
144 143
 }
145 144
 
146 145
 func (s *State) SetStopped(exitCode int) {
147 146
 	s.Lock()
147
+	s.setStopped(exitCode)
148
+	s.Unlock()
149
+}
150
+
151
+func (s *State) setStopped(exitCode int) {
148 152
 	s.Running = false
149 153
 	s.Restarting = false
150 154
 	s.Pid = 0
... ...
@@ -152,7 +147,6 @@ func (s *State) SetStopped(exitCode int) {
152 152
 	s.ExitCode = exitCode
153 153
 	close(s.waitChan) // fire waiters for stop
154 154
 	s.waitChan = make(chan struct{})
155
-	s.Unlock()
156 155
 }
157 156
 
158 157
 // SetRestarting is when docker hanldes the auto restart of containers when they are
... ...
@@ -172,9 +166,9 @@ func (s *State) SetRestarting(exitCode int) {
172 172
 }
173 173
 
174 174
 func (s *State) IsRestarting() bool {
175
-	s.RLock()
175
+	s.Lock()
176 176
 	res := s.Restarting
177
-	s.RUnlock()
177
+	s.Unlock()
178 178
 	return res
179 179
 }
180 180
 
... ...
@@ -191,8 +185,8 @@ func (s *State) SetUnpaused() {
191 191
 }
192 192
 
193 193
 func (s *State) IsPaused() bool {
194
-	s.RLock()
194
+	s.Lock()
195 195
 	res := s.Paused
196
-	s.RUnlock()
196
+	s.Unlock()
197 197
 	return res
198 198
 }