Browse code

Decouple removing the fileWatcher from reading

Fixes #27779

Currently `followLogs` can get into a deadlock if we receive an inotify
IN_MODIFY event while we are trying to close the `fileWatcher`. This is
due to the fact that closing the `fileWatcher` happens in the same block
as consumes events from the `fileWatcher`. We are trying to run
`fileWatcher.Close`, which is waiting for an IN_IGNORE event to come in
over inotify to confirm the watch was been removed. But, because an
IN_MODIFY event has appeared after `Close` was entered but before the
IN_IGNORE, the broadcast never comes. The IN_MODIFY cannot be consumed
as the events channel is unbuffered and the only `select` that reads
from it is busy waiting for the IN_IGNORE event.

In order to try and fix this race condition I've moved the removal of
the `fileWatcher` out to a separate go block that waits for a signal to
close, removes the watcher and then signals to the previous selects on
the close signal.

This has introduced a `fileWatcher.Remove` in the final case, but if we
try and remove a watcher that does not exist it will just return an
error saying so. We are not doing any checking on the return of `Remove`
so this shouldn't cause any side-effects.

Signed-off-by: Tom Booth <tombooth@gmail.com>

Tom Booth authored on 2016/10/26 21:56:10
Showing 1 changed files
... ...
@@ -9,6 +9,7 @@ import (
9 9
 	"os"
10 10
 	"time"
11 11
 
12
+	"golang.org/x/net/context"
12 13
 	"gopkg.in/fsnotify.v1"
13 14
 
14 15
 	"github.com/Sirupsen/logrus"
... ...
@@ -172,9 +173,22 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
172 172
 	}
173 173
 	defer func() {
174 174
 		f.Close()
175
+		fileWatcher.Remove(name)
175 176
 		fileWatcher.Close()
176 177
 	}()
177 178
 
179
+	ctx, cancel := context.WithCancel(context.Background())
180
+	defer cancel()
181
+	go func() {
182
+		select {
183
+		case <-logWatcher.WatchClose():
184
+			fileWatcher.Remove(name)
185
+			cancel()
186
+		case <-ctx.Done():
187
+			return
188
+		}
189
+	}()
190
+
178 191
 	var retries int
179 192
 	handleRotate := func() error {
180 193
 		f.Close()
... ...
@@ -209,8 +223,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
209 209
 			case fsnotify.Rename, fsnotify.Remove:
210 210
 				select {
211 211
 				case <-notifyRotate:
212
-				case <-logWatcher.WatchClose():
213
-					fileWatcher.Remove(name)
212
+				case <-ctx.Done():
214 213
 					return errDone
215 214
 				}
216 215
 				if err := handleRotate(); err != nil {
... ...
@@ -232,8 +245,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
232 232
 				return errRetry
233 233
 			}
234 234
 			return err
235
-		case <-logWatcher.WatchClose():
236
-			fileWatcher.Remove(name)
235
+		case <-ctx.Done():
237 236
 			return errDone
238 237
 		}
239 238
 	}
... ...
@@ -290,7 +302,7 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
290 290
 		}
291 291
 		select {
292 292
 		case logWatcher.Msg <- msg:
293
-		case <-logWatcher.WatchClose():
293
+		case <-ctx.Done():
294 294
 			logWatcher.Msg <- msg
295 295
 			for {
296 296
 				msg, err := decodeLogLine(dec, l)