Browse code

journald logs: drain 1 more time at container exit

In the journald log driver, attempt to drain the journal 1 more time
after being told to stop following the log. Due to a possible race
condition, sometimes data is written to the journal at almost the same
time the log watch is closed, and depending on the order of operations,
sometimes you miss the last journal entry.

Signed-off-by: Andy Goldstein <agoldste@redhat.com>

Andy Goldstein authored on 2016/12/21 23:17:12
Showing 1 changed files
... ...
@@ -245,20 +245,28 @@ func (s *journald) followJournal(logWatcher *logger.LogWatcher, config logger.Re
245 245
 	s.readers.mu.Lock()
246 246
 	s.readers.readers[logWatcher] = logWatcher
247 247
 	s.readers.mu.Unlock()
248
+
248 249
 	go func() {
249
-		// Keep copying journal data out until we're notified to stop
250
-		// or we hit an error.
251
-		status := C.wait_for_data_cancelable(j, pfd[0])
252
-		for status == 1 {
250
+		for {
251
+			// Keep copying journal data out until we're notified to stop
252
+			// or we hit an error.
253
+			status := C.wait_for_data_cancelable(j, pfd[0])
254
+			if status < 0 {
255
+				cerrstr := C.strerror(C.int(-status))
256
+				errstr := C.GoString(cerrstr)
257
+				fmtstr := "error %q while attempting to follow journal for container %q"
258
+				logrus.Errorf(fmtstr, errstr, s.vars["CONTAINER_ID_FULL"])
259
+				break
260
+			}
261
+
253 262
 			cursor = s.drainJournal(logWatcher, config, j, cursor)
254
-			status = C.wait_for_data_cancelable(j, pfd[0])
255
-		}
256
-		if status < 0 {
257
-			cerrstr := C.strerror(C.int(-status))
258
-			errstr := C.GoString(cerrstr)
259
-			fmtstr := "error %q while attempting to follow journal for container %q"
260
-			logrus.Errorf(fmtstr, errstr, s.vars["CONTAINER_ID_FULL"])
263
+
264
+			if status != 1 {
265
+				// We were notified to stop
266
+				break
267
+			}
261 268
 		}
269
+
262 270
 		// Clean up.
263 271
 		C.close(pfd[0])
264 272
 		s.readers.mu.Lock()
... ...
@@ -267,6 +275,7 @@ func (s *journald) followJournal(logWatcher *logger.LogWatcher, config logger.Re
267 267
 		C.sd_journal_close(j)
268 268
 		close(logWatcher.Msg)
269 269
 	}()
270
+
270 271
 	// Wait until we're told to stop.
271 272
 	select {
272 273
 	case <-logWatcher.WatchClose():