Browse code

Do not remove containers from stats list on err

Before this patch, containers are silently removed from the stats list
on error. This patch instead will display `--` for all fields for the
container that had the error, allowing it to recover from errors.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2016/03/02 07:10:13
Showing 4 changed files
... ...
@@ -10,6 +10,7 @@ import (
10 10
 
11 11
 	"golang.org/x/net/context"
12 12
 
13
+	"github.com/Sirupsen/logrus"
13 14
 	Cli "github.com/docker/docker/cli"
14 15
 	"github.com/docker/engine-api/types"
15 16
 	"github.com/docker/engine-api/types/events"
... ...
@@ -169,20 +170,12 @@ func (cli *DockerCli) CmdStats(args ...string) error {
169 169
 
170 170
 	for range time.Tick(500 * time.Millisecond) {
171 171
 		printHeader()
172
-		toRemove := []int{}
173 172
 		cStats.mu.Lock()
174
-		for i, s := range cStats.cs {
173
+		for _, s := range cStats.cs {
175 174
 			if err := s.Display(w); err != nil && !*noStream {
176
-				toRemove = append(toRemove, i)
175
+				logrus.Debugf("stats: got error for %s: %v", s.Name, err)
177 176
 			}
178 177
 		}
179
-		for j := len(toRemove) - 1; j >= 0; j-- {
180
-			i := toRemove[j]
181
-			cStats.cs = append(cStats.cs[:i], cStats.cs[i+1:]...)
182
-		}
183
-		if len(cStats.cs) == 0 && !showAll {
184
-			return nil
185
-		}
186 178
 		cStats.mu.Unlock()
187 179
 		w.Flush()
188 180
 		if *noStream {
... ...
@@ -2,12 +2,14 @@ package client
2 2
 
3 3
 import (
4 4
 	"encoding/json"
5
+	"errors"
5 6
 	"fmt"
6 7
 	"io"
7 8
 	"strings"
8 9
 	"sync"
9 10
 	"time"
10 11
 
12
+	"github.com/Sirupsen/logrus"
11 13
 	"github.com/docker/engine-api/client"
12 14
 	"github.com/docker/engine-api/types"
13 15
 	"github.com/docker/go-units"
... ...
@@ -25,7 +27,7 @@ type containerStats struct {
25 25
 	BlockRead        float64
26 26
 	BlockWrite       float64
27 27
 	PidsCurrent      uint64
28
-	mu               sync.RWMutex
28
+	mu               sync.Mutex
29 29
 	err              error
30 30
 }
31 31
 
... ...
@@ -62,6 +64,7 @@ func (s *stats) isKnownContainer(cid string) (int, bool) {
62 62
 }
63 63
 
64 64
 func (s *containerStats) Collect(cli client.APIClient, streamStats bool, waitFirst *sync.WaitGroup) {
65
+	logrus.Debugf("collecting stats for %s", s.Name)
65 66
 	var (
66 67
 		getFirst       bool
67 68
 		previousCPU    uint64
... ...
@@ -90,9 +93,11 @@ func (s *containerStats) Collect(cli client.APIClient, streamStats bool, waitFir
90 90
 	go func() {
91 91
 		for {
92 92
 			var v *types.StatsJSON
93
+
93 94
 			if err := dec.Decode(&v); err != nil {
95
+				dec = json.NewDecoder(io.MultiReader(dec.Buffered(), responseBody))
94 96
 				u <- err
95
-				return
97
+				continue
96 98
 			}
97 99
 
98 100
 			var memPercent = 0.0
... ...
@@ -139,6 +144,7 @@ func (s *containerStats) Collect(cli client.APIClient, streamStats bool, waitFir
139 139
 			s.BlockRead = 0
140 140
 			s.BlockWrite = 0
141 141
 			s.PidsCurrent = 0
142
+			s.err = errors.New("timeout waiting for stats")
142 143
 			s.mu.Unlock()
143 144
 			// if this is the first stat you get, release WaitGroup
144 145
 			if !getFirst {
... ...
@@ -150,8 +156,9 @@ func (s *containerStats) Collect(cli client.APIClient, streamStats bool, waitFir
150 150
 				s.mu.Lock()
151 151
 				s.err = err
152 152
 				s.mu.Unlock()
153
-				return
153
+				continue
154 154
 			}
155
+			s.err = nil
155 156
 			// if this is the first stat you get, release WaitGroup
156 157
 			if !getFirst {
157 158
 				getFirst = true
... ...
@@ -165,12 +172,20 @@ func (s *containerStats) Collect(cli client.APIClient, streamStats bool, waitFir
165 165
 }
166 166
 
167 167
 func (s *containerStats) Display(w io.Writer) error {
168
-	s.mu.RLock()
169
-	defer s.mu.RUnlock()
168
+	s.mu.Lock()
169
+	defer s.mu.Unlock()
170
+	// NOTE: if you change this format, you must also change the err format below!
171
+	format := "%s\t%.2f%%\t%s / %s\t%.2f%%\t%s / %s\t%s / %s\t%d\n"
170 172
 	if s.err != nil {
171
-		return s.err
173
+		format = "%s\t%s\t%s / %s\t%s\t%s / %s\t%s / %s\t%s\n"
174
+		errStr := "--"
175
+		fmt.Fprintf(w, format,
176
+			s.Name, errStr, errStr, errStr, errStr, errStr, errStr, errStr, errStr, errStr,
177
+		)
178
+		err := s.err
179
+		return err
172 180
 	}
173
-	fmt.Fprintf(w, "%s\t%.2f%%\t%s / %s\t%.2f%%\t%s / %s\t%s / %s\t%d\n",
181
+	fmt.Fprintf(w, format,
174 182
 		s.Name,
175 183
 		s.CPUPercentage,
176 184
 		units.BytesSize(s.Memory), units.BytesSize(s.MemoryLimit),
... ...
@@ -2,7 +2,6 @@ package client
2 2
 
3 3
 import (
4 4
 	"bytes"
5
-	"sync"
6 5
 	"testing"
7 6
 
8 7
 	"github.com/docker/engine-api/types"
... ...
@@ -20,7 +19,6 @@ func TestDisplay(t *testing.T) {
20 20
 		BlockRead:        100 * 1024 * 1024,
21 21
 		BlockWrite:       800 * 1024 * 1024,
22 22
 		PidsCurrent:      1,
23
-		mu:               sync.RWMutex{},
24 23
 	}
25 24
 	var b bytes.Buffer
26 25
 	if err := c.Display(&b); err != nil {
... ...
@@ -120,10 +120,7 @@ func (s *DockerSuite) TestStatsAllNoStream(c *check.C) {
120 120
 
121 121
 func (s *DockerSuite) TestStatsAllNewContainersAdded(c *check.C) {
122 122
 	// Windows does not support stats
123
-	// TODO: remove SameHostDaemon
124
-	//	The reason it was added is because, there seems to be some race that makes this test fail
125
-	//	for remote daemons (namely in the win2lin CI). We highly welcome contributions to fix this.
126
-	testRequires(c, DaemonIsLinux, SameHostDaemon)
123
+	testRequires(c, DaemonIsLinux)
127 124
 
128 125
 	id := make(chan string)
129 126
 	addedChan := make(chan struct{})