Browse code

Evict stopped containers

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>

Michael Crosby authored on 2015/01/08 11:02:08
Showing 15 changed files
... ...
@@ -16,15 +16,16 @@ import (
16 16
 	"path"
17 17
 	"path/filepath"
18 18
 	"runtime"
19
-	"sort"
20 19
 	"strconv"
21 20
 	"strings"
21
+	"sync"
22 22
 	"text/tabwriter"
23 23
 	"text/template"
24 24
 	"time"
25 25
 
26 26
 	log "github.com/Sirupsen/logrus"
27 27
 	"github.com/docker/docker/api"
28
+	"github.com/docker/docker/api/stats"
28 29
 	"github.com/docker/docker/dockerversion"
29 30
 	"github.com/docker/docker/engine"
30 31
 	"github.com/docker/docker/graph"
... ...
@@ -43,7 +44,6 @@ import (
43 43
 	"github.com/docker/docker/pkg/urlutil"
44 44
 	"github.com/docker/docker/registry"
45 45
 	"github.com/docker/docker/runconfig"
46
-	"github.com/docker/docker/stats"
47 46
 	"github.com/docker/docker/utils"
48 47
 	"github.com/docker/libtrust"
49 48
 )
... ...
@@ -2625,25 +2625,10 @@ type containerStats struct {
2625 2625
 	Name             string
2626 2626
 	CpuPercentage    float64
2627 2627
 	Memory           float64
2628
+	MemoryLimit      float64
2628 2629
 	MemoryPercentage float64
2629
-	NetworkRx        int
2630
-	NetworkTx        int
2631
-}
2632
-
2633
-type statSorter struct {
2634
-	stats []containerStats
2635
-}
2636
-
2637
-func (s *statSorter) Len() int {
2638
-	return len(s.stats)
2639
-}
2640
-
2641
-func (s *statSorter) Swap(i, j int) {
2642
-	s.stats[i], s.stats[j] = s.stats[j], s.stats[i]
2643
-}
2644
-
2645
-func (s *statSorter) Less(i, j int) bool {
2646
-	return s.stats[i].Name < s.stats[j].Name
2630
+	NetworkRx        float64
2631
+	NetworkTx        float64
2647 2632
 }
2648 2633
 
2649 2634
 func (cli *DockerCli) CmdStats(args ...string) error {
... ...
@@ -2651,40 +2636,49 @@ func (cli *DockerCli) CmdStats(args ...string) error {
2651 2651
 	cmd.Require(flag.Min, 1)
2652 2652
 	utils.ParseFlags(cmd, args, true)
2653 2653
 
2654
+	m := &sync.Mutex{}
2654 2655
 	cStats := map[string]containerStats{}
2655 2656
 	for _, name := range cmd.Args() {
2656
-		go cli.streamStats(name, cStats)
2657
+		go cli.streamStats(name, cStats, m)
2657 2658
 	}
2658 2659
 	w := tabwriter.NewWriter(cli.out, 20, 1, 3, ' ', 0)
2659
-	for _ = range time.Tick(1000 * time.Millisecond) {
2660
+	for _ = range time.Tick(500 * time.Millisecond) {
2660 2661
 		fmt.Fprint(cli.out, "\033[2J")
2661 2662
 		fmt.Fprint(cli.out, "\033[H")
2662
-		fmt.Fprintln(w, "CONTAINER\tCPU %\tMEM\tMEM %\tNET I/O")
2663
-		sStats := []containerStats{}
2664
-		for _, s := range cStats {
2665
-			sStats = append(sStats, s)
2666
-		}
2667
-		sorter := &statSorter{sStats}
2668
-		sort.Sort(sorter)
2669
-		for _, s := range sStats {
2670
-			fmt.Fprintf(w, "%s\t%f%%\t%s\t%f%%\t%d/%d\n",
2663
+		fmt.Fprintln(w, "CONTAINER\tCPU %\tMEM USAGE/LIMIT\tMEM %\tNET I/O")
2664
+		m.Lock()
2665
+		ss := sortStatsByName(cStats)
2666
+		m.Unlock()
2667
+		for _, s := range ss {
2668
+			fmt.Fprintf(w, "%s\t%.2f%%\t%s/%s\t%.2f%%\t%s/%s\n",
2671 2669
 				s.Name,
2672 2670
 				s.CpuPercentage,
2673
-				units.HumanSize(s.Memory),
2671
+				units.BytesSize(s.Memory), units.BytesSize(s.MemoryLimit),
2674 2672
 				s.MemoryPercentage,
2675
-				s.NetworkRx, s.NetworkTx)
2673
+				units.BytesSize(s.NetworkRx), units.BytesSize(s.NetworkTx))
2676 2674
 		}
2677 2675
 		w.Flush()
2678 2676
 	}
2679 2677
 	return nil
2680 2678
 }
2681 2679
 
2682
-func (cli *DockerCli) streamStats(name string, data map[string]containerStats) error {
2680
+func (cli *DockerCli) streamStats(name string, data map[string]containerStats, m *sync.Mutex) error {
2681
+	m.Lock()
2682
+	data[name] = containerStats{
2683
+		Name: name,
2684
+	}
2685
+	m.Unlock()
2686
+
2683 2687
 	stream, _, err := cli.call("GET", "/containers/"+name+"/stats", nil, false)
2684 2688
 	if err != nil {
2685 2689
 		return err
2686 2690
 	}
2687
-
2691
+	defer func() {
2692
+		stream.Close()
2693
+		m.Lock()
2694
+		delete(data, name)
2695
+		m.Unlock()
2696
+	}()
2688 2697
 	var (
2689 2698
 		previousCpu    uint64
2690 2699
 		previousSystem uint64
... ...
@@ -2696,30 +2690,37 @@ func (cli *DockerCli) streamStats(name string, data map[string]containerStats) e
2696 2696
 		if err := dec.Decode(&v); err != nil {
2697 2697
 			return err
2698 2698
 		}
2699
-		memPercent := float64(v.MemoryStats.Usage) / float64(v.MemoryStats.Limit) * 100.0
2700
-		cpuPercent := 0.0
2701
-
2699
+		var (
2700
+			memPercent = float64(v.MemoryStats.Usage) / float64(v.MemoryStats.Limit) * 100.0
2701
+			cpuPercent = 0.0
2702
+		)
2702 2703
 		if !start {
2703
-			cpuDelta := float64(v.CpuStats.CpuUsage.TotalUsage) - float64(previousCpu)
2704
-			systemDelta := float64(int(v.CpuStats.SystemUsage)/v.ClockTicks) - float64(int(previousSystem)/v.ClockTicks)
2705
-
2706
-			if systemDelta > 0.0 {
2707
-				cpuPercent = (cpuDelta / systemDelta) * float64(v.ClockTicks*len(v.CpuStats.CpuUsage.PercpuUsage))
2708
-			}
2704
+			cpuPercent = calcuateCpuPercent(previousCpu, previousSystem, v)
2709 2705
 		}
2710 2706
 		start = false
2707
+		m.Lock()
2711 2708
 		d := data[name]
2712
-		d.Name = name
2713 2709
 		d.CpuPercentage = cpuPercent
2714 2710
 		d.Memory = float64(v.MemoryStats.Usage)
2711
+		d.MemoryLimit = float64(v.MemoryStats.Limit)
2715 2712
 		d.MemoryPercentage = memPercent
2716
-		d.NetworkRx = int(v.Network.RxBytes)
2717
-		d.NetworkTx = int(v.Network.TxBytes)
2713
+		d.NetworkRx = float64(v.Network.RxBytes)
2714
+		d.NetworkTx = float64(v.Network.TxBytes)
2718 2715
 		data[name] = d
2716
+		m.Unlock()
2719 2717
 
2720 2718
 		previousCpu = v.CpuStats.CpuUsage.TotalUsage
2721 2719
 		previousSystem = v.CpuStats.SystemUsage
2722 2720
 	}
2723 2721
 	return nil
2722
+}
2724 2723
 
2724
+func calcuateCpuPercent(previousCpu, previousSystem uint64, v *stats.Stats) float64 {
2725
+	cpuPercent := 0.0
2726
+	cpuDelta := float64(v.CpuStats.CpuUsage.TotalUsage) - float64(previousCpu)
2727
+	systemDelta := float64(int(v.CpuStats.SystemUsage)/v.ClockTicks) - float64(int(previousSystem)/v.ClockTicks)
2728
+	if systemDelta > 0.0 {
2729
+		cpuPercent = (cpuDelta / systemDelta) * float64(v.ClockTicks*len(v.CpuStats.CpuUsage.PercpuUsage))
2730
+	}
2731
+	return cpuPercent
2725 2732
 }
2726 2733
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+package client
1
+
2
+import "sort"
3
+
4
+func sortStatsByName(cStats map[string]containerStats) []containerStats {
5
+	sStats := []containerStats{}
6
+	for _, s := range cStats {
7
+		sStats = append(sStats, s)
8
+	}
9
+	sorter := &statSorter{sStats}
10
+	sort.Sort(sorter)
11
+	return sStats
12
+}
13
+
14
+type statSorter struct {
15
+	stats []containerStats
16
+}
17
+
18
+func (s *statSorter) Len() int {
19
+	return len(s.stats)
20
+}
21
+
22
+func (s *statSorter) Swap(i, j int) {
23
+	s.stats[i], s.stats[j] = s.stats[j], s.stats[i]
24
+}
25
+
26
+func (s *statSorter) Less(i, j int) bool {
27
+	return s.stats[i].Name < s.stats[j].Name
28
+}
0 29
new file mode 100644
... ...
@@ -0,0 +1,158 @@
0
+package stats
1
+
2
+import (
3
+	"time"
4
+
5
+	"github.com/docker/libcontainer"
6
+	"github.com/docker/libcontainer/cgroups"
7
+)
8
+
9
+type ThrottlingData struct {
10
+	// Number of periods with throttling active
11
+	Periods uint64 `json:"periods,omitempty"`
12
+	// Number of periods when the container hit its throttling limit.
13
+	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
14
+	// Aggregate time the container was throttled for in nanoseconds.
15
+	ThrottledTime uint64 `json:"throttled_time,omitempty"`
16
+}
17
+
18
+// All CPU stats are aggregated since container inception.
19
+type CpuUsage struct {
20
+	// Total CPU time consumed.
21
+	// Units: nanoseconds.
22
+	TotalUsage uint64 `json:"total_usage,omitempty"`
23
+	// Total CPU time consumed per core.
24
+	// Units: nanoseconds.
25
+	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
26
+	// Time spent by tasks of the cgroup in kernel mode.
27
+	// Units: nanoseconds.
28
+	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
29
+	// Time spent by tasks of the cgroup in user mode.
30
+	// Units: nanoseconds.
31
+	UsageInUsermode uint64 `json:"usage_in_usermode"`
32
+}
33
+
34
+type CpuStats struct {
35
+	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"`
36
+	SystemUsage    uint64         `json:"system_cpu_usage"`
37
+	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
38
+}
39
+
40
+type MemoryStats struct {
41
+	// current res_counter usage for memory
42
+	Usage uint64 `json:"usage,omitempty"`
43
+	// maximum usage ever recorded.
44
+	MaxUsage uint64 `json:"max_usage,omitempty"`
45
+	// TODO(vishh): Export these as stronger types.
46
+	// all the stats exported via memory.stat.
47
+	Stats map[string]uint64 `json:"stats,omitempty"`
48
+	// number of times memory usage hits limits.
49
+	Failcnt uint64 `json:"failcnt"`
50
+	Limit   uint64 `json:"limit"`
51
+}
52
+
53
+type BlkioStatEntry struct {
54
+	Major uint64 `json:"major,omitempty"`
55
+	Minor uint64 `json:"minor,omitempty"`
56
+	Op    string `json:"op,omitempty"`
57
+	Value uint64 `json:"value,omitempty"`
58
+}
59
+
60
+type BlkioStats struct {
61
 +	// number of bytes transferred to and from the block device
62
+	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
63
+	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
64
+	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
65
+	IoServiceTimeRecursive  []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
66
+	IoWaitTimeRecursive     []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
67
+	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
68
+	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
69
+	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
70
+}
71
+
72
+type Network struct {
73
+	RxBytes   uint64 `json:"rx_bytes"`
74
+	RxPackets uint64 `json:"rx_packets"`
75
+	RxErrors  uint64 `json:"rx_errors"`
76
+	RxDropped uint64 `json:"rx_dropped"`
77
+	TxBytes   uint64 `json:"tx_bytes"`
78
+	TxPackets uint64 `json:"tx_packets"`
79
+	TxErrors  uint64 `json:"tx_errors"`
80
+	TxDropped uint64 `json:"tx_dropped"`
81
+}
82
+
83
+type Stats struct {
84
+	Read        time.Time   `json:"read"`
85
+	ClockTicks  int         `json:"clock_ticks"`
86
+	Interval    int         `json:"interval"` // in ms
87
+	Network     Network     `json:"network,omitempty"`
88
+	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
89
+	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
90
+	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
91
+}
92
+
93
+// ToStats converts the libcontainer.ContainerStats to the api specific
94
+// structs.  This is done to preserve API compatibility and versioning.
95
+func ToStats(ls *libcontainer.ContainerStats) *Stats {
96
+	s := &Stats{}
97
+	if ls.NetworkStats != nil {
98
+		s.Network = Network{
99
+			RxBytes:   ls.NetworkStats.RxBytes,
100
+			RxPackets: ls.NetworkStats.RxPackets,
101
+			RxErrors:  ls.NetworkStats.RxErrors,
102
+			RxDropped: ls.NetworkStats.RxDropped,
103
+			TxBytes:   ls.NetworkStats.TxBytes,
104
+			TxPackets: ls.NetworkStats.TxPackets,
105
+			TxErrors:  ls.NetworkStats.TxErrors,
106
+			TxDropped: ls.NetworkStats.TxDropped,
107
+		}
108
+	}
109
+	cs := ls.CgroupStats
110
+	if cs != nil {
111
+		s.BlkioStats = BlkioStats{
112
+			IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
113
+			IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
114
+			IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
115
+			IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
116
+			IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
117
+			IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
118
+			IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
119
+			SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
120
+		}
121
+		cpu := cs.CpuStats
122
+		s.CpuStats = CpuStats{
123
+			CpuUsage: CpuUsage{
124
+				TotalUsage:        cpu.CpuUsage.TotalUsage,
125
+				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
126
+				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
127
+				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
128
+			},
129
+			ThrottlingData: ThrottlingData{
130
+				Periods:          cpu.ThrottlingData.Periods,
131
+				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
132
+				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
133
+			},
134
+		}
135
+		mem := cs.MemoryStats
136
+		s.MemoryStats = MemoryStats{
137
+			Usage:    mem.Usage,
138
+			MaxUsage: mem.MaxUsage,
139
+			Stats:    mem.Stats,
140
+			Failcnt:  mem.Failcnt,
141
+		}
142
+	}
143
+	return s
144
+}
145
+
146
+func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []BlkioStatEntry {
147
+	out := make([]BlkioStatEntry, len(entries))
148
+	for i, re := range entries {
149
+		out[i] = BlkioStatEntry{
150
+			Major: re.Major,
151
+			Minor: re.Minor,
152
+			Op:    re.Op,
153
+			Value: re.Value,
154
+		}
155
+	}
156
+	return out
157
+}
... ...
@@ -1416,8 +1416,5 @@ func (container *Container) getNetworkedContainer() (*Container, error) {
1416 1416
 }
1417 1417
 
1418 1418
 func (container *Container) Stats() (*execdriver.ResourceStats, error) {
1419
-	if !container.IsRunning() {
1420
-		return nil, fmt.Errorf("cannot collect stats on a non running container")
1421
-	}
1422 1419
 	return container.daemon.Stats(container)
1423 1420
 }
... ...
@@ -1099,7 +1099,7 @@ func (daemon *Daemon) Stats(c *Container) (*execdriver.ResourceStats, error) {
1099 1099
 	return daemon.execDriver.Stats(c.ID)
1100 1100
 }
1101 1101
 
1102
-func (daemon *Daemon) SubscribeToContainerStats(name string) (<-chan *execdriver.ResourceStats, error) {
1102
+func (daemon *Daemon) SubscribeToContainerStats(name string) (chan *execdriver.ResourceStats, error) {
1103 1103
 	c := daemon.Get(name)
1104 1104
 	if c == nil {
1105 1105
 		return nil, fmt.Errorf("no such container")
... ...
@@ -1108,6 +1108,15 @@ func (daemon *Daemon) SubscribeToContainerStats(name string) (<-chan *execdriver
1108 1108
 	return ch, nil
1109 1109
 }
1110 1110
 
1111
+func (daemon *Daemon) UnsubscribeToContainerStats(name string, ch chan *execdriver.ResourceStats) error {
1112
+	c := daemon.Get(name)
1113
+	if c == nil {
1114
+		return fmt.Errorf("no such container")
1115
+	}
1116
+	daemon.statsCollector.unsubscribe(c, ch)
1117
+	return nil
1118
+}
1119
+
1111 1120
 // Nuke kills all containers then removes all content
1112 1121
 // from the content root, including images, volumes and
1113 1122
 // container filesystems.
... ...
@@ -49,6 +49,9 @@ func (daemon *Daemon) ContainerRm(job *engine.Job) engine.Status {
49 49
 	}
50 50
 
51 51
 	if container != nil {
52
 +		// stop collection of stats for the container regardless of
53
 +		// whether stats are currently being collected.
54
+		daemon.statsCollector.stopCollection(container)
52 55
 		if container.IsRunning() {
53 56
 			if forceRemove {
54 57
 				if err := container.Kill(); err != nil {
... ...
@@ -16,7 +16,7 @@ import (
16 16
 type Context map[string]string
17 17
 
18 18
 var (
19
-	ErrNotRunning              = errors.New("Process could not be started")
19
+	ErrNotRunning              = errors.New("Container is not running")
20 20
 	ErrWaitTimeoutReached      = errors.New("Wait timeout reached")
21 21
 	ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function")
22 22
 	ErrDriverNotFound          = errors.New("The requested docker init has not been found")
... ...
@@ -24,7 +24,7 @@ func NewDriver(name, root, initPath string, sysInfo *sysinfo.SysInfo) (execdrive
24 24
 		// to be backwards compatible
25 25
 		return lxc.NewDriver(root, initPath, sysInfo.AppArmor)
26 26
 	case "native":
27
-		return native.NewDriver(path.Join(root, "execdriver", "native"), initPath, meminfo.MemTotal/1000)
27
+		return native.NewDriver(path.Join(root, "execdriver", "native"), initPath, meminfo.MemTotal)
28 28
 	}
29 29
 	return nil, fmt.Errorf("unknown exec driver %s", name)
30 30
 }
... ...
@@ -526,6 +526,6 @@ func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessCo
526 526
 }
527 527
 
528 528
 func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
529
-	return nil, fmt.Errorf("container stats are not support with LXC")
529
+	return nil, fmt.Errorf("container stats are not supported with LXC")
530 530
 
531 531
 }
... ...
@@ -284,6 +284,9 @@ func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
284 284
 	c := d.activeContainers[id]
285 285
 	state, err := libcontainer.GetState(filepath.Join(d.root, id))
286 286
 	if err != nil {
287
+		if os.IsNotExist(err) {
288
+			return nil, execdriver.ErrNotRunning
289
+		}
287 290
 		return nil, err
288 291
 	}
289 292
 	now := time.Now()
... ...
@@ -292,13 +295,15 @@ func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
292 292
 		return nil, err
293 293
 	}
294 294
 	memoryLimit := c.container.Cgroups.Memory
295
+	// if the container does not have any memory limit specified set the
296
 +	// limit to the machine's memory
295 297
 	if memoryLimit == 0 {
296 298
 		memoryLimit = d.machineMemory
297 299
 	}
298 300
 	return &execdriver.ResourceStats{
301
+		Read:           now,
299 302
 		ContainerStats: stats,
300 303
 		ClockTicks:     system.GetClockTicks(),
301
-		Read:           now,
302 304
 		MemoryLimit:    memoryLimit,
303 305
 	}, nil
304 306
 }
... ...
@@ -1,14 +1,12 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
-	"encoding/json"
5 4
 	"fmt"
6 5
 	"os"
7 6
 	"strings"
8 7
 
9 8
 	"github.com/docker/docker/engine"
10 9
 	"github.com/docker/docker/runconfig"
11
-	"github.com/docker/docker/stats"
12 10
 )
13 11
 
14 12
 func (daemon *Daemon) ContainerStart(job *engine.Job) engine.Status {
... ...
@@ -79,26 +77,3 @@ func (daemon *Daemon) setHostConfig(container *Container, hostConfig *runconfig.
79 79
 
80 80
 	return nil
81 81
 }
82
-
83
-func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
84
-	s, err := daemon.SubscribeToContainerStats(job.Args[0])
85
-	if err != nil {
86
-		return job.Error(err)
87
-	}
88
-	enc := json.NewEncoder(job.Stdout)
89
-	for update := range s {
90
-		ss := stats.ToStats(update.ContainerStats)
91
-		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
92
-		ss.Read = update.Read
93
-		ss.ClockTicks = update.ClockTicks
94
-		ss.CpuStats.SystemUsage = update.SystemUsage
95
-		if err := enc.Encode(ss); err != nil {
96
-			return job.Error(err)
97
-		}
98
-	}
99
-	return engine.StatusOK
100
-}
101
-
102
-func mapToAPIStats() {
103
-
104
-}
105 82
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+package daemon
1
+
2
+import (
3
+	"encoding/json"
4
+
5
+	"github.com/docker/docker/api/stats"
6
+	"github.com/docker/docker/engine"
7
+)
8
+
9
+func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
10
+	s, err := daemon.SubscribeToContainerStats(job.Args[0])
11
+	if err != nil {
12
+		return job.Error(err)
13
+	}
14
+	enc := json.NewEncoder(job.Stdout)
15
+	for update := range s {
16
+		ss := stats.ToStats(update.ContainerStats)
17
+		ss.MemoryStats.Limit = uint64(update.MemoryLimit)
18
+		ss.Read = update.Read
19
+		ss.ClockTicks = update.ClockTicks
20
+		ss.CpuStats.SystemUsage = update.SystemUsage
21
+		if err := enc.Encode(ss); err != nil {
22
+			// TODO: handle the specific broken pipe
23
+			daemon.UnsubscribeToContainerStats(job.Args[0], s)
24
+			return job.Error(err)
25
+		}
26
+	}
27
+	return engine.StatusOK
28
+}
... ...
@@ -13,16 +13,20 @@ import (
13 13
 	"github.com/docker/docker/daemon/execdriver"
14 14
 )
15 15
 
16
 +// newStatsCollector returns a new statsCollector that collects
17
+// network and cgroup stats for a registered container at the specified
18
+// interval.  The collector allows non-running containers to be added
19
+// and will start processing stats when they are started.
16 20
 func newStatsCollector(interval time.Duration) *statsCollector {
17 21
 	s := &statsCollector{
18 22
 		interval:   interval,
19
-		containers: make(map[string]*statsCollectorData),
23
+		containers: make(map[string]*statsData),
20 24
 	}
21 25
 	s.start()
22 26
 	return s
23 27
 }
24 28
 
25
-type statsCollectorData struct {
29
+type statsData struct {
26 30
 	c         *Container
27 31
 	lastStats *execdriver.ResourceStats
28 32
 	subs      []chan *execdriver.ResourceStats
... ...
@@ -32,43 +36,86 @@ type statsCollectorData struct {
32 32
 type statsCollector struct {
33 33
 	m          sync.Mutex
34 34
 	interval   time.Duration
35
-	containers map[string]*statsCollectorData
35
+	containers map[string]*statsData
36 36
 }
37 37
 
38
-func (s *statsCollector) collect(c *Container) <-chan *execdriver.ResourceStats {
38
+// collect registers the container with the collector and adds it to
39
+// the event loop for collection on the specified interval returning
40
+// a channel for the subscriber to receive on.
41
+func (s *statsCollector) collect(c *Container) chan *execdriver.ResourceStats {
39 42
 	s.m.Lock()
43
+	defer s.m.Unlock()
40 44
 	ch := make(chan *execdriver.ResourceStats, 1024)
41
-	s.containers[c.ID] = &statsCollectorData{
45
+	if _, exists := s.containers[c.ID]; exists {
46
+		s.containers[c.ID].subs = append(s.containers[c.ID].subs, ch)
47
+		return ch
48
+	}
49
+	s.containers[c.ID] = &statsData{
42 50
 		c: c,
43 51
 		subs: []chan *execdriver.ResourceStats{
44 52
 			ch,
45 53
 		},
46 54
 	}
47
-	s.m.Unlock()
48 55
 	return ch
49 56
 }
50 57
 
58
+// stopCollection closes the channels for all subscribers and removes
59
+// the container from metrics collection.
51 60
 func (s *statsCollector) stopCollection(c *Container) {
52 61
 	s.m.Lock()
62
+	defer s.m.Unlock()
63
+	d := s.containers[c.ID]
64
+	if d == nil {
65
+		return
66
+	}
67
+	for _, sub := range d.subs {
68
+		close(sub)
69
+	}
53 70
 	delete(s.containers, c.ID)
71
+}
72
+
73
+// unsubscribe removes a specific subscriber from receiving updates for a
74
+// container's stats.
75
+func (s *statsCollector) unsubscribe(c *Container, ch chan *execdriver.ResourceStats) {
76
+	s.m.Lock()
77
+	cd := s.containers[c.ID]
78
+	for i, sub := range cd.subs {
79
+		if ch == sub {
80
+			cd.subs = append(cd.subs[:i], cd.subs[i+1:]...)
81
+			close(ch)
82
+		}
83
+	}
84
+	// if there are no more subscribers then remove the entire container
85
+	// from collection.
86
+	if len(cd.subs) == 0 {
87
+		delete(s.containers, c.ID)
88
+	}
54 89
 	s.m.Unlock()
55 90
 }
56 91
 
57 92
 func (s *statsCollector) start() {
58 93
 	go func() {
59 94
 		for _ = range time.Tick(s.interval) {
60
-			log.Debugf("starting collection of container stats")
61 95
 			s.m.Lock()
62 96
 			for id, d := range s.containers {
63
-				systemUsage, err := getSystemCpuUsage()
97
+				systemUsage, err := s.getSystemCpuUsage()
64 98
 				if err != nil {
65 99
 					log.Errorf("collecting system cpu usage for %s: %v", id, err)
66 100
 					continue
67 101
 				}
68 102
 				stats, err := d.c.Stats()
69 103
 				if err != nil {
70
-					// TODO: @crosbymichael evict container depending on error
104
+					if err == execdriver.ErrNotRunning {
105
+						continue
106
+					}
107
 +					// if the error is not because the container is not running then
108
+					// evict the container from the collector and close the channel for
109
+					// any subscribers currently waiting on changes.
71 110
 					log.Errorf("collecting stats for %s: %v", id, err)
111
+					for _, sub := range s.containers[id].subs {
112
+						close(sub)
113
+					}
114
+					delete(s.containers, id)
72 115
 					continue
73 116
 				}
74 117
 				stats.SystemUsage = systemUsage
... ...
@@ -81,14 +128,14 @@ func (s *statsCollector) start() {
81 81
 	}()
82 82
 }
83 83
 
84
-// returns value in nanoseconds
85
-func getSystemCpuUsage() (uint64, error) {
84
 +// getSystemCpuUsage returns the host system's cpu usage
85
+// in nanoseconds.
86
+func (s *statsCollector) getSystemCpuUsage() (uint64, error) {
86 87
 	f, err := os.Open("/proc/stat")
87 88
 	if err != nil {
88 89
 		return 0, err
89 90
 	}
90 91
 	defer f.Close()
91
-
92 92
 	sc := bufio.NewScanner(f)
93 93
 	for sc.Scan() {
94 94
 		parts := strings.Fields(sc.Text())
... ...
@@ -97,7 +144,6 @@ func getSystemCpuUsage() (uint64, error) {
97 97
 			if len(parts) < 8 {
98 98
 				return 0, fmt.Errorf("invalid number of cpu fields")
99 99
 			}
100
-
101 100
 			var total uint64
102 101
 			for _, i := range parts[1:8] {
103 102
 				v, err := strconv.ParseUint(i, 10, 64)
... ...
@@ -98,6 +98,7 @@ func init() {
98 98
 			{"save", "Save an image to a tar archive"},
99 99
 			{"search", "Search for an image on the Docker Hub"},
100 100
 			{"start", "Start a stopped container"},
101
+			{"stats", "Receive container stats"},
101 102
 			{"stop", "Stop a running container"},
102 103
 			{"tag", "Tag an image into a repository"},
103 104
 			{"top", "Lookup the running processes of a container"},
104 105
deleted file mode 100644
... ...
@@ -1,156 +0,0 @@
1
-package stats
2
-
3
-import (
4
-	"time"
5
-
6
-	"github.com/docker/libcontainer"
7
-	"github.com/docker/libcontainer/cgroups"
8
-)
9
-
10
-type ThrottlingData struct {
11
-	// Number of periods with throttling active
12
-	Periods uint64 `json:"periods,omitempty"`
13
-	// Number of periods when the container hit its throttling limit.
14
-	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
15
-	// Aggregate time the container was throttled for in nanoseconds.
16
-	ThrottledTime uint64 `json:"throttled_time,omitempty"`
17
-}
18
-
19
-// All CPU stats are aggregate since container inception.
20
-type CpuUsage struct {
21
-	// Total CPU time consumed.
22
-	// Units: nanoseconds.
23
-	TotalUsage uint64 `json:"total_usage,omitempty"`
24
-	// Total CPU time consumed per core.
25
-	// Units: nanoseconds.
26
-	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
27
-	// Time spent by tasks of the cgroup in kernel mode.
28
-	// Units: nanoseconds.
29
-	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
30
-	// Time spent by tasks of the cgroup in user mode.
31
-	// Units: nanoseconds.
32
-	UsageInUsermode uint64 `json:"usage_in_usermode"`
33
-}
34
-
35
-type CpuStats struct {
36
-	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"`
37
-	SystemUsage    uint64         `json:"system_cpu_usage"`
38
-	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
39
-}
40
-
41
-type MemoryStats struct {
42
-	// current res_counter usage for memory
43
-	Usage uint64 `json:"usage,omitempty"`
44
-	// maximum usage ever recorded.
45
-	MaxUsage uint64 `json:"max_usage,omitempty"`
46
-	// TODO(vishh): Export these as stronger types.
47
-	// all the stats exported via memory.stat.
48
-	Stats map[string]uint64 `json:"stats,omitempty"`
49
-	// number of times memory usage hits limits.
50
-	Failcnt uint64 `json:"failcnt"`
51
-	Limit   uint64 `json:"limit"`
52
-}
53
-
54
-type BlkioStatEntry struct {
55
-	Major uint64 `json:"major,omitempty"`
56
-	Minor uint64 `json:"minor,omitempty"`
57
-	Op    string `json:"op,omitempty"`
58
-	Value uint64 `json:"value,omitempty"`
59
-}
60
-
61
-type BlkioStats struct {
62
-	// number of bytes tranferred to and from the block device
63
-	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
64
-	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
65
-	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
66
-	IoServiceTimeRecursive  []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
67
-	IoWaitTimeRecursive     []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
68
-	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
69
-	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
70
-	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
71
-}
72
-
73
-type Network struct {
74
-	RxBytes   uint64 `json:"rx_bytes"`
75
-	RxPackets uint64 `json:"rx_packets"`
76
-	RxErrors  uint64 `json:"rx_errors"`
77
-	RxDropped uint64 `json:"rx_dropped"`
78
-	TxBytes   uint64 `json:"tx_bytes"`
79
-	TxPackets uint64 `json:"tx_packets"`
80
-	TxErrors  uint64 `json:"tx_errors"`
81
-	TxDropped uint64 `json:"tx_dropped"`
82
-}
83
-
84
-type Stats struct {
85
-	Read        time.Time   `json:"read"`
86
-	ClockTicks  int         `json:"clock_ticks"`
87
-	Interval    int         `json:"interval"` // in ms
88
-	Network     Network     `json:"network,omitempty"`
89
-	CpuStats    CpuStats    `json:"cpu_stats,omitempty"`
90
-	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
91
-	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
92
-}
93
-
94
-func ToStats(ls *libcontainer.ContainerStats) *Stats {
95
-	s := &Stats{}
96
-	if ls.NetworkStats != nil {
97
-		s.Network = Network{
98
-			RxBytes:   ls.NetworkStats.RxBytes,
99
-			RxPackets: ls.NetworkStats.RxPackets,
100
-			RxErrors:  ls.NetworkStats.RxErrors,
101
-			RxDropped: ls.NetworkStats.RxDropped,
102
-			TxBytes:   ls.NetworkStats.TxBytes,
103
-			TxPackets: ls.NetworkStats.TxPackets,
104
-			TxErrors:  ls.NetworkStats.TxErrors,
105
-			TxDropped: ls.NetworkStats.TxDropped,
106
-		}
107
-	}
108
-	cs := ls.CgroupStats
109
-	if cs != nil {
110
-		s.BlkioStats = BlkioStats{
111
-			IoServiceBytesRecursive: copyBlkioEntry(cs.BlkioStats.IoServiceBytesRecursive),
112
-			IoServicedRecursive:     copyBlkioEntry(cs.BlkioStats.IoServicedRecursive),
113
-			IoQueuedRecursive:       copyBlkioEntry(cs.BlkioStats.IoQueuedRecursive),
114
-			IoServiceTimeRecursive:  copyBlkioEntry(cs.BlkioStats.IoServiceTimeRecursive),
115
-			IoWaitTimeRecursive:     copyBlkioEntry(cs.BlkioStats.IoWaitTimeRecursive),
116
-			IoMergedRecursive:       copyBlkioEntry(cs.BlkioStats.IoMergedRecursive),
117
-			IoTimeRecursive:         copyBlkioEntry(cs.BlkioStats.IoTimeRecursive),
118
-			SectorsRecursive:        copyBlkioEntry(cs.BlkioStats.SectorsRecursive),
119
-		}
120
-		cpu := cs.CpuStats
121
-		s.CpuStats = CpuStats{
122
-			CpuUsage: CpuUsage{
123
-				TotalUsage:        cpu.CpuUsage.TotalUsage,
124
-				PercpuUsage:       cpu.CpuUsage.PercpuUsage,
125
-				UsageInKernelmode: cpu.CpuUsage.UsageInKernelmode,
126
-				UsageInUsermode:   cpu.CpuUsage.UsageInUsermode,
127
-			},
128
-			ThrottlingData: ThrottlingData{
129
-				Periods:          cpu.ThrottlingData.Periods,
130
-				ThrottledPeriods: cpu.ThrottlingData.ThrottledPeriods,
131
-				ThrottledTime:    cpu.ThrottlingData.ThrottledTime,
132
-			},
133
-		}
134
-		mem := cs.MemoryStats
135
-		s.MemoryStats = MemoryStats{
136
-			Usage:    mem.Usage,
137
-			MaxUsage: mem.MaxUsage,
138
-			Stats:    mem.Stats,
139
-			Failcnt:  mem.Failcnt,
140
-		}
141
-	}
142
-	return s
143
-}
144
-
145
-func copyBlkioEntry(entries []cgroups.BlkioStatEntry) []BlkioStatEntry {
146
-	out := make([]BlkioStatEntry, len(entries))
147
-	for i, re := range entries {
148
-		out[i] = BlkioStatEntry{
149
-			Major: re.Major,
150
-			Minor: re.Minor,
151
-			Op:    re.Op,
152
-			Value: re.Value,
153
-		}
154
-	}
155
-	return out
156
-}