Browse code

Add support for metrics plugins

Allows for a plugin type that can be used to scrape metrics.
This is useful because metrics are not necessarily at a standard
location... `--metrics-addr` must be set, and must currently be a TCP
socket.
Even if metrics are done via a unix socket, there's no guarantee where
the socket may be located on the system, making bind-mounting such a
socket into a container difficult (and racy, failure-prone on daemon
restart).

Metrics plugins side-step this issue by always listening on a unix
socket and then bind-mounting that into a known path in the plugin
container.

Note there has been similar work in the past (and ultimately punted at
the time) for consistent access to the Docker API from within a
container.

Why not add metrics to the Docker API and just provide a plugin with
access to the Docker API? Certainly this can be useful, but gives a lot
of control/access to a plugin that may only need the metrics. We can
look at supporting API plugins separately for this reason.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

Brian Goff authored on 2017/04/14 10:56:50
Showing 8 changed files
... ...
@@ -106,6 +106,7 @@ type Daemon struct {
106 106
 	defaultIsolation          containertypes.Isolation // Default isolation mode on Windows
107 107
 	clusterProvider           cluster.Provider
108 108
 	cluster                   Cluster
109
+	metricsPluginListener     net.Listener
109 110
 
110 111
 	machineMemory uint64
111 112
 
... ...
@@ -593,6 +594,12 @@ func NewDaemon(config *config.Config, registryService registry.Service, containe
593 593
 	d.PluginStore = pluginStore
594 594
 	logger.RegisterPluginGetter(d.PluginStore)
595 595
 
596
+	metricsSockPath, err := d.listenMetricsSock()
597
+	if err != nil {
598
+		return nil, err
599
+	}
600
+	registerMetricsPluginCallback(d.PluginStore, metricsSockPath)
601
+
596 602
 	// Plugin system initialization should happen before restore. Do not change order.
597 603
 	d.pluginManager, err = plugin.NewManager(plugin.ManagerConfig{
598 604
 		Root:               filepath.Join(config.Root, "plugins"),
... ...
@@ -821,6 +828,8 @@ func (daemon *Daemon) Shutdown() error {
821 821
 	if daemon.configStore.LiveRestoreEnabled && daemon.containers != nil {
822 822
 		// check if there are any running containers, if none we should do some cleanup
823 823
 		if ls, err := daemon.Containers(&types.ContainerListOptions{}); len(ls) != 0 || err != nil {
824
+			// metrics plugins still need some cleanup
825
+			daemon.cleanupMetricsPlugins()
824 826
 			return nil
825 827
 		}
826 828
 	}
... ...
@@ -861,6 +870,8 @@ func (daemon *Daemon) Shutdown() error {
861 861
 		daemon.DaemonLeavesCluster()
862 862
 	}
863 863
 
864
+	daemon.cleanupMetricsPlugins()
865
+
864 866
 	// Shutdown plugins after containers and layerstore. Don't change the order.
865 867
 	daemon.pluginShutdown()
866 868
 
... ...
@@ -1,12 +1,19 @@
1 1
 package daemon
2 2
 
3 3
 import (
4
+	"path/filepath"
4 5
 	"sync"
5 6
 
7
+	"github.com/Sirupsen/logrus"
8
+	"github.com/docker/docker/pkg/mount"
9
+	"github.com/docker/docker/pkg/plugingetter"
6 10
 	"github.com/docker/go-metrics"
11
+	"github.com/pkg/errors"
7 12
 	"github.com/prometheus/client_golang/prometheus"
8 13
 )
9 14
 
15
+const metricsPluginType = "MetricsCollector"
16
+
10 17
 var (
11 18
 	containerActions          metrics.LabeledTimer
12 19
 	containerStates           metrics.LabeledGauge
... ...
@@ -106,3 +113,62 @@ func (ctr *stateCounter) Collect(ch chan<- prometheus.Metric) {
106 106
 	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(paused), "paused")
107 107
 	ch <- prometheus.MustNewConstMetric(ctr.desc, prometheus.GaugeValue, float64(stopped), "stopped")
108 108
 }
109
+
110
+func (d *Daemon) cleanupMetricsPlugins() {
111
+	ls := d.PluginStore.GetAllManagedPluginsByCap(metricsPluginType)
112
+	var wg sync.WaitGroup
113
+	wg.Add(len(ls))
114
+
115
+	for _, p := range ls {
116
+		go func() {
117
+			defer wg.Done()
118
+			pluginStopMetricsCollection(p)
119
+		}()
120
+	}
121
+	wg.Wait()
122
+
123
+	if d.metricsPluginListener != nil {
124
+		d.metricsPluginListener.Close()
125
+	}
126
+}
127
+
128
+type metricsPlugin struct {
129
+	plugingetter.CompatPlugin
130
+}
131
+
132
+func (p metricsPlugin) sock() string {
133
+	return "metrics.sock"
134
+}
135
+
136
+func (p metricsPlugin) sockBase() string {
137
+	return filepath.Join(p.BasePath(), "run", "docker")
138
+}
139
+
140
+func pluginStartMetricsCollection(p plugingetter.CompatPlugin) error {
141
+	type metricsPluginResponse struct {
142
+		Err string
143
+	}
144
+	var res metricsPluginResponse
145
+	if err := p.Client().Call(metricsPluginType+".StartMetrics", nil, &res); err != nil {
146
+		return errors.Wrap(err, "could not start metrics plugin")
147
+	}
148
+	if res.Err != "" {
149
+		return errors.New(res.Err)
150
+	}
151
+	return nil
152
+}
153
+
154
+func pluginStopMetricsCollection(p plugingetter.CompatPlugin) {
155
+	if err := p.Client().Call(metricsPluginType+".StopMetrics", nil, nil); err != nil {
156
+		logrus.WithError(err).WithField("name", p.Name()).Error("error stopping metrics collector")
157
+	}
158
+
159
+	mp := metricsPlugin{p}
160
+	sockPath := filepath.Join(mp.sockBase(), mp.sock())
161
+	if err := mount.Unmount(sockPath); err != nil {
162
+		if mounted, _ := mount.Mounted(sockPath); mounted {
163
+			logrus.WithError(err).WithField("name", p.Name()).WithField("socket", sockPath).Error("error unmounting metrics socket for plugin")
164
+		}
165
+	}
166
+	return
167
+}
109 168
new file mode 100644
... ...
@@ -0,0 +1,86 @@
0
+// +build !windows
1
+
2
+package daemon
3
+
4
+import (
5
+	"net"
6
+	"net/http"
7
+	"os"
8
+	"path/filepath"
9
+	"syscall"
10
+
11
+	"github.com/Sirupsen/logrus"
12
+	"github.com/docker/docker/pkg/mount"
13
+	"github.com/docker/docker/pkg/plugingetter"
14
+	"github.com/docker/docker/pkg/plugins"
15
+	metrics "github.com/docker/go-metrics"
16
+	"github.com/pkg/errors"
17
+)
18
+
19
+func (daemon *Daemon) listenMetricsSock() (string, error) {
20
+	path := filepath.Join(daemon.configStore.ExecRoot, "metrics.sock")
21
+	syscall.Unlink(path)
22
+	l, err := net.Listen("unix", path)
23
+	if err != nil {
24
+		return "", errors.Wrap(err, "error setting up metrics plugin listener")
25
+	}
26
+
27
+	mux := http.NewServeMux()
28
+	mux.Handle("/metrics", metrics.Handler())
29
+	go func() {
30
+		http.Serve(l, mux)
31
+	}()
32
+	daemon.metricsPluginListener = l
33
+	return path, nil
34
+}
35
+
36
+func registerMetricsPluginCallback(getter plugingetter.PluginGetter, sockPath string) {
37
+	getter.Handle(metricsPluginType, func(name string, client *plugins.Client) {
38
+		// Use lookup since nothing in the system can really reference it, no need
39
+		// to protect against removal
40
+		p, err := getter.Get(name, metricsPluginType, plugingetter.Lookup)
41
+		if err != nil {
42
+			return
43
+		}
44
+
45
+		mp := metricsPlugin{p}
46
+		sockBase := mp.sockBase()
47
+		if err := os.MkdirAll(sockBase, 0755); err != nil {
48
+			logrus.WithError(err).WithField("name", name).WithField("path", sockBase).Error("error creating metrics plugin base path")
49
+			return
50
+		}
51
+
52
+		defer func() {
53
+			if err != nil {
54
+				os.RemoveAll(sockBase)
55
+			}
56
+		}()
57
+
58
+		pluginSockPath := filepath.Join(sockBase, mp.sock())
59
+		_, err = os.Stat(pluginSockPath)
60
+		if err == nil {
61
+			mount.Unmount(pluginSockPath)
62
+		} else {
63
+			logrus.WithField("path", pluginSockPath).Debugf("creating plugin socket")
64
+			f, err := os.OpenFile(pluginSockPath, os.O_CREATE, 0600)
65
+			if err != nil {
66
+				return
67
+			}
68
+			f.Close()
69
+		}
70
+
71
+		if err := mount.Mount(sockPath, pluginSockPath, "none", "bind,ro"); err != nil {
72
+			logrus.WithError(err).WithField("name", name).Error("could not mount metrics socket to plugin")
73
+			return
74
+		}
75
+
76
+		if err := pluginStartMetricsCollection(p); err != nil {
77
+			if err := mount.Unmount(pluginSockPath); err != nil {
78
+				if mounted, _ := mount.Mounted(pluginSockPath); mounted {
79
+					logrus.WithError(err).WithField("sock_path", pluginSockPath).Error("error unmounting metrics socket from plugin during cleanup")
80
+				}
81
+			}
82
+			logrus.WithError(err).WithField("name", name).Error("error while initializing metrics plugin")
83
+		}
84
+	})
85
+}
0 86
new file mode 100644
... ...
@@ -0,0 +1,12 @@
0
+// +build windows
1
+
2
+package daemon
3
+
4
+import "github.com/docker/docker/pkg/plugingetter"
5
+
6
+func registerMetricsPluginCallback(getter plugingetter.PluginGetter, sockPath string) {
7
+}
8
+
9
+func (daemon *Daemon) listenMetricsSock() (string, error) {
10
+	return "", nil
11
+}
... ...
@@ -61,6 +61,8 @@ Config provides the base accessible fields for working with V0 plugin format
61 61
 
62 62
         - **docker.logdriver/1.0**
63 63
 
64
+        - **docker.metricscollector/1.0**
65
+
64 66
     - **`socket`** *string*
65 67
 
66 68
       socket is the name of the socket the engine should use to communicate with the plugins.
67 69
new file mode 100644
... ...
@@ -0,0 +1,85 @@
0
+---
1
+title: "Docker metrics collector plugins"
2
+description: "Metrics plugins."
3
+keywords: "Examples, Usage, plugins, docker, documentation, user guide, metrics"
4
+---
5
+
6
+<!-- This file is maintained within the docker/docker Github
7
+     repository at https://github.com/docker/docker/. Make all
8
+     pull requests against that repo. If you see this file in
9
+     another repository, consider it read-only there, as it will
10
+     periodically be overwritten by the definitive file. Pull
11
+     requests which include edits to this file in other repositories
12
+     will be rejected.
13
+-->
14
+
15
+# Metrics Collector Plugins
16
+
17
+Docker exposes internal metrics based on the prometheus format. Metrics plugins
18
+enable accessing these metrics in a consistent way by providing a Unix
19
+socket at a predefined path where the plugin can scrape the metrics.
20
+
21
+> **Note**: While the plugin interface for metrics is non-experimental, the naming
22
+of the metrics and metric labels is still considered experimental and may change
23
+in a future version.
24
+
25
+## Creating a metrics plugin
26
+
27
+You must currently set `PropagatedMount` in the plugin `config.json` to
28
+`/run/docker`. This allows the plugin to receive updated mounts
29
+(the bind-mounted socket) from Docker after the plugin is already configured.
30
+
31
+## MetricsCollector protocol
32
+
33
+Metrics plugins must register as implementing the `MetricsCollector` interface
34
+in `config.json`.
35
+
36
+On Unix platforms, the socket is located at `/run/docker/metrics.sock` in the
37
+plugin's rootfs.
38
+
39
+`MetricsCollector` must implement two endpoints:
40
+
41
+### `MetricsCollector.StartMetrics`
42
+
43
+Signals to the plugin that the metrics socket is now available for scraping.
44
+
45
+**Request**
46
+```json
47
+{}
48
+```
49
+
50
+The request has no payload.
51
+
52
+**Response**
53
+```json
54
+{
55
+	"Err": ""
56
+}
57
+```
58
+
59
+If an error occurred during this request, add an error message to the `Err` field
60
+in the response. If no error then you can either send an empty response (`{}`)
61
+or an empty value for the `Err` field. Errors will only be logged.
62
+
63
+### `MetricsCollector.StopMetrics`
64
+
65
+Signals to the plugin that the metrics socket is no longer available.
66
+This may happen when the daemon is shutting down.
67
+
68
+**Request**
69
+```json
70
+{}
71
+```
72
+
73
+The request has no payload.
74
+
75
+**Response**
76
+```json
77
+{
78
+	"Err": ""
79
+}
80
+```
81
+
82
+If an error occurred during this request, add an error message to the `Err` field
83
+in the response. If no error then you can either send an empty response (`{}`)
84
+or an empty value for the `Err` field. Errors will only be logged.
... ...
@@ -55,7 +55,7 @@ than one filter, then pass multiple flags (e.g., `--filter "foo=bar" --filter "b
55 55
 The currently supported filters are:
56 56
 
57 57
 * enabled (boolean - true or false, 0 or 1)
58
-* capability (string - currently `volumedriver`, `networkdriver`, `ipamdriver`, or `authz`)
58
+* capability (string - currently `volumedriver`, `networkdriver`, `ipamdriver`, `logdriver`, `metricscollector`, or `authz`)
59 59
 
60 60
 #### enabled
61 61
 
... ...
@@ -65,7 +65,7 @@ The `enabled` filter matches on plugins enabled or disabled.
65 65
 
66 66
 The `capability` filter matches on plugin capabilities. One plugin
67 67
 might have multiple capabilities. Currently `volumedriver`, `networkdriver`,
68
-`ipamdriver`, and `authz` are supported capabilities.
68
+`ipamdriver`, `logdriver`, `metricscollector`, and `authz` are supported capabilities.
69 69
 
70 70
 ```bash
71 71
 $ docker plugin install --disable tiborvass/no-remove
... ...
@@ -3,12 +3,14 @@ package main
3 3
 import (
4 4
 	"fmt"
5 5
 	"io/ioutil"
6
+	"net/http"
6 7
 	"os"
7 8
 	"path/filepath"
8 9
 	"strings"
9 10
 
10 11
 	"github.com/docker/docker/integration-cli/checker"
11 12
 	"github.com/docker/docker/integration-cli/cli"
13
+	"github.com/docker/docker/integration-cli/daemon"
12 14
 	icmd "github.com/docker/docker/pkg/testutil/cmd"
13 15
 	"github.com/go-check/check"
14 16
 )
... ...
@@ -455,3 +457,24 @@ func (s *DockerSuite) TestPluginUpgrade(c *check.C) {
455 455
 	dockerCmd(c, "volume", "inspect", "bananas")
456 456
 	dockerCmd(c, "run", "--rm", "-v", "bananas:/apple", "busybox", "sh", "-c", "ls -lh /apple/core")
457 457
 }
458
+
459
+func (s *DockerSuite) TestPluginMetricsCollector(c *check.C) {
460
+	testRequires(c, DaemonIsLinux, Network, SameHostDaemon, IsAmd64)
461
+	d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{})
462
+	d.Start(c)
463
+	defer d.Stop(c)
464
+
465
+	name := "cpuguy83/docker-metrics-plugin-test:latest"
466
+	r := cli.Docker(cli.Args("plugin", "install", "--grant-all-permissions", name), cli.Daemon(d))
467
+	c.Assert(r.Error, checker.IsNil, check.Commentf(r.Combined()))
468
+
469
+	// plugin lisens on localhost:19393 and proxies the metrics
470
+	resp, err := http.Get("http://localhost:19393/metrics")
471
+	c.Assert(err, checker.IsNil)
472
+	defer resp.Body.Close()
473
+
474
+	b, err := ioutil.ReadAll(resp.Body)
475
+	c.Assert(err, checker.IsNil)
476
+	// check that a known metric is there... don't epect this metric to change over time.. probably safe
477
+	c.Assert(string(b), checker.Contains, "container_actions")
478
+}