Browse code

Merge pull request #33830 from cpuguy83/33799_fix_plugin_mounts_issue

Make plugin removes more resilient to failure

Tibor Vass authored on 2017/07/04 10:37:31
Showing 4 changed files
... ...
@@ -5,6 +5,7 @@ package main
5 5
 import (
6 6
 	"bufio"
7 7
 	"bytes"
8
+	"context"
8 9
 	"encoding/json"
9 10
 	"fmt"
10 11
 	"io"
... ...
@@ -25,6 +26,9 @@ import (
25 25
 	"crypto/x509"
26 26
 
27 27
 	"github.com/cloudflare/cfssl/helpers"
28
+	"github.com/docker/docker/api"
29
+	"github.com/docker/docker/api/types"
30
+	"github.com/docker/docker/client"
28 31
 	"github.com/docker/docker/integration-cli/checker"
29 32
 	"github.com/docker/docker/integration-cli/cli"
30 33
 	"github.com/docker/docker/integration-cli/daemon"
... ...
@@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) {
2980 2980
 	c.Assert(err, check.IsNil, check.Commentf("Output: %s", out))
2981 2981
 	c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size))
2982 2982
 }
2983
+
2984
+// TestFailedPluginRemove makes sure that a failed plugin remove does not block
2985
+// the daemon from starting
2986
+func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) {
2987
+	testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon)
2988
+	d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{})
2989
+	d.Start(c)
2990
+	cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil)
2991
+	c.Assert(err, checker.IsNil)
2992
+
2993
+	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second)
2994
+	defer cancel()
2995
+
2996
+	name := "test-plugin-rm-fail"
2997
+	out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{
2998
+		Disabled:             true,
2999
+		AcceptAllPermissions: true,
3000
+		RemoteRef:            "cpuguy83/docker-logdriver-test",
3001
+	})
3002
+	c.Assert(err, checker.IsNil)
3003
+	defer out.Close()
3004
+	io.Copy(ioutil.Discard, out)
3005
+
3006
+	ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
3007
+	defer cancel()
3008
+	p, _, err := cli.PluginInspectWithRaw(ctx, name)
3009
+	c.Assert(err, checker.IsNil)
3010
+
3011
+	// simulate a bad/partial removal by removing the plugin config.
3012
+	configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json")
3013
+	c.Assert(os.Remove(configPath), checker.IsNil)
3014
+
3015
+	d.Restart(c)
3016
+	ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
3017
+	defer cancel()
3018
+	_, err = cli.Ping(ctx)
3019
+	c.Assert(err, checker.IsNil)
3020
+
3021
+	_, _, err = cli.PluginInspectWithRaw(ctx, name)
3022
+	// plugin should be gone since the config.json is gone
3023
+	c.Assert(err, checker.NotNil)
3024
+}
... ...
@@ -13,7 +13,6 @@ import (
13 13
 	"os"
14 14
 	"path"
15 15
 	"path/filepath"
16
-	"sort"
17 16
 	"strings"
18 17
 
19 18
 	"github.com/Sirupsen/logrus"
... ...
@@ -32,6 +31,7 @@ import (
32 32
 	"github.com/docker/docker/pkg/mount"
33 33
 	"github.com/docker/docker/pkg/pools"
34 34
 	"github.com/docker/docker/pkg/progress"
35
+	"github.com/docker/docker/pkg/system"
35 36
 	"github.com/docker/docker/plugin/v2"
36 37
 	refstore "github.com/docker/docker/reference"
37 38
 	"github.com/opencontainers/go-digest"
... ...
@@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error {
624 624
 	}()
625 625
 
626 626
 	id := p.GetID()
627
-	pm.config.Store.Remove(p)
628 627
 	pluginDir := filepath.Join(pm.config.Root, id)
629
-	if err := recursiveUnmount(pluginDir); err != nil {
630
-		logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err)
628
+
629
+	if err := mount.RecursiveUnmount(pluginDir); err != nil {
630
+		return errors.Wrap(err, "error unmounting plugin data")
631 631
 	}
632
-	if err := os.RemoveAll(pluginDir); err != nil {
633
-		logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err)
632
+
633
+	if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil {
634
+		return errors.Wrap(err, "error performing atomic remove of plugin dir")
635
+	}
636
+
637
+	if err := system.EnsureRemoveAll(pluginDir); err != nil {
638
+		return errors.Wrap(err, "error removing plugin dir")
634 639
 	}
640
+	pm.config.Store.Remove(p)
635 641
 	pm.config.LogPluginEvent(id, name, "remove")
636 642
 	return nil
637 643
 }
... ...
@@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) {
652 652
 	return mounts, nil
653 653
 }
654 654
 
655
-func recursiveUnmount(root string) error {
656
-	mounts, err := getMounts(root)
657
-	if err != nil {
658
-		return err
659
-	}
660
-
661
-	// sort in reverse-lexicographic order so the root mount will always be last
662
-	sort.Sort(sort.Reverse(sort.StringSlice(mounts)))
663
-
664
-	for i, m := range mounts {
665
-		if err := mount.Unmount(m); err != nil {
666
-			if i == len(mounts)-1 {
667
-				return errors.Wrapf(err, "error performing recursive unmount on %s", root)
668
-			}
669
-			logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount")
670
-		}
671
-	}
672
-
673
-	return nil
674
-}
675
-
676 655
 // Set sets plugin args
677 656
 func (pm *Manager) Set(name string, args []string) error {
678 657
 	p, err := pm.config.Store.GetV2Plugin(name)
... ...
@@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error {
163 163
 	return nil
164 164
 }
165 165
 
166
+func handleLoadError(err error, id string) {
167
+	if err == nil {
168
+		return
169
+	}
170
+	logger := logrus.WithError(err).WithField("id", id)
171
+	if os.IsNotExist(errors.Cause(err)) {
172
+		// Likely some error while removing on an older version of docker
173
+		logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.")
174
+		return
175
+	}
176
+	logger.Error("error loading plugin, skipping")
177
+}
178
+
166 179
 func (pm *Manager) reload() error { // todo: restore
167 180
 	dir, err := ioutil.ReadDir(pm.config.Root)
168 181
 	if err != nil {
... ...
@@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore
173 173
 		if validFullID.MatchString(v.Name()) {
174 174
 			p, err := pm.loadPlugin(v.Name())
175 175
 			if err != nil {
176
-				return err
176
+				handleLoadError(err, v.Name())
177
+				continue
177 178
 			}
178 179
 			plugins[p.GetID()] = p
180
+		} else {
181
+			if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) {
182
+				// There was likely some error while removing this plugin, let's try to remove again here
183
+				if err := system.EnsureRemoveAll(v.Name()); err != nil {
184
+					logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin")
185
+				}
186
+			}
179 187
 		}
180 188
 	}
181 189
 
... ...
@@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs
204 204
 	// Make sure nothing is mounted
205 205
 	// This could happen if the plugin was disabled with `-f` with active mounts.
206 206
 	// If there is anything in `orig` is still mounted, this should error out.
207
-	if err := recursiveUnmount(orig); err != nil {
207
+	if err := mount.RecursiveUnmount(orig); err != nil {
208 208
 		return err
209 209
 	}
210 210