Make plugin removes more resilient to failure
Tibor Vass authored on 2017/07/04 10:37:31... | ... |
@@ -5,6 +5,7 @@ package main |
5 | 5 |
import ( |
6 | 6 |
"bufio" |
7 | 7 |
"bytes" |
8 |
+ "context" |
|
8 | 9 |
"encoding/json" |
9 | 10 |
"fmt" |
10 | 11 |
"io" |
... | ... |
@@ -25,6 +26,9 @@ import ( |
25 | 25 |
"crypto/x509" |
26 | 26 |
|
27 | 27 |
"github.com/cloudflare/cfssl/helpers" |
28 |
+ "github.com/docker/docker/api" |
|
29 |
+ "github.com/docker/docker/api/types" |
|
30 |
+ "github.com/docker/docker/client" |
|
28 | 31 |
"github.com/docker/docker/integration-cli/checker" |
29 | 32 |
"github.com/docker/docker/integration-cli/cli" |
30 | 33 |
"github.com/docker/docker/integration-cli/daemon" |
... | ... |
@@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) { |
2980 | 2980 |
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) |
2981 | 2981 |
c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size)) |
2982 | 2982 |
} |
2983 |
+ |
|
2984 |
+// TestFailedPluginRemove makes sure that a failed plugin remove does not block |
|
2985 |
+// the daemon from starting |
|
2986 |
+func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) { |
|
2987 |
+ testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon) |
|
2988 |
+ d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{}) |
|
2989 |
+ d.Start(c) |
|
2990 |
+ cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil) |
|
2991 |
+ c.Assert(err, checker.IsNil) |
|
2992 |
+ |
|
2993 |
+ ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second) |
|
2994 |
+ defer cancel() |
|
2995 |
+ |
|
2996 |
+ name := "test-plugin-rm-fail" |
|
2997 |
+ out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{ |
|
2998 |
+ Disabled: true, |
|
2999 |
+ AcceptAllPermissions: true, |
|
3000 |
+ RemoteRef: "cpuguy83/docker-logdriver-test", |
|
3001 |
+ }) |
|
3002 |
+ c.Assert(err, checker.IsNil) |
|
3003 |
+ defer out.Close() |
|
3004 |
+ io.Copy(ioutil.Discard, out) |
|
3005 |
+ |
|
3006 |
+ ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) |
|
3007 |
+ defer cancel() |
|
3008 |
+ p, _, err := cli.PluginInspectWithRaw(ctx, name) |
|
3009 |
+ c.Assert(err, checker.IsNil) |
|
3010 |
+ |
|
3011 |
+ // simulate a bad/partial removal by removing the plugin config. |
|
3012 |
+ configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json") |
|
3013 |
+ c.Assert(os.Remove(configPath), checker.IsNil) |
|
3014 |
+ |
|
3015 |
+ d.Restart(c) |
|
3016 |
+ ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) |
|
3017 |
+ defer cancel() |
|
3018 |
+ _, err = cli.Ping(ctx) |
|
3019 |
+ c.Assert(err, checker.IsNil) |
|
3020 |
+ |
|
3021 |
+ _, _, err = cli.PluginInspectWithRaw(ctx, name) |
|
3022 |
+ // plugin should be gone since the config.json is gone |
|
3023 |
+ c.Assert(err, checker.NotNil) |
|
3024 |
+} |
... | ... |
@@ -13,7 +13,6 @@ import ( |
13 | 13 |
"os" |
14 | 14 |
"path" |
15 | 15 |
"path/filepath" |
16 |
- "sort" |
|
17 | 16 |
"strings" |
18 | 17 |
|
19 | 18 |
"github.com/Sirupsen/logrus" |
... | ... |
@@ -32,6 +31,7 @@ import ( |
32 | 32 |
"github.com/docker/docker/pkg/mount" |
33 | 33 |
"github.com/docker/docker/pkg/pools" |
34 | 34 |
"github.com/docker/docker/pkg/progress" |
35 |
+ "github.com/docker/docker/pkg/system" |
|
35 | 36 |
"github.com/docker/docker/plugin/v2" |
36 | 37 |
refstore "github.com/docker/docker/reference" |
37 | 38 |
"github.com/opencontainers/go-digest" |
... | ... |
@@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error { |
624 | 624 |
}() |
625 | 625 |
|
626 | 626 |
id := p.GetID() |
627 |
- pm.config.Store.Remove(p) |
|
628 | 627 |
pluginDir := filepath.Join(pm.config.Root, id) |
629 |
- if err := recursiveUnmount(pluginDir); err != nil { |
|
630 |
- logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err) |
|
628 |
+ |
|
629 |
+ if err := mount.RecursiveUnmount(pluginDir); err != nil { |
|
630 |
+ return errors.Wrap(err, "error unmounting plugin data") |
|
631 | 631 |
} |
632 |
- if err := os.RemoveAll(pluginDir); err != nil { |
|
633 |
- logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err) |
|
632 |
+ |
|
633 |
+ if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil { |
|
634 |
+ return errors.Wrap(err, "error performing atomic remove of plugin dir") |
|
635 |
+ } |
|
636 |
+ |
|
637 |
+ if err := system.EnsureRemoveAll(pluginDir); err != nil { |
|
638 |
+ return errors.Wrap(err, "error removing plugin dir") |
|
634 | 639 |
} |
640 |
+ pm.config.Store.Remove(p) |
|
635 | 641 |
pm.config.LogPluginEvent(id, name, "remove") |
636 | 642 |
return nil |
637 | 643 |
} |
... | ... |
@@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) { |
652 | 652 |
return mounts, nil |
653 | 653 |
} |
654 | 654 |
|
655 |
-func recursiveUnmount(root string) error { |
|
656 |
- mounts, err := getMounts(root) |
|
657 |
- if err != nil { |
|
658 |
- return err |
|
659 |
- } |
|
660 |
- |
|
661 |
- // sort in reverse-lexicographic order so the root mount will always be last |
|
662 |
- sort.Sort(sort.Reverse(sort.StringSlice(mounts))) |
|
663 |
- |
|
664 |
- for i, m := range mounts { |
|
665 |
- if err := mount.Unmount(m); err != nil { |
|
666 |
- if i == len(mounts)-1 { |
|
667 |
- return errors.Wrapf(err, "error performing recursive unmount on %s", root) |
|
668 |
- } |
|
669 |
- logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount") |
|
670 |
- } |
|
671 |
- } |
|
672 |
- |
|
673 |
- return nil |
|
674 |
-} |
|
675 |
- |
|
676 | 655 |
// Set sets plugin args |
677 | 656 |
func (pm *Manager) Set(name string, args []string) error { |
678 | 657 |
p, err := pm.config.Store.GetV2Plugin(name) |
... | ... |
@@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error { |
163 | 163 |
return nil |
164 | 164 |
} |
165 | 165 |
|
166 |
+func handleLoadError(err error, id string) { |
|
167 |
+ if err == nil { |
|
168 |
+ return |
|
169 |
+ } |
|
170 |
+ logger := logrus.WithError(err).WithField("id", id) |
|
171 |
+ if os.IsNotExist(errors.Cause(err)) { |
|
172 |
+ // Likely some error while removing on an older version of docker |
|
173 |
+ logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.") |
|
174 |
+ return |
|
175 |
+ } |
|
176 |
+ logger.Error("error loading plugin, skipping") |
|
177 |
+} |
|
178 |
+ |
|
166 | 179 |
func (pm *Manager) reload() error { // todo: restore |
167 | 180 |
dir, err := ioutil.ReadDir(pm.config.Root) |
168 | 181 |
if err != nil { |
... | ... |
@@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore |
173 | 173 |
if validFullID.MatchString(v.Name()) { |
174 | 174 |
p, err := pm.loadPlugin(v.Name()) |
175 | 175 |
if err != nil { |
176 |
- return err |
|
176 |
+ handleLoadError(err, v.Name()) |
|
177 |
+ continue |
|
177 | 178 |
} |
178 | 179 |
plugins[p.GetID()] = p |
180 |
+ } else { |
|
181 |
+ if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) { |
|
182 |
+ // There was likely some error while removing this plugin, let's try to remove again here |
|
183 |
+ if err := system.EnsureRemoveAll(v.Name()); err != nil { |
|
184 |
+ logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin") |
|
185 |
+ } |
|
186 |
+ } |
|
179 | 187 |
} |
180 | 188 |
} |
181 | 189 |
|
... | ... |
@@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs |
204 | 204 |
// Make sure nothing is mounted |
205 | 205 |
// This could happen if the plugin was disabled with `-f` with active mounts. |
206 | 206 |
// If there is anything in `orig` is still mounted, this should error out. |
207 |
- if err := recursiveUnmount(orig); err != nil { |
|
207 |
+ if err := mount.RecursiveUnmount(orig); err != nil { |
|
208 | 208 |
return err |
209 | 209 |
} |
210 | 210 |
|