Before this patch, if the plugin's `config.json` is successfully removed
but the main plugin state dir could not be removed for some reason (e.g.
leaked mount), it will prevent the daemon from being able to be
restarted.
This patches changes this to atomically remove the plugin such that on
daemon restart we can detect that there was an error and re-try. It also
changes the logic so that it only logs errors on restore rather than
erroring out the daemon.
This also removes some code which is now duplicated elsewhere.
Signed-off-by: Brian Goff <cpuguy83@gmail.com>
... | ... |
@@ -5,6 +5,7 @@ package main |
5 | 5 |
import ( |
6 | 6 |
"bufio" |
7 | 7 |
"bytes" |
8 |
+ "context" |
|
8 | 9 |
"encoding/json" |
9 | 10 |
"fmt" |
10 | 11 |
"io" |
... | ... |
@@ -25,6 +26,9 @@ import ( |
25 | 25 |
"crypto/x509" |
26 | 26 |
|
27 | 27 |
"github.com/cloudflare/cfssl/helpers" |
28 |
+ "github.com/docker/docker/api" |
|
29 |
+ "github.com/docker/docker/api/types" |
|
30 |
+ "github.com/docker/docker/client" |
|
28 | 31 |
"github.com/docker/docker/integration-cli/checker" |
29 | 32 |
"github.com/docker/docker/integration-cli/cli" |
30 | 33 |
"github.com/docker/docker/integration-cli/daemon" |
... | ... |
@@ -2980,3 +2984,45 @@ func (s *DockerDaemonSuite) TestShmSizeReload(c *check.C) { |
2980 | 2980 |
c.Assert(err, check.IsNil, check.Commentf("Output: %s", out)) |
2981 | 2981 |
c.Assert(strings.TrimSpace(out), check.Equals, fmt.Sprintf("%v", size)) |
2982 | 2982 |
} |
2983 |
+ |
|
2984 |
+// TestFailedPluginRemove makes sure that a failed plugin remove does not block |
|
2985 |
+// the daemon from starting |
|
2986 |
+func (s *DockerDaemonSuite) TestFailedPluginRemove(c *check.C) { |
|
2987 |
+ testRequires(c, DaemonIsLinux, IsAmd64, SameHostDaemon) |
|
2988 |
+ d := daemon.New(c, dockerBinary, dockerdBinary, daemon.Config{}) |
|
2989 |
+ d.Start(c) |
|
2990 |
+ cli, err := client.NewClient(d.Sock(), api.DefaultVersion, nil, nil) |
|
2991 |
+ c.Assert(err, checker.IsNil) |
|
2992 |
+ |
|
2993 |
+ ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second) |
|
2994 |
+ defer cancel() |
|
2995 |
+ |
|
2996 |
+ name := "test-plugin-rm-fail" |
|
2997 |
+ out, err := cli.PluginInstall(ctx, name, types.PluginInstallOptions{ |
|
2998 |
+ Disabled: true, |
|
2999 |
+ AcceptAllPermissions: true, |
|
3000 |
+ RemoteRef: "cpuguy83/docker-logdriver-test", |
|
3001 |
+ }) |
|
3002 |
+ c.Assert(err, checker.IsNil) |
|
3003 |
+ defer out.Close() |
|
3004 |
+ io.Copy(ioutil.Discard, out) |
|
3005 |
+ |
|
3006 |
+ ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) |
|
3007 |
+ defer cancel() |
|
3008 |
+ p, _, err := cli.PluginInspectWithRaw(ctx, name) |
|
3009 |
+ c.Assert(err, checker.IsNil) |
|
3010 |
+ |
|
3011 |
+ // simulate a bad/partial removal by removing the plugin config. |
|
3012 |
+ configPath := filepath.Join(d.Root, "plugins", p.ID, "config.json") |
|
3013 |
+ c.Assert(os.Remove(configPath), checker.IsNil) |
|
3014 |
+ |
|
3015 |
+ d.Restart(c) |
|
3016 |
+ ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) |
|
3017 |
+ defer cancel() |
|
3018 |
+ _, err = cli.Ping(ctx) |
|
3019 |
+ c.Assert(err, checker.IsNil) |
|
3020 |
+ |
|
3021 |
+ _, _, err = cli.PluginInspectWithRaw(ctx, name) |
|
3022 |
+ // plugin should be gone since the config.json is gone |
|
3023 |
+ c.Assert(err, checker.NotNil) |
|
3024 |
+} |
... | ... |
@@ -13,7 +13,6 @@ import ( |
13 | 13 |
"os" |
14 | 14 |
"path" |
15 | 15 |
"path/filepath" |
16 |
- "sort" |
|
17 | 16 |
"strings" |
18 | 17 |
|
19 | 18 |
"github.com/Sirupsen/logrus" |
... | ... |
@@ -32,6 +31,7 @@ import ( |
32 | 32 |
"github.com/docker/docker/pkg/mount" |
33 | 33 |
"github.com/docker/docker/pkg/pools" |
34 | 34 |
"github.com/docker/docker/pkg/progress" |
35 |
+ "github.com/docker/docker/pkg/system" |
|
35 | 36 |
"github.com/docker/docker/plugin/v2" |
36 | 37 |
refstore "github.com/docker/docker/reference" |
37 | 38 |
"github.com/opencontainers/go-digest" |
... | ... |
@@ -624,14 +624,20 @@ func (pm *Manager) Remove(name string, config *types.PluginRmConfig) error { |
624 | 624 |
}() |
625 | 625 |
|
626 | 626 |
id := p.GetID() |
627 |
- pm.config.Store.Remove(p) |
|
628 | 627 |
pluginDir := filepath.Join(pm.config.Root, id) |
629 |
- if err := recursiveUnmount(pluginDir); err != nil { |
|
630 |
- logrus.WithField("dir", pluginDir).WithField("id", id).Warn(err) |
|
628 |
+ |
|
629 |
+ if err := mount.RecursiveUnmount(pluginDir); err != nil { |
|
630 |
+ return errors.Wrap(err, "error unmounting plugin data") |
|
631 | 631 |
} |
632 |
- if err := os.RemoveAll(pluginDir); err != nil { |
|
633 |
- logrus.Warnf("unable to remove %q from plugin remove: %v", pluginDir, err) |
|
632 |
+ |
|
633 |
+ if err := os.Rename(pluginDir, pluginDir+"-removing"); err != nil { |
|
634 |
+ return errors.Wrap(err, "error performing atomic remove of plugin dir") |
|
635 |
+ } |
|
636 |
+ |
|
637 |
+ if err := system.EnsureRemoveAll(pluginDir); err != nil { |
|
638 |
+ return errors.Wrap(err, "error removing plugin dir") |
|
634 | 639 |
} |
640 |
+ pm.config.Store.Remove(p) |
|
635 | 641 |
pm.config.LogPluginEvent(id, name, "remove") |
636 | 642 |
return nil |
637 | 643 |
} |
... | ... |
@@ -652,27 +658,6 @@ func getMounts(root string) ([]string, error) { |
652 | 652 |
return mounts, nil |
653 | 653 |
} |
654 | 654 |
|
655 |
-func recursiveUnmount(root string) error { |
|
656 |
- mounts, err := getMounts(root) |
|
657 |
- if err != nil { |
|
658 |
- return err |
|
659 |
- } |
|
660 |
- |
|
661 |
- // sort in reverse-lexicographic order so the root mount will always be last |
|
662 |
- sort.Sort(sort.Reverse(sort.StringSlice(mounts))) |
|
663 |
- |
|
664 |
- for i, m := range mounts { |
|
665 |
- if err := mount.Unmount(m); err != nil { |
|
666 |
- if i == len(mounts)-1 { |
|
667 |
- return errors.Wrapf(err, "error performing recursive unmount on %s", root) |
|
668 |
- } |
|
669 |
- logrus.WithError(err).WithField("mountpoint", m).Warn("could not unmount") |
|
670 |
- } |
|
671 |
- } |
|
672 |
- |
|
673 |
- return nil |
|
674 |
-} |
|
675 |
- |
|
676 | 655 |
// Set sets plugin args |
677 | 656 |
func (pm *Manager) Set(name string, args []string) error { |
678 | 657 |
p, err := pm.config.Store.GetV2Plugin(name) |
... | ... |
@@ -163,6 +163,19 @@ func (pm *Manager) StateChanged(id string, e libcontainerd.StateInfo) error { |
163 | 163 |
return nil |
164 | 164 |
} |
165 | 165 |
|
166 |
+func handleLoadError(err error, id string) { |
|
167 |
+ if err == nil { |
|
168 |
+ return |
|
169 |
+ } |
|
170 |
+ logger := logrus.WithError(err).WithField("id", id) |
|
171 |
+ if os.IsNotExist(errors.Cause(err)) { |
|
172 |
+ // Likely some error while removing on an older version of docker |
|
173 |
+ logger.Warn("missing plugin config, skipping: this may be caused due to a failed remove and requires manual cleanup.") |
|
174 |
+ return |
|
175 |
+ } |
|
176 |
+ logger.Error("error loading plugin, skipping") |
|
177 |
+} |
|
178 |
+ |
|
166 | 179 |
func (pm *Manager) reload() error { // todo: restore |
167 | 180 |
dir, err := ioutil.ReadDir(pm.config.Root) |
168 | 181 |
if err != nil { |
... | ... |
@@ -173,9 +186,17 @@ func (pm *Manager) reload() error { // todo: restore |
173 | 173 |
if validFullID.MatchString(v.Name()) { |
174 | 174 |
p, err := pm.loadPlugin(v.Name()) |
175 | 175 |
if err != nil { |
176 |
- return err |
|
176 |
+ handleLoadError(err, v.Name()) |
|
177 |
+ continue |
|
177 | 178 |
} |
178 | 179 |
plugins[p.GetID()] = p |
180 |
+ } else { |
|
181 |
+ if validFullID.MatchString(strings.TrimSuffix(v.Name(), "-removing")) { |
|
182 |
+ // There was likely some error while removing this plugin, let's try to remove again here |
|
183 |
+ if err := system.EnsureRemoveAll(v.Name()); err != nil { |
|
184 |
+ logrus.WithError(err).WithField("id", v.Name()).Warn("error while attempting to clean up previously removed plugin") |
|
185 |
+ } |
|
186 |
+ } |
|
179 | 187 |
} |
180 | 188 |
} |
181 | 189 |
|
... | ... |
@@ -204,7 +204,7 @@ func (pm *Manager) upgradePlugin(p *v2.Plugin, configDigest digest.Digest, blobs |
204 | 204 |
// Make sure nothing is mounted |
205 | 205 |
// This could happen if the plugin was disabled with `-f` with active mounts. |
206 | 206 |
// If there is anything in `orig` is still mounted, this should error out. |
207 |
- if err := recursiveUnmount(orig); err != nil { |
|
207 |
+ if err := mount.RecursiveUnmount(orig); err != nil { |
|
208 | 208 |
return err |
209 | 209 |
} |
210 | 210 |
|