Browse code

c8d: Fix image commit with userns mapping

The remapping in the commit code was in the wrong place, we would create
a diff and then remap the snapshot, but the descriptor created in
"CreateDiff" was still pointing to the old snapshot, we now remap the
snapshot before creating a diff. Also make sure we don't lose any
capabilities, they used to be lost after the chown.

Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>

Djordje Lukic authored on 2023/12/15 00:15:35
Showing 3 changed files
... ...
@@ -29,6 +29,7 @@ import (
29 29
 	"github.com/opencontainers/image-spec/identity"
30 30
 	"github.com/opencontainers/image-spec/specs-go"
31 31
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
32
+	"github.com/pkg/errors"
32 33
 )
33 34
 
34 35
 /*
... ...
@@ -81,7 +82,7 @@ func (i *ImageService) CommitImage(ctx context.Context, cc backend.CommitConfig)
81 81
 		}
82 82
 	}()
83 83
 
84
-	diffLayerDesc, diffID, err := createDiff(ctx, cc.ContainerID, sn, cs, differ)
84
+	diffLayerDesc, diffID, err := i.createDiff(ctx, cc.ContainerID, sn, cs, differ)
85 85
 	if err != nil {
86 86
 		return "", fmt.Errorf("failed to export layer: %w", err)
87 87
 	}
... ...
@@ -249,10 +250,38 @@ func writeContentsForImage(ctx context.Context, snName string, cs content.Store,
249 249
 
250 250
 // createDiff creates a layer diff into containerd's content store.
251 251
 // If the diff is empty it returns nil empty digest and no error.
252
-func createDiff(ctx context.Context, name string, sn snapshots.Snapshotter, cs content.Store, comparer diff.Comparer) (*ocispec.Descriptor, digest.Digest, error) {
252
+func (i *ImageService) createDiff(ctx context.Context, name string, sn snapshots.Snapshotter, cs content.Store, comparer diff.Comparer) (*ocispec.Descriptor, digest.Digest, error) {
253
+	if !i.idMapping.Empty() {
254
+		// The rootfs of the container is remapped if an id mapping exists, we
255
+		// need to "unremap" it before committing the snapshot
256
+		rootPair := i.idMapping.RootPair()
257
+		usernsID := fmt.Sprintf("%s-%d-%d", name, rootPair.UID, rootPair.GID)
258
+		remappedID := usernsID + remapSuffix
259
+
260
+		err := sn.Commit(ctx, name+"-pre", name)
261
+		if err != nil && !cerrdefs.IsAlreadyExists(err) {
262
+			return nil, "", err
263
+		}
264
+
265
+		if !cerrdefs.IsAlreadyExists(err) {
266
+			mounts, err := sn.Prepare(ctx, remappedID, name+"-pre")
267
+			if err != nil {
268
+				return nil, "", err
269
+			}
270
+
271
+			if err := i.unremapRootFS(ctx, mounts); err != nil {
272
+				return nil, "", err
273
+			}
274
+
275
+			if err := sn.Commit(ctx, name, remappedID); err != nil {
276
+				return nil, "", err
277
+			}
278
+		}
279
+	}
280
+
253 281
 	newDesc, err := rootfs.CreateDiff(ctx, name, sn, comparer)
254 282
 	if err != nil {
255
-		return nil, "", err
283
+		return nil, "", errors.Wrap(err, "CreateDiff")
256 284
 	}
257 285
 
258 286
 	ra, err := cs.ReaderAt(ctx, newDesc)
... ...
@@ -311,7 +340,7 @@ func (i *ImageService) applyDiffLayer(ctx context.Context, name string, containe
311 311
 
312 312
 	defer func() {
313 313
 		if retErr != nil {
314
-			// NOTE: the snapshotter should be hold by lease. Even
314
+			// NOTE: the snapshotter should be held by lease. Even
315 315
 			// if the cleanup fails, the containerd gc can delete it.
316 316
 			if err := sn.Remove(ctx, key); err != nil {
317 317
 				log.G(ctx).Warnf("failed to cleanup aborted apply %s: %s", key, err)
... ...
@@ -323,35 +352,6 @@ func (i *ImageService) applyDiffLayer(ctx context.Context, name string, containe
323 323
 		return err
324 324
 	}
325 325
 
326
-	if !i.idMapping.Empty() {
327
-		// The rootfs of the container is remapped if an id mapping exists, we
328
-		// need to "unremap" it before committing the snapshot
329
-		rootPair := i.idMapping.RootPair()
330
-		usernsID := fmt.Sprintf("%s-%d-%d", key, rootPair.UID, rootPair.GID)
331
-		remappedID := usernsID + remapSuffix
332
-
333
-		if err = sn.Commit(ctx, name+"-pre", key); err != nil {
334
-			if cerrdefs.IsAlreadyExists(err) {
335
-				return nil
336
-			}
337
-			return err
338
-		}
339
-
340
-		mounts, err = sn.Prepare(ctx, remappedID, name+"-pre")
341
-		if err != nil {
342
-			return err
343
-		}
344
-
345
-		if err := i.unremapRootFS(ctx, mounts); err != nil {
346
-			return err
347
-		}
348
-
349
-		if err := sn.Commit(ctx, name, remappedID); err != nil {
350
-			return err
351
-		}
352
-		key = remappedID
353
-	}
354
-
355 326
 	if err = sn.Commit(ctx, name, key); err != nil {
356 327
 		if cerrdefs.IsAlreadyExists(err) {
357 328
 			return nil
... ...
@@ -17,8 +17,6 @@ import (
17 17
 	"github.com/pkg/errors"
18 18
 )
19 19
 
20
-const remapSuffix = "-remap"
21
-
22 20
 // PrepareSnapshot prepares a snapshot from a parent image for a container
23 21
 func (i *ImageService) PrepareSnapshot(ctx context.Context, id string, parentImage string, platform *ocispec.Platform, setupInit func(string) error) error {
24 22
 	var parentSnapshot string
... ...
@@ -11,38 +11,33 @@ import (
11 11
 
12 12
 	"github.com/containerd/containerd/mount"
13 13
 	"github.com/containerd/containerd/snapshots"
14
-	"github.com/containerd/log"
14
+	"github.com/containerd/continuity/sysx"
15 15
 	"github.com/docker/docker/pkg/idtools"
16 16
 )
17 17
 
18
-func (i *ImageService) remapSnapshot(ctx context.Context, snapshotter snapshots.Snapshotter, id string, parentSnapshot string) error {
19
-	rootPair := i.idMapping.RootPair()
20
-	usernsID := fmt.Sprintf("%s-%d-%d", parentSnapshot, rootPair.UID, rootPair.GID)
21
-	remappedID := usernsID + remapSuffix
18
+const (
19
+	// Values based on linux/include/uapi/linux/capability.h
20
+	xattrCapsSz2    = 20
21
+	versionOffset   = 3
22
+	vfsCapRevision2 = 2
23
+	vfsCapRevision3 = 3
24
+	remapSuffix     = "-remap"
25
+)
22 26
 
23
-	// If the remapped snapshot already exist we only need to prepare the new snapshot
24
-	if _, err := snapshotter.Stat(ctx, usernsID); err == nil {
25
-		_, err = snapshotter.Prepare(ctx, id, usernsID)
27
+func (i *ImageService) remapSnapshot(ctx context.Context, snapshotter snapshots.Snapshotter, id string, parentSnapshot string) error {
28
+	_, err := snapshotter.Prepare(ctx, id, parentSnapshot)
29
+	if err != nil {
26 30
 		return err
27 31
 	}
28
-
29
-	mounts, err := snapshotter.Prepare(ctx, remappedID, parentSnapshot)
32
+	mounts, err := snapshotter.Mounts(ctx, id)
30 33
 	if err != nil {
31 34
 		return err
32 35
 	}
33 36
 
34 37
 	if err := i.remapRootFS(ctx, mounts); err != nil {
35
-		if rmErr := snapshotter.Remove(ctx, usernsID); rmErr != nil {
36
-			log.G(ctx).WithError(rmErr).Warn("failed to remove snapshot after remap error")
37
-		}
38 38
 		return err
39 39
 	}
40 40
 
41
-	if err := snapshotter.Commit(ctx, usernsID, remappedID); err != nil {
42
-		return err
43
-	}
44
-
45
-	_, err = snapshotter.Prepare(ctx, id, usernsID)
46 41
 	return err
47 42
 }
48 43
 
... ...
@@ -63,7 +58,7 @@ func (i *ImageService) remapRootFS(ctx context.Context, mounts []mount.Mount) er
63 63
 				return err
64 64
 			}
65 65
 
66
-			return os.Lchown(path, ids.UID, ids.GID)
66
+			return chownWithCaps(path, ids.UID, ids.GID)
67 67
 		})
68 68
 	})
69 69
 }
... ...
@@ -85,7 +80,48 @@ func (i *ImageService) unremapRootFS(ctx context.Context, mounts []mount.Mount)
85 85
 				return err
86 86
 			}
87 87
 
88
-			return os.Lchown(path, uid, gid)
88
+			return chownWithCaps(path, uid, gid)
89 89
 		})
90 90
 	})
91 91
 }
92
+
93
+// chownWithCaps will chown path and preserve the extended attributes.
94
+// chowning a file will remove the capabilities, so we need to first get all of
95
+// them, chown the file, and then set the extended attributes
96
+func chownWithCaps(path string, uid int, gid int) error {
97
+	xattrKeys, err := sysx.LListxattr(path)
98
+	if err != nil {
99
+		return err
100
+	}
101
+
102
+	xattrs := make(map[string][]byte, len(xattrKeys))
103
+
104
+	for _, xattr := range xattrKeys {
105
+		data, err := sysx.LGetxattr(path, xattr)
106
+		if err != nil {
107
+			return err
108
+		}
109
+		xattrs[xattr] = data
110
+	}
111
+
112
+	if err := os.Lchown(path, uid, gid); err != nil {
113
+		return err
114
+	}
115
+
116
+	for xattrKey, xattrValue := range xattrs {
117
+		length := len(xattrValue)
118
+		// make sure the capabilities are version 2,
119
+		// capabilities version 3 also store the root uid of the namespace,
120
+		// we don't want this when we are in userns-remap mode
121
+		// see: https://github.com/moby/moby/pull/41724
122
+		if xattrKey == "security.capability" && xattrValue[versionOffset] == vfsCapRevision3 {
123
+			xattrValue[versionOffset] = vfsCapRevision2
124
+			length = xattrCapsSz2
125
+		}
126
+		if err := sysx.LSetxattr(path, xattrKey, xattrValue[:length], 0); err != nil {
127
+			return err
128
+		}
129
+	}
130
+
131
+	return nil
132
+}