The remapping in the commit code was in the wrong place, we would create
a diff and then remap the snapshot, but the descriptor created in
"CreateDiff" was still pointing to the old snapshot, we now remap the
snapshot before creating a diff. Also make sure we don't lose any
capabilities, they used to be lost after the chown.
Signed-off-by: Djordje Lukic <djordje.lukic@docker.com>
| ... | ... |
@@ -29,6 +29,7 @@ import ( |
| 29 | 29 |
"github.com/opencontainers/image-spec/identity" |
| 30 | 30 |
"github.com/opencontainers/image-spec/specs-go" |
| 31 | 31 |
ocispec "github.com/opencontainers/image-spec/specs-go/v1" |
| 32 |
+ "github.com/pkg/errors" |
|
| 32 | 33 |
) |
| 33 | 34 |
|
| 34 | 35 |
/* |
| ... | ... |
@@ -81,7 +82,7 @@ func (i *ImageService) CommitImage(ctx context.Context, cc backend.CommitConfig) |
| 81 | 81 |
} |
| 82 | 82 |
}() |
| 83 | 83 |
|
| 84 |
- diffLayerDesc, diffID, err := createDiff(ctx, cc.ContainerID, sn, cs, differ) |
|
| 84 |
+ diffLayerDesc, diffID, err := i.createDiff(ctx, cc.ContainerID, sn, cs, differ) |
|
| 85 | 85 |
if err != nil {
|
| 86 | 86 |
return "", fmt.Errorf("failed to export layer: %w", err)
|
| 87 | 87 |
} |
| ... | ... |
@@ -249,10 +250,38 @@ func writeContentsForImage(ctx context.Context, snName string, cs content.Store, |
| 249 | 249 |
|
| 250 | 250 |
// createDiff creates a layer diff into containerd's content store. |
| 251 | 251 |
// If the diff is empty it returns nil empty digest and no error. |
| 252 |
-func createDiff(ctx context.Context, name string, sn snapshots.Snapshotter, cs content.Store, comparer diff.Comparer) (*ocispec.Descriptor, digest.Digest, error) {
|
|
| 252 |
+func (i *ImageService) createDiff(ctx context.Context, name string, sn snapshots.Snapshotter, cs content.Store, comparer diff.Comparer) (*ocispec.Descriptor, digest.Digest, error) {
|
|
| 253 |
+ if !i.idMapping.Empty() {
|
|
| 254 |
+ // The rootfs of the container is remapped if an id mapping exists, we |
|
| 255 |
+ // need to "unremap" it before committing the snapshot |
|
| 256 |
+ rootPair := i.idMapping.RootPair() |
|
| 257 |
+ usernsID := fmt.Sprintf("%s-%d-%d", name, rootPair.UID, rootPair.GID)
|
|
| 258 |
+ remappedID := usernsID + remapSuffix |
|
| 259 |
+ |
|
| 260 |
+ err := sn.Commit(ctx, name+"-pre", name) |
|
| 261 |
+ if err != nil && !cerrdefs.IsAlreadyExists(err) {
|
|
| 262 |
+ return nil, "", err |
|
| 263 |
+ } |
|
| 264 |
+ |
|
| 265 |
+ if !cerrdefs.IsAlreadyExists(err) {
|
|
| 266 |
+ mounts, err := sn.Prepare(ctx, remappedID, name+"-pre") |
|
| 267 |
+ if err != nil {
|
|
| 268 |
+ return nil, "", err |
|
| 269 |
+ } |
|
| 270 |
+ |
|
| 271 |
+ if err := i.unremapRootFS(ctx, mounts); err != nil {
|
|
| 272 |
+ return nil, "", err |
|
| 273 |
+ } |
|
| 274 |
+ |
|
| 275 |
+ if err := sn.Commit(ctx, name, remappedID); err != nil {
|
|
| 276 |
+ return nil, "", err |
|
| 277 |
+ } |
|
| 278 |
+ } |
|
| 279 |
+ } |
|
| 280 |
+ |
|
| 253 | 281 |
newDesc, err := rootfs.CreateDiff(ctx, name, sn, comparer) |
| 254 | 282 |
if err != nil {
|
| 255 |
- return nil, "", err |
|
| 283 |
+ return nil, "", errors.Wrap(err, "CreateDiff") |
|
| 256 | 284 |
} |
| 257 | 285 |
|
| 258 | 286 |
ra, err := cs.ReaderAt(ctx, newDesc) |
| ... | ... |
@@ -311,7 +340,7 @@ func (i *ImageService) applyDiffLayer(ctx context.Context, name string, containe |
| 311 | 311 |
|
| 312 | 312 |
defer func() {
|
| 313 | 313 |
if retErr != nil {
|
| 314 |
- // NOTE: the snapshotter should be hold by lease. Even |
|
| 314 |
+ // NOTE: the snapshotter should be held by lease. Even |
|
| 315 | 315 |
// if the cleanup fails, the containerd gc can delete it. |
| 316 | 316 |
if err := sn.Remove(ctx, key); err != nil {
|
| 317 | 317 |
log.G(ctx).Warnf("failed to cleanup aborted apply %s: %s", key, err)
|
| ... | ... |
@@ -323,35 +352,6 @@ func (i *ImageService) applyDiffLayer(ctx context.Context, name string, containe |
| 323 | 323 |
return err |
| 324 | 324 |
} |
| 325 | 325 |
|
| 326 |
- if !i.idMapping.Empty() {
|
|
| 327 |
- // The rootfs of the container is remapped if an id mapping exists, we |
|
| 328 |
- // need to "unremap" it before committing the snapshot |
|
| 329 |
- rootPair := i.idMapping.RootPair() |
|
| 330 |
- usernsID := fmt.Sprintf("%s-%d-%d", key, rootPair.UID, rootPair.GID)
|
|
| 331 |
- remappedID := usernsID + remapSuffix |
|
| 332 |
- |
|
| 333 |
- if err = sn.Commit(ctx, name+"-pre", key); err != nil {
|
|
| 334 |
- if cerrdefs.IsAlreadyExists(err) {
|
|
| 335 |
- return nil |
|
| 336 |
- } |
|
| 337 |
- return err |
|
| 338 |
- } |
|
| 339 |
- |
|
| 340 |
- mounts, err = sn.Prepare(ctx, remappedID, name+"-pre") |
|
| 341 |
- if err != nil {
|
|
| 342 |
- return err |
|
| 343 |
- } |
|
| 344 |
- |
|
| 345 |
- if err := i.unremapRootFS(ctx, mounts); err != nil {
|
|
| 346 |
- return err |
|
| 347 |
- } |
|
| 348 |
- |
|
| 349 |
- if err := sn.Commit(ctx, name, remappedID); err != nil {
|
|
| 350 |
- return err |
|
| 351 |
- } |
|
| 352 |
- key = remappedID |
|
| 353 |
- } |
|
| 354 |
- |
|
| 355 | 326 |
if err = sn.Commit(ctx, name, key); err != nil {
|
| 356 | 327 |
if cerrdefs.IsAlreadyExists(err) {
|
| 357 | 328 |
return nil |
| ... | ... |
@@ -17,8 +17,6 @@ import ( |
| 17 | 17 |
"github.com/pkg/errors" |
| 18 | 18 |
) |
| 19 | 19 |
|
| 20 |
-const remapSuffix = "-remap" |
|
| 21 |
- |
|
| 22 | 20 |
// PrepareSnapshot prepares a snapshot from a parent image for a container |
| 23 | 21 |
func (i *ImageService) PrepareSnapshot(ctx context.Context, id string, parentImage string, platform *ocispec.Platform, setupInit func(string) error) error {
|
| 24 | 22 |
var parentSnapshot string |
| ... | ... |
@@ -11,38 +11,33 @@ import ( |
| 11 | 11 |
|
| 12 | 12 |
"github.com/containerd/containerd/mount" |
| 13 | 13 |
"github.com/containerd/containerd/snapshots" |
| 14 |
- "github.com/containerd/log" |
|
| 14 |
+ "github.com/containerd/continuity/sysx" |
|
| 15 | 15 |
"github.com/docker/docker/pkg/idtools" |
| 16 | 16 |
) |
| 17 | 17 |
|
| 18 |
-func (i *ImageService) remapSnapshot(ctx context.Context, snapshotter snapshots.Snapshotter, id string, parentSnapshot string) error {
|
|
| 19 |
- rootPair := i.idMapping.RootPair() |
|
| 20 |
- usernsID := fmt.Sprintf("%s-%d-%d", parentSnapshot, rootPair.UID, rootPair.GID)
|
|
| 21 |
- remappedID := usernsID + remapSuffix |
|
| 18 |
+const ( |
|
| 19 |
+ // Values based on linux/include/uapi/linux/capability.h |
|
| 20 |
+ xattrCapsSz2 = 20 |
|
| 21 |
+ versionOffset = 3 |
|
| 22 |
+ vfsCapRevision2 = 2 |
|
| 23 |
+ vfsCapRevision3 = 3 |
|
| 24 |
+ remapSuffix = "-remap" |
|
| 25 |
+) |
|
| 22 | 26 |
|
| 23 |
- // If the remapped snapshot already exist we only need to prepare the new snapshot |
|
| 24 |
- if _, err := snapshotter.Stat(ctx, usernsID); err == nil {
|
|
| 25 |
- _, err = snapshotter.Prepare(ctx, id, usernsID) |
|
| 27 |
+func (i *ImageService) remapSnapshot(ctx context.Context, snapshotter snapshots.Snapshotter, id string, parentSnapshot string) error {
|
|
| 28 |
+ _, err := snapshotter.Prepare(ctx, id, parentSnapshot) |
|
| 29 |
+ if err != nil {
|
|
| 26 | 30 |
return err |
| 27 | 31 |
} |
| 28 |
- |
|
| 29 |
- mounts, err := snapshotter.Prepare(ctx, remappedID, parentSnapshot) |
|
| 32 |
+ mounts, err := snapshotter.Mounts(ctx, id) |
|
| 30 | 33 |
if err != nil {
|
| 31 | 34 |
return err |
| 32 | 35 |
} |
| 33 | 36 |
|
| 34 | 37 |
if err := i.remapRootFS(ctx, mounts); err != nil {
|
| 35 |
- if rmErr := snapshotter.Remove(ctx, usernsID); rmErr != nil {
|
|
| 36 |
- log.G(ctx).WithError(rmErr).Warn("failed to remove snapshot after remap error")
|
|
| 37 |
- } |
|
| 38 | 38 |
return err |
| 39 | 39 |
} |
| 40 | 40 |
|
| 41 |
- if err := snapshotter.Commit(ctx, usernsID, remappedID); err != nil {
|
|
| 42 |
- return err |
|
| 43 |
- } |
|
| 44 |
- |
|
| 45 |
- _, err = snapshotter.Prepare(ctx, id, usernsID) |
|
| 46 | 41 |
return err |
| 47 | 42 |
} |
| 48 | 43 |
|
| ... | ... |
@@ -63,7 +58,7 @@ func (i *ImageService) remapRootFS(ctx context.Context, mounts []mount.Mount) er |
| 63 | 63 |
return err |
| 64 | 64 |
} |
| 65 | 65 |
|
| 66 |
- return os.Lchown(path, ids.UID, ids.GID) |
|
| 66 |
+ return chownWithCaps(path, ids.UID, ids.GID) |
|
| 67 | 67 |
}) |
| 68 | 68 |
}) |
| 69 | 69 |
} |
| ... | ... |
@@ -85,7 +80,48 @@ func (i *ImageService) unremapRootFS(ctx context.Context, mounts []mount.Mount) |
| 85 | 85 |
return err |
| 86 | 86 |
} |
| 87 | 87 |
|
| 88 |
- return os.Lchown(path, uid, gid) |
|
| 88 |
+ return chownWithCaps(path, uid, gid) |
|
| 89 | 89 |
}) |
| 90 | 90 |
}) |
| 91 | 91 |
} |
| 92 |
+ |
|
| 93 |
+// chownWithCaps will chown path and preserve the extended attributes. |
|
| 94 |
+// chowning a file will remove the capabilities, so we need to first get all of |
|
| 95 |
+// them, chown the file, and then set the extended attributes |
|
| 96 |
+func chownWithCaps(path string, uid int, gid int) error {
|
|
| 97 |
+ xattrKeys, err := sysx.LListxattr(path) |
|
| 98 |
+ if err != nil {
|
|
| 99 |
+ return err |
|
| 100 |
+ } |
|
| 101 |
+ |
|
| 102 |
+ xattrs := make(map[string][]byte, len(xattrKeys)) |
|
| 103 |
+ |
|
| 104 |
+ for _, xattr := range xattrKeys {
|
|
| 105 |
+ data, err := sysx.LGetxattr(path, xattr) |
|
| 106 |
+ if err != nil {
|
|
| 107 |
+ return err |
|
| 108 |
+ } |
|
| 109 |
+ xattrs[xattr] = data |
|
| 110 |
+ } |
|
| 111 |
+ |
|
| 112 |
+ if err := os.Lchown(path, uid, gid); err != nil {
|
|
| 113 |
+ return err |
|
| 114 |
+ } |
|
| 115 |
+ |
|
| 116 |
+ for xattrKey, xattrValue := range xattrs {
|
|
| 117 |
+ length := len(xattrValue) |
|
| 118 |
+ // make sure the capabilities are version 2, |
|
| 119 |
+ // capabilities version 3 also store the root uid of the namespace, |
|
| 120 |
+ // we don't want this when we are in userns-remap mode |
|
| 121 |
+ // see: https://github.com/moby/moby/pull/41724 |
|
| 122 |
+ if xattrKey == "security.capability" && xattrValue[versionOffset] == vfsCapRevision3 {
|
|
| 123 |
+ xattrValue[versionOffset] = vfsCapRevision2 |
|
| 124 |
+ length = xattrCapsSz2 |
|
| 125 |
+ } |
|
| 126 |
+ if err := sysx.LSetxattr(path, xattrKey, xattrValue[:length], 0); err != nil {
|
|
| 127 |
+ return err |
|
| 128 |
+ } |
|
| 129 |
+ } |
|
| 130 |
+ |
|
| 131 |
+ return nil |
|
| 132 |
+} |