Browse code

Merge pull request #26564 from miminar/prevent-blob-uploads

Avoid unnecessary blob uploads

Sebastiaan van Stijn authored on 2016/10/01 19:53:10
Showing 4 changed files
... ...
@@ -1,52 +1,130 @@
1 1
 package metadata
2 2
 
3 3
 import (
4
+	"crypto/hmac"
5
+	"crypto/sha256"
6
+	"encoding/hex"
4 7
 	"encoding/json"
5 8
 
6 9
 	"github.com/docker/distribution/digest"
10
+	"github.com/docker/docker/api/types"
7 11
 	"github.com/docker/docker/layer"
8 12
 )
9 13
 
10 14
 // V2MetadataService maps layer IDs to a set of known metadata for
11 15
 // the layer.
12
-type V2MetadataService struct {
16
+type V2MetadataService interface {
17
+	GetMetadata(diffID layer.DiffID) ([]V2Metadata, error)
18
+	GetDiffID(dgst digest.Digest) (layer.DiffID, error)
19
+	Add(diffID layer.DiffID, metadata V2Metadata) error
20
+	TagAndAdd(diffID layer.DiffID, hmacKey []byte, metadata V2Metadata) error
21
+	Remove(metadata V2Metadata) error
22
+}
23
+
24
+// v2MetadataService implements V2MetadataService
25
+type v2MetadataService struct {
13 26
 	store Store
14 27
 }
15 28
 
29
+var _ V2MetadataService = &v2MetadataService{}
30
+
16 31
 // V2Metadata contains the digest and source repository information for a layer.
17 32
 type V2Metadata struct {
18 33
 	Digest           digest.Digest
19 34
 	SourceRepository string
35
+	// HMAC hashes above attributes with recent authconfig digest used as a key in order to determine matching
36
+	// metadata entries accompanied by the same credentials without actually exposing them.
37
+	HMAC string
38
+}
39
+
40
+// CheckV2MetadataHMAC returns true if the given "meta" is tagged with an hmac hashed by the given "key".
41
+func CheckV2MetadataHMAC(meta *V2Metadata, key []byte) bool {
42
+	if len(meta.HMAC) == 0 || len(key) == 0 {
43
+		return len(meta.HMAC) == 0 && len(key) == 0
44
+	}
45
+	mac := hmac.New(sha256.New, key)
46
+	mac.Write([]byte(meta.Digest))
47
+	mac.Write([]byte(meta.SourceRepository))
48
+	expectedMac := mac.Sum(nil)
49
+
50
+	storedMac, err := hex.DecodeString(meta.HMAC)
51
+	if err != nil {
52
+		return false
53
+	}
54
+
55
+	return hmac.Equal(storedMac, expectedMac)
56
+}
57
+
58
+// ComputeV2MetadataHMAC returns an hmac for the given "meta" hashed by the given "key".
59
+func ComputeV2MetadataHMAC(key []byte, meta *V2Metadata) string {
60
+	if len(key) == 0 || meta == nil {
61
+		return ""
62
+	}
63
+	mac := hmac.New(sha256.New, key)
64
+	mac.Write([]byte(meta.Digest))
65
+	mac.Write([]byte(meta.SourceRepository))
66
+	return hex.EncodeToString(mac.Sum(nil))
67
+}
68
+
69
+// ComputeV2MetadataHMACKey returns a key for the given "authConfig" that can be used to hash v2 metadata
70
+// entries.
71
+func ComputeV2MetadataHMACKey(authConfig *types.AuthConfig) ([]byte, error) {
72
+	if authConfig == nil {
73
+		return nil, nil
74
+	}
75
+	key := authConfigKeyInput{
76
+		Username:      authConfig.Username,
77
+		Password:      authConfig.Password,
78
+		Auth:          authConfig.Auth,
79
+		IdentityToken: authConfig.IdentityToken,
80
+		RegistryToken: authConfig.RegistryToken,
81
+	}
82
+	buf, err := json.Marshal(&key)
83
+	if err != nil {
84
+		return nil, err
85
+	}
86
+	return []byte(digest.FromBytes([]byte(buf))), nil
87
+}
88
+
89
+// authConfigKeyInput is a reduced AuthConfig structure holding just relevant credential data eligible for
90
+// hmac key creation.
91
+type authConfigKeyInput struct {
92
+	Username string `json:"username,omitempty"`
93
+	Password string `json:"password,omitempty"`
94
+	Auth     string `json:"auth,omitempty"`
95
+
96
+	IdentityToken string `json:"identitytoken,omitempty"`
97
+	RegistryToken string `json:"registrytoken,omitempty"`
20 98
 }
21 99
 
22 100
 // maxMetadata is the number of metadata entries to keep per layer DiffID.
23 101
 const maxMetadata = 50
24 102
 
25 103
 // NewV2MetadataService creates a new diff ID to v2 metadata mapping service.
26
-func NewV2MetadataService(store Store) *V2MetadataService {
27
-	return &V2MetadataService{
104
+func NewV2MetadataService(store Store) V2MetadataService {
105
+	return &v2MetadataService{
28 106
 		store: store,
29 107
 	}
30 108
 }
31 109
 
32
-func (serv *V2MetadataService) diffIDNamespace() string {
110
+func (serv *v2MetadataService) diffIDNamespace() string {
33 111
 	return "v2metadata-by-diffid"
34 112
 }
35 113
 
36
-func (serv *V2MetadataService) digestNamespace() string {
114
+func (serv *v2MetadataService) digestNamespace() string {
37 115
 	return "diffid-by-digest"
38 116
 }
39 117
 
40
-func (serv *V2MetadataService) diffIDKey(diffID layer.DiffID) string {
118
+func (serv *v2MetadataService) diffIDKey(diffID layer.DiffID) string {
41 119
 	return string(digest.Digest(diffID).Algorithm()) + "/" + digest.Digest(diffID).Hex()
42 120
 }
43 121
 
44
-func (serv *V2MetadataService) digestKey(dgst digest.Digest) string {
122
+func (serv *v2MetadataService) digestKey(dgst digest.Digest) string {
45 123
 	return string(dgst.Algorithm()) + "/" + dgst.Hex()
46 124
 }
47 125
 
48 126
 // GetMetadata finds the metadata associated with a layer DiffID.
49
-func (serv *V2MetadataService) GetMetadata(diffID layer.DiffID) ([]V2Metadata, error) {
127
+func (serv *v2MetadataService) GetMetadata(diffID layer.DiffID) ([]V2Metadata, error) {
50 128
 	jsonBytes, err := serv.store.Get(serv.diffIDNamespace(), serv.diffIDKey(diffID))
51 129
 	if err != nil {
52 130
 		return nil, err
... ...
@@ -61,7 +139,7 @@ func (serv *V2MetadataService) GetMetadata(diffID layer.DiffID) ([]V2Metadata, e
61 61
 }
62 62
 
63 63
 // GetDiffID finds a layer DiffID from a digest.
64
-func (serv *V2MetadataService) GetDiffID(dgst digest.Digest) (layer.DiffID, error) {
64
+func (serv *v2MetadataService) GetDiffID(dgst digest.Digest) (layer.DiffID, error) {
65 65
 	diffIDBytes, err := serv.store.Get(serv.digestNamespace(), serv.digestKey(dgst))
66 66
 	if err != nil {
67 67
 		return layer.DiffID(""), err
... ...
@@ -72,7 +150,7 @@ func (serv *V2MetadataService) GetDiffID(dgst digest.Digest) (layer.DiffID, erro
72 72
 
73 73
 // Add associates metadata with a layer DiffID. If too many metadata entries are
74 74
 // present, the oldest one is dropped.
75
-func (serv *V2MetadataService) Add(diffID layer.DiffID, metadata V2Metadata) error {
75
+func (serv *v2MetadataService) Add(diffID layer.DiffID, metadata V2Metadata) error {
76 76
 	oldMetadata, err := serv.GetMetadata(diffID)
77 77
 	if err != nil {
78 78
 		oldMetadata = nil
... ...
@@ -105,8 +183,15 @@ func (serv *V2MetadataService) Add(diffID layer.DiffID, metadata V2Metadata) err
105 105
 	return serv.store.Set(serv.digestNamespace(), serv.digestKey(metadata.Digest), []byte(diffID))
106 106
 }
107 107
 
108
+// TagAndAdd tags the given "meta" with an hmac hashed by the given "hmacKey" and associates it with a layer
109
+// DiffID. If too many metadata entries are present, the oldest one is dropped.
110
+func (serv *v2MetadataService) TagAndAdd(diffID layer.DiffID, hmacKey []byte, meta V2Metadata) error {
111
+	meta.HMAC = ComputeV2MetadataHMAC(hmacKey, &meta)
112
+	return serv.Add(diffID, meta)
113
+}
114
+
108 115
 // Remove unassociates a metadata entry from a layer DiffID.
109
-func (serv *V2MetadataService) Remove(metadata V2Metadata) error {
116
+func (serv *v2MetadataService) Remove(metadata V2Metadata) error {
110 117
 	diffID, err := serv.GetDiffID(metadata.Digest)
111 118
 	if err != nil {
112 119
 		return err
... ...
@@ -50,7 +50,7 @@ func (e ImageConfigPullError) Error() string {
50 50
 }
51 51
 
52 52
 type v2Puller struct {
53
-	V2MetadataService *metadata.V2MetadataService
53
+	V2MetadataService metadata.V2MetadataService
54 54
 	endpoint          registry.APIEndpoint
55 55
 	config            *ImagePullConfig
56 56
 	repoInfo          *registry.RepositoryInfo
... ...
@@ -134,7 +134,7 @@ type v2LayerDescriptor struct {
134 134
 	digest            digest.Digest
135 135
 	repoInfo          *registry.RepositoryInfo
136 136
 	repo              distribution.Repository
137
-	V2MetadataService *metadata.V2MetadataService
137
+	V2MetadataService metadata.V2MetadataService
138 138
 	tmpFile           *os.File
139 139
 	verifier          digest.Verifier
140 140
 	src               distribution.Descriptor
... ...
@@ -5,8 +5,12 @@ import (
5 5
 	"fmt"
6 6
 	"io"
7 7
 	"runtime"
8
+	"sort"
9
+	"strings"
8 10
 	"sync"
9 11
 
12
+	"golang.org/x/net/context"
13
+
10 14
 	"github.com/Sirupsen/logrus"
11 15
 	"github.com/docker/distribution"
12 16
 	"github.com/docker/distribution/digest"
... ...
@@ -23,7 +27,11 @@ import (
23 23
 	"github.com/docker/docker/pkg/stringid"
24 24
 	"github.com/docker/docker/reference"
25 25
 	"github.com/docker/docker/registry"
26
-	"golang.org/x/net/context"
26
+)
27
+
28
+const (
29
+	smallLayerMaximumSize  = 100 * (1 << 10) // 100KB
30
+	middleLayerMaximumSize = 10 * (1 << 20)  // 10MB
27 31
 )
28 32
 
29 33
 // PushResult contains the tag, manifest digest, and manifest size from the
... ...
@@ -36,7 +44,7 @@ type PushResult struct {
36 36
 }
37 37
 
38 38
 type v2Pusher struct {
39
-	v2MetadataService *metadata.V2MetadataService
39
+	v2MetadataService metadata.V2MetadataService
40 40
 	ref               reference.Named
41 41
 	endpoint          registry.APIEndpoint
42 42
 	repoInfo          *registry.RepositoryInfo
... ...
@@ -133,10 +141,16 @@ func (p *v2Pusher) pushV2Tag(ctx context.Context, ref reference.NamedTagged, id
133 133
 		defer layer.ReleaseAndLog(p.config.LayerStore, l)
134 134
 	}
135 135
 
136
+	hmacKey, err := metadata.ComputeV2MetadataHMACKey(p.config.AuthConfig)
137
+	if err != nil {
138
+		return fmt.Errorf("failed to compute hmac key of auth config: %v", err)
139
+	}
140
+
136 141
 	var descriptors []xfer.UploadDescriptor
137 142
 
138 143
 	descriptorTemplate := v2PushDescriptor{
139 144
 		v2MetadataService: p.v2MetadataService,
145
+		hmacKey:           hmacKey,
140 146
 		repoInfo:          p.repoInfo,
141 147
 		ref:               p.ref,
142 148
 		repo:              p.repo,
... ...
@@ -147,6 +161,7 @@ func (p *v2Pusher) pushV2Tag(ctx context.Context, ref reference.NamedTagged, id
147 147
 	for i := 0; i < len(img.RootFS.DiffIDs); i++ {
148 148
 		descriptor := descriptorTemplate
149 149
 		descriptor.layer = l
150
+		descriptor.checkedDigests = make(map[digest.Digest]struct{})
150 151
 		descriptors = append(descriptors, &descriptor)
151 152
 
152 153
 		l = l.Parent()
... ...
@@ -232,12 +247,15 @@ func manifestFromBuilder(ctx context.Context, builder distribution.ManifestBuild
232 232
 
233 233
 type v2PushDescriptor struct {
234 234
 	layer             layer.Layer
235
-	v2MetadataService *metadata.V2MetadataService
235
+	v2MetadataService metadata.V2MetadataService
236
+	hmacKey           []byte
236 237
 	repoInfo          reference.Named
237 238
 	ref               reference.Named
238 239
 	repo              distribution.Repository
239 240
 	pushState         *pushState
240 241
 	remoteDescriptor  distribution.Descriptor
242
+	// a set of digests whose presence has been checked in a target repository
243
+	checkedDigests map[digest.Digest]struct{}
241 244
 }
242 245
 
243 246
 func (pd *v2PushDescriptor) Key() string {
... ...
@@ -272,71 +290,61 @@ func (pd *v2PushDescriptor) Upload(ctx context.Context, progressOutput progress.
272 272
 	}
273 273
 	pd.pushState.Unlock()
274 274
 
275
+	maxMountAttempts, maxExistenceChecks, checkOtherRepositories := getMaxMountAndExistenceCheckAttempts(pd.layer)
276
+
275 277
 	// Do we have any metadata associated with this layer's DiffID?
276 278
 	v2Metadata, err := pd.v2MetadataService.GetMetadata(diffID)
277 279
 	if err == nil {
278
-		descriptor, exists, err := layerAlreadyExists(ctx, v2Metadata, pd.repoInfo, pd.repo, pd.pushState)
279
-		if err != nil {
280
-			progress.Update(progressOutput, pd.ID(), "Image push failed")
281
-			return distribution.Descriptor{}, retryOnError(err)
282
-		}
283
-		if exists {
284
-			progress.Update(progressOutput, pd.ID(), "Layer already exists")
285
-			pd.pushState.Lock()
286
-			pd.pushState.remoteLayers[diffID] = descriptor
287
-			pd.pushState.Unlock()
288
-			return descriptor, nil
280
+		// check for blob existence in the target repository if we have a mapping with it
281
+		descriptor, exists, err := pd.layerAlreadyExists(ctx, progressOutput, diffID, false, 1, v2Metadata)
282
+		if exists || err != nil {
283
+			return descriptor, err
289 284
 		}
290 285
 	}
291 286
 
292
-	logrus.Debugf("Pushing layer: %s", diffID)
293
-
294 287
 	// if digest was empty or not saved, or if blob does not exist on the remote repository,
295 288
 	// then push the blob.
296 289
 	bs := pd.repo.Blobs(ctx)
297 290
 
298 291
 	var layerUpload distribution.BlobWriter
299
-	mountAttemptsRemaining := 3
300
-
301
-	// Attempt to find another repository in the same registry to mount the layer
302
-	// from to avoid an unnecessary upload.
303
-	// Note: metadata is stored from oldest to newest, so we iterate through this
304
-	// slice in reverse to maximize our chances of the blob still existing in the
305
-	// remote repository.
306
-	for i := len(v2Metadata) - 1; i >= 0 && mountAttemptsRemaining > 0; i-- {
307
-		mountFrom := v2Metadata[i]
308 292
 
309
-		sourceRepo, err := reference.ParseNamed(mountFrom.SourceRepository)
310
-		if err != nil {
311
-			continue
312
-		}
313
-		if pd.repoInfo.Hostname() != sourceRepo.Hostname() {
314
-			// don't mount blobs from another registry
315
-			continue
316
-		}
293
+	// Attempt to find another repository in the same registry to mount the layer from to avoid an unnecessary upload
294
+	candidates := getRepositoryMountCandidates(pd.repoInfo, pd.hmacKey, maxMountAttempts, v2Metadata)
295
+	for _, mountCandidate := range candidates {
296
+		logrus.Debugf("attempting to mount layer %s (%s) from %s", diffID, mountCandidate.Digest, mountCandidate.SourceRepository)
297
+		createOpts := []distribution.BlobCreateOption{}
298
+
299
+		if len(mountCandidate.SourceRepository) > 0 {
300
+			namedRef, err := reference.WithName(mountCandidate.SourceRepository)
301
+			if err != nil {
302
+				logrus.Errorf("failed to parse source repository reference %v: %v", namedRef.String(), err)
303
+				pd.v2MetadataService.Remove(mountCandidate)
304
+				continue
305
+			}
317 306
 
318
-		namedRef, err := reference.WithName(mountFrom.SourceRepository)
319
-		if err != nil {
320
-			continue
321
-		}
307
+			// TODO (brianbland): We need to construct a reference where the Name is
308
+			// only the full remote name, so clean this up when distribution has a
309
+			// richer reference package
310
+			remoteRef, err := distreference.WithName(namedRef.RemoteName())
311
+			if err != nil {
312
+				logrus.Errorf("failed to make remote reference out of %q: %v", namedRef.RemoteName(), namedRef.RemoteName())
313
+				continue
314
+			}
322 315
 
323
-		// TODO (brianbland): We need to construct a reference where the Name is
324
-		// only the full remote name, so clean this up when distribution has a
325
-		// richer reference package
326
-		remoteRef, err := distreference.WithName(namedRef.RemoteName())
327
-		if err != nil {
328
-			continue
329
-		}
316
+			canonicalRef, err := distreference.WithDigest(remoteRef, mountCandidate.Digest)
317
+			if err != nil {
318
+				logrus.Errorf("failed to make canonical reference: %v", err)
319
+				continue
320
+			}
330 321
 
331
-		canonicalRef, err := distreference.WithDigest(remoteRef, mountFrom.Digest)
332
-		if err != nil {
333
-			continue
322
+			createOpts = append(createOpts, client.WithMountFrom(canonicalRef))
334 323
 		}
335 324
 
336
-		logrus.Debugf("attempting to mount layer %s (%s) from %s", diffID, mountFrom.Digest, sourceRepo.FullName())
337
-
338
-		layerUpload, err = bs.Create(ctx, client.WithMountFrom(canonicalRef))
325
+		// send the layer
326
+		lu, err := bs.Create(ctx, createOpts...)
339 327
 		switch err := err.(type) {
328
+		case nil:
329
+			// noop
340 330
 		case distribution.ErrBlobMounted:
341 331
 			progress.Updatef(progressOutput, pd.ID(), "Mounted from %s", err.From.Name())
342 332
 
... ...
@@ -348,21 +356,44 @@ func (pd *v2PushDescriptor) Upload(ctx context.Context, progressOutput progress.
348 348
 			pd.pushState.Unlock()
349 349
 
350 350
 			// Cache mapping from this layer's DiffID to the blobsum
351
-			if err := pd.v2MetadataService.Add(diffID, metadata.V2Metadata{Digest: mountFrom.Digest, SourceRepository: pd.repoInfo.FullName()}); err != nil {
351
+			if err := pd.v2MetadataService.TagAndAdd(diffID, pd.hmacKey, metadata.V2Metadata{
352
+				Digest:           err.Descriptor.Digest,
353
+				SourceRepository: pd.repoInfo.FullName(),
354
+			}); err != nil {
352 355
 				return distribution.Descriptor{}, xfer.DoNotRetry{Err: err}
353 356
 			}
354 357
 			return err.Descriptor, nil
355
-		case nil:
356
-			// blob upload session created successfully, so begin the upload
357
-			mountAttemptsRemaining = 0
358 358
 		default:
359
-			// unable to mount layer from this repository, so this source mapping is no longer valid
360
-			logrus.Debugf("unassociating layer %s (%s) with %s", diffID, mountFrom.Digest, mountFrom.SourceRepository)
361
-			pd.v2MetadataService.Remove(mountFrom)
362
-			mountAttemptsRemaining--
359
+			logrus.Infof("failed to mount layer %s (%s) from %s: %v", diffID, mountCandidate.Digest, mountCandidate.SourceRepository, err)
360
+		}
361
+
362
+		if len(mountCandidate.SourceRepository) > 0 &&
363
+			(metadata.CheckV2MetadataHMAC(&mountCandidate, pd.hmacKey) ||
364
+				len(mountCandidate.HMAC) == 0) {
365
+			cause := "blob mount failure"
366
+			if err != nil {
367
+				cause = fmt.Sprintf("an error: %v", err.Error())
368
+			}
369
+			logrus.Debugf("removing association between layer %s and %s due to %s", mountCandidate.Digest, mountCandidate.SourceRepository, cause)
370
+			pd.v2MetadataService.Remove(mountCandidate)
371
+		}
372
+
373
+		if lu != nil {
374
+			// cancel previous upload
375
+			cancelLayerUpload(ctx, mountCandidate.Digest, layerUpload)
376
+			layerUpload = lu
377
+		}
378
+	}
379
+
380
+	if maxExistenceChecks-len(pd.checkedDigests) > 0 {
381
+		// do additional layer existence checks with other known digests if any
382
+		descriptor, exists, err := pd.layerAlreadyExists(ctx, progressOutput, diffID, checkOtherRepositories, maxExistenceChecks-len(pd.checkedDigests), v2Metadata)
383
+		if exists || err != nil {
384
+			return descriptor, err
363 385
 		}
364 386
 	}
365 387
 
388
+	logrus.Debugf("Pushing layer: %s", diffID)
366 389
 	if layerUpload == nil {
367 390
 		layerUpload, err = bs.Create(ctx)
368 391
 		if err != nil {
... ...
@@ -371,6 +402,29 @@ func (pd *v2PushDescriptor) Upload(ctx context.Context, progressOutput progress.
371 371
 	}
372 372
 	defer layerUpload.Close()
373 373
 
374
+	// upload the blob
375
+	desc, err := pd.uploadUsingSession(ctx, progressOutput, diffID, layerUpload)
376
+	if err != nil {
377
+		return desc, err
378
+	}
379
+
380
+	return desc, nil
381
+}
382
+
383
+func (pd *v2PushDescriptor) SetRemoteDescriptor(descriptor distribution.Descriptor) {
384
+	pd.remoteDescriptor = descriptor
385
+}
386
+
387
+func (pd *v2PushDescriptor) Descriptor() distribution.Descriptor {
388
+	return pd.remoteDescriptor
389
+}
390
+
391
+func (pd *v2PushDescriptor) uploadUsingSession(
392
+	ctx context.Context,
393
+	progressOutput progress.Output,
394
+	diffID layer.DiffID,
395
+	layerUpload distribution.BlobWriter,
396
+) (distribution.Descriptor, error) {
374 397
 	arch, err := pd.layer.TarStream()
375 398
 	if err != nil {
376 399
 		return distribution.Descriptor{}, xfer.DoNotRetry{Err: err}
... ...
@@ -404,55 +458,237 @@ func (pd *v2PushDescriptor) Upload(ctx context.Context, progressOutput progress.
404 404
 	progress.Update(progressOutput, pd.ID(), "Pushed")
405 405
 
406 406
 	// Cache mapping from this layer's DiffID to the blobsum
407
-	if err := pd.v2MetadataService.Add(diffID, metadata.V2Metadata{Digest: pushDigest, SourceRepository: pd.repoInfo.FullName()}); err != nil {
407
+	if err := pd.v2MetadataService.TagAndAdd(diffID, pd.hmacKey, metadata.V2Metadata{
408
+		Digest:           pushDigest,
409
+		SourceRepository: pd.repoInfo.FullName(),
410
+	}); err != nil {
408 411
 		return distribution.Descriptor{}, xfer.DoNotRetry{Err: err}
409 412
 	}
410 413
 
411
-	pd.pushState.Lock()
412
-
413
-	// If Commit succeeded, that's an indication that the remote registry
414
-	// speaks the v2 protocol.
415
-	pd.pushState.confirmedV2 = true
416
-
417
-	descriptor := distribution.Descriptor{
414
+	desc := distribution.Descriptor{
418 415
 		Digest:    pushDigest,
419 416
 		MediaType: schema2.MediaTypeLayer,
420 417
 		Size:      nn,
421 418
 	}
422
-	pd.pushState.remoteLayers[diffID] = descriptor
423 419
 
420
+	pd.pushState.Lock()
421
+	// If Commit succeeded, that's an indication that the remote registry speaks the v2 protocol.
422
+	pd.pushState.confirmedV2 = true
423
+	pd.pushState.remoteLayers[diffID] = desc
424 424
 	pd.pushState.Unlock()
425 425
 
426
-	return descriptor, nil
427
-}
428
-
429
-func (pd *v2PushDescriptor) SetRemoteDescriptor(descriptor distribution.Descriptor) {
430
-	pd.remoteDescriptor = descriptor
431
-}
432
-
433
-func (pd *v2PushDescriptor) Descriptor() distribution.Descriptor {
434
-	return pd.remoteDescriptor
426
+	return desc, nil
435 427
 }
436 428
 
437
-// layerAlreadyExists checks if the registry already know about any of the
438
-// metadata passed in the "metadata" slice. If it finds one that the registry
439
-// knows about, it returns the known digest and "true".
440
-func layerAlreadyExists(ctx context.Context, metadata []metadata.V2Metadata, repoInfo reference.Named, repo distribution.Repository, pushState *pushState) (distribution.Descriptor, bool, error) {
441
-	for _, meta := range metadata {
442
-		// Only check blobsums that are known to this repository or have an unknown source
443
-		if meta.SourceRepository != "" && meta.SourceRepository != repoInfo.FullName() {
429
+// layerAlreadyExists checks if the registry already knows about any of the metadata passed in the "metadata"
430
+// slice. If it finds one that the registry knows about, it returns the known digest and "true". If
431
+// "checkOtherRepositories" is true, stat will be performed also with digests mapped to any other repository
432
+// (not just the target one).
433
+func (pd *v2PushDescriptor) layerAlreadyExists(
434
+	ctx context.Context,
435
+	progressOutput progress.Output,
436
+	diffID layer.DiffID,
437
+	checkOtherRepositories bool,
438
+	maxExistenceCheckAttempts int,
439
+	v2Metadata []metadata.V2Metadata,
440
+) (desc distribution.Descriptor, exists bool, err error) {
441
+	// filter the metadata
442
+	candidates := []metadata.V2Metadata{}
443
+	for _, meta := range v2Metadata {
444
+		if len(meta.SourceRepository) > 0 && !checkOtherRepositories && meta.SourceRepository != pd.repoInfo.FullName() {
445
+			continue
446
+		}
447
+		candidates = append(candidates, meta)
448
+	}
449
+	// sort the candidates by similarity
450
+	sortV2MetadataByLikenessAndAge(pd.repoInfo, pd.hmacKey, candidates)
451
+
452
+	digestToMetadata := make(map[digest.Digest]*metadata.V2Metadata)
453
+	// an array of unique blob digests ordered from the best mount candidates to worst
454
+	layerDigests := []digest.Digest{}
455
+	for i := 0; i < len(candidates); i++ {
456
+		if len(layerDigests) >= maxExistenceCheckAttempts {
457
+			break
458
+		}
459
+		meta := &candidates[i]
460
+		if _, exists := digestToMetadata[meta.Digest]; exists {
461
+			// keep reference just to the first mapping (the best mount candidate)
444 462
 			continue
445 463
 		}
446
-		descriptor, err := repo.Blobs(ctx).Stat(ctx, meta.Digest)
464
+		if _, exists := pd.checkedDigests[meta.Digest]; exists {
465
+			// existence of this digest has already been tested
466
+			continue
467
+		}
468
+		digestToMetadata[meta.Digest] = meta
469
+		layerDigests = append(layerDigests, meta.Digest)
470
+	}
471
+
472
+	for _, dgst := range layerDigests {
473
+		meta := digestToMetadata[dgst]
474
+		logrus.Debugf("Checking for presence of layer %s (%s) in %s", diffID, dgst, pd.repoInfo.FullName())
475
+		desc, err = pd.repo.Blobs(ctx).Stat(ctx, dgst)
476
+		pd.checkedDigests[meta.Digest] = struct{}{}
447 477
 		switch err {
448 478
 		case nil:
449
-			descriptor.MediaType = schema2.MediaTypeLayer
450
-			return descriptor, true, nil
479
+			if m, ok := digestToMetadata[desc.Digest]; !ok || m.SourceRepository != pd.repoInfo.FullName() || !metadata.CheckV2MetadataHMAC(m, pd.hmacKey) {
480
+				// cache mapping from this layer's DiffID to the blobsum
481
+				if err := pd.v2MetadataService.TagAndAdd(diffID, pd.hmacKey, metadata.V2Metadata{
482
+					Digest:           desc.Digest,
483
+					SourceRepository: pd.repoInfo.FullName(),
484
+				}); err != nil {
485
+					return distribution.Descriptor{}, false, xfer.DoNotRetry{Err: err}
486
+				}
487
+			}
488
+			desc.MediaType = schema2.MediaTypeLayer
489
+			exists = true
490
+			break
451 491
 		case distribution.ErrBlobUnknown:
452
-			// nop
492
+			if meta.SourceRepository == pd.repoInfo.FullName() {
493
+				// remove the mapping to the target repository
494
+				pd.v2MetadataService.Remove(*meta)
495
+			}
453 496
 		default:
454
-			return distribution.Descriptor{}, false, err
497
+			progress.Update(progressOutput, pd.ID(), "Image push failed")
498
+			return desc, false, retryOnError(err)
499
+		}
500
+	}
501
+
502
+	if exists {
503
+		progress.Update(progressOutput, pd.ID(), "Layer already exists")
504
+		pd.pushState.Lock()
505
+		pd.pushState.remoteLayers[diffID] = desc
506
+		pd.pushState.Unlock()
507
+	}
508
+
509
+	return desc, exists, nil
510
+}
511
+
512
+// getMaxMountAndExistenceCheckAttempts returns a maximum number of cross repository mount attempts from
513
+// source repositories of target registry, maximum number of layer existence checks performed on the target
514
+// repository and whether the check shall be done also with digests mapped to different repositories. The
515
+// decision is based on layer size. The smaller the layer, the fewer attempts shall be made because the cost
516
+// of upload does not outweigh the latency of the additional requests.
517
+func getMaxMountAndExistenceCheckAttempts(layer layer.Layer) (maxMountAttempts, maxExistenceCheckAttempts int, checkOtherRepositories bool) {
518
+	size, err := layer.DiffSize()
519
+	switch {
520
+	// big blob
521
+	case size > middleLayerMaximumSize:
522
+		// 1st attempt to mount the blob few times
523
+		// 2nd few existence checks with digests associated to any repository
524
+		// then fallback to upload
525
+		return 4, 3, true
526
+
527
+	// middle sized blobs; if we could not get the size, assume we deal with middle sized blob
528
+	case size > smallLayerMaximumSize, err != nil:
529
+		// 1st attempt to mount blobs of average size few times
530
+		// 2nd try at most 1 existence check if there's an existing mapping to the target repository
531
+		// then fallback to upload
532
+		return 3, 1, false
533
+
534
+	// small blobs, do a minimum number of checks
535
+	default:
536
+		return 1, 1, false
537
+	}
538
+}
539
+
540
+// getRepositoryMountCandidates returns an array of v2 metadata items whose SourceRepository belongs to the
541
+// registry of "repoInfo", excluding entries mapped to the target repository itself. The array is sorted by
542
+// likeness to the target, with ties ordered from youngest to oldest, and is cut to "max" entries if max >= 0.
543
+func getRepositoryMountCandidates(
544
+	repoInfo reference.Named,
545
+	hmacKey []byte,
546
+	max int,
547
+	v2Metadata []metadata.V2Metadata,
548
+) []metadata.V2Metadata {
549
+	candidates := []metadata.V2Metadata{}
550
+	for _, meta := range v2Metadata {
551
+		sourceRepo, err := reference.ParseNamed(meta.SourceRepository)
552
+		if err != nil || repoInfo.Hostname() != sourceRepo.Hostname() {
553
+			continue
554
+		}
555
+		// target repository is not a viable candidate
556
+		if meta.SourceRepository == repoInfo.FullName() {
557
+			continue
558
+		}
559
+		candidates = append(candidates, meta)
560
+	}
561
+
562
+	sortV2MetadataByLikenessAndAge(repoInfo, hmacKey, candidates)
563
+	if max >= 0 && len(candidates) > max {
564
+		// select the youngest metadata
565
+		candidates = candidates[:max]
566
+	}
567
+
568
+	return candidates
569
+}
570
+
571
+// byLikeness is a sorting container for v2 metadata candidates for cross repository mount. The
572
+// candidate "a" is preferred over "b":
573
+//
574
+//  1. if it was hashed using the same AuthConfig as the one used to authenticate to target repository and the
575
+//     "b" was not
576
+//  2. if a number of its repository path components exactly matching path components of target repository is higher
577
+type byLikeness struct {
578
+	arr            []metadata.V2Metadata
579
+	hmacKey        []byte
580
+	pathComponents []string
581
+}
582
+
583
+func (bla byLikeness) Less(i, j int) bool {
584
+	aMacMatch := metadata.CheckV2MetadataHMAC(&bla.arr[i], bla.hmacKey)
585
+	bMacMatch := metadata.CheckV2MetadataHMAC(&bla.arr[j], bla.hmacKey)
586
+	if aMacMatch != bMacMatch {
587
+		return aMacMatch
588
+	}
589
+	aMatch := numOfMatchingPathComponents(bla.arr[i].SourceRepository, bla.pathComponents)
590
+	bMatch := numOfMatchingPathComponents(bla.arr[j].SourceRepository, bla.pathComponents)
591
+	return aMatch > bMatch
592
+}
593
+func (bla byLikeness) Swap(i, j int) {
594
+	bla.arr[i], bla.arr[j] = bla.arr[j], bla.arr[i]
595
+}
596
+func (bla byLikeness) Len() int { return len(bla.arr) }
597
+
598
+func sortV2MetadataByLikenessAndAge(repoInfo reference.Named, hmacKey []byte, marr []metadata.V2Metadata) {
599
+	// reverse the metadata array to shift the newest entries to the beginning
600
+	for i := 0; i < len(marr)/2; i++ {
601
+		marr[i], marr[len(marr)-i-1] = marr[len(marr)-i-1], marr[i]
602
+	}
603
+	// keep equal entries ordered from the youngest to the oldest
604
+	sort.Stable(byLikeness{
605
+		arr:            marr,
606
+		hmacKey:        hmacKey,
607
+		pathComponents: getPathComponents(repoInfo.FullName()),
608
+	})
609
+}
610
+
611
+// numOfMatchingPathComponents returns a number of path components in "pth" that exactly match "matchComponents".
612
+func numOfMatchingPathComponents(pth string, matchComponents []string) int {
613
+	pthComponents := getPathComponents(pth)
614
+	i := 0
615
+	for ; i < len(pthComponents) && i < len(matchComponents); i++ {
616
+		if matchComponents[i] != pthComponents[i] {
617
+			return i
618
+		}
619
+	}
620
+	return i
621
+}
622
+
623
+func getPathComponents(path string) []string {
624
+	// make sure to add docker.io/ prefix to the path
625
+	named, err := reference.ParseNamed(path)
626
+	if err == nil {
627
+		path = named.FullName()
628
+	}
629
+	return strings.Split(path, "/")
630
+}
631
+
632
+func cancelLayerUpload(ctx context.Context, dgst digest.Digest, layerUpload distribution.BlobWriter) {
633
+	if layerUpload != nil {
634
+		logrus.Debugf("cancelling upload of blob %s", dgst)
635
+		err := layerUpload.Cancel(ctx)
636
+		if err != nil {
637
+			logrus.Warnf("failed to cancel upload: %v", err)
455 638
 		}
456 639
 	}
457
-	return distribution.Descriptor{}, false, nil
458 640
 }
459 641
new file mode 100644
... ...
@@ -0,0 +1,574 @@
0
+package distribution
1
+
2
+import (
3
+	"net/http"
4
+	"reflect"
5
+	"testing"
6
+
7
+	"github.com/docker/distribution"
8
+	"github.com/docker/distribution/context"
9
+	"github.com/docker/distribution/digest"
10
+	"github.com/docker/distribution/manifest/schema2"
11
+	distreference "github.com/docker/distribution/reference"
12
+	"github.com/docker/docker/distribution/metadata"
13
+	"github.com/docker/docker/layer"
14
+	"github.com/docker/docker/pkg/progress"
15
+	"github.com/docker/docker/reference"
16
+)
17
+
18
+func TestGetRepositoryMountCandidates(t *testing.T) {
19
+	for _, tc := range []struct {
20
+		name          string
21
+		hmacKey       string
22
+		targetRepo    string
23
+		maxCandidates int
24
+		metadata      []metadata.V2Metadata
25
+		candidates    []metadata.V2Metadata
26
+	}{
27
+		{
28
+			name:          "empty metadata",
29
+			targetRepo:    "busybox",
30
+			maxCandidates: -1,
31
+			metadata:      []metadata.V2Metadata{},
32
+			candidates:    []metadata.V2Metadata{},
33
+		},
34
+		{
35
+			name:          "one item not matching",
36
+			targetRepo:    "busybox",
37
+			maxCandidates: -1,
38
+			metadata:      []metadata.V2Metadata{taggedMetadata("key", "dgst", "127.0.0.1/repo")},
39
+			candidates:    []metadata.V2Metadata{},
40
+		},
41
+		{
42
+			name:          "one item matching",
43
+			targetRepo:    "busybox",
44
+			maxCandidates: -1,
45
+			metadata:      []metadata.V2Metadata{taggedMetadata("hash", "1", "hello-world")},
46
+			candidates:    []metadata.V2Metadata{taggedMetadata("hash", "1", "hello-world")},
47
+		},
48
+		{
49
+			name:          "allow missing SourceRepository",
50
+			targetRepo:    "busybox",
51
+			maxCandidates: -1,
52
+			metadata: []metadata.V2Metadata{
53
+				{Digest: digest.Digest("1")},
54
+				{Digest: digest.Digest("3")},
55
+				{Digest: digest.Digest("2")},
56
+			},
57
+			candidates: []metadata.V2Metadata{},
58
+		},
59
+		{
60
+			name:          "handle docker.io",
61
+			targetRepo:    "user/app",
62
+			maxCandidates: -1,
63
+			metadata: []metadata.V2Metadata{
64
+				{Digest: digest.Digest("1"), SourceRepository: "docker.io/user/foo"},
65
+				{Digest: digest.Digest("3"), SourceRepository: "user/bar"},
66
+				{Digest: digest.Digest("2"), SourceRepository: "app"},
67
+			},
68
+			candidates: []metadata.V2Metadata{
69
+				{Digest: digest.Digest("3"), SourceRepository: "user/bar"},
70
+				{Digest: digest.Digest("1"), SourceRepository: "docker.io/user/foo"},
71
+				{Digest: digest.Digest("2"), SourceRepository: "app"},
72
+			},
73
+		},
74
+		{
75
+			name:          "sort more items",
76
+			hmacKey:       "abcd",
77
+			targetRepo:    "127.0.0.1/foo/bar",
78
+			maxCandidates: -1,
79
+			metadata: []metadata.V2Metadata{
80
+				taggedMetadata("hash", "1", "hello-world"),
81
+				taggedMetadata("efgh", "2", "127.0.0.1/hello-world"),
82
+				taggedMetadata("abcd", "3", "busybox"),
83
+				taggedMetadata("hash", "4", "busybox"),
84
+				taggedMetadata("hash", "5", "127.0.0.1/foo"),
85
+				taggedMetadata("hash", "6", "127.0.0.1/bar"),
86
+				taggedMetadata("efgh", "7", "127.0.0.1/foo/bar"),
87
+				taggedMetadata("abcd", "8", "127.0.0.1/xyz"),
88
+				taggedMetadata("hash", "9", "127.0.0.1/foo/app"),
89
+			},
90
+			candidates: []metadata.V2Metadata{
91
+				// first by matching hash
92
+				taggedMetadata("abcd", "8", "127.0.0.1/xyz"),
93
+				// then by longest matching prefix
94
+				taggedMetadata("hash", "9", "127.0.0.1/foo/app"),
95
+				taggedMetadata("hash", "5", "127.0.0.1/foo"),
96
+				// sort the rest of the matching items in reversed order
97
+				taggedMetadata("hash", "6", "127.0.0.1/bar"),
98
+				taggedMetadata("efgh", "2", "127.0.0.1/hello-world"),
99
+			},
100
+		},
101
+		{
102
+			name:          "limit max candidates",
103
+			hmacKey:       "abcd",
104
+			targetRepo:    "user/app",
105
+			maxCandidates: 3,
106
+			metadata: []metadata.V2Metadata{
107
+				taggedMetadata("abcd", "1", "user/app1"),
108
+				taggedMetadata("abcd", "2", "user/app/base"),
109
+				taggedMetadata("hash", "3", "user/app"),
110
+				taggedMetadata("abcd", "4", "127.0.0.1/user/app"),
111
+				taggedMetadata("hash", "5", "user/foo"),
112
+				taggedMetadata("hash", "6", "app/bar"),
113
+			},
114
+			candidates: []metadata.V2Metadata{
115
+				// first by matching hash
116
+				taggedMetadata("abcd", "2", "user/app/base"),
117
+				taggedMetadata("abcd", "1", "user/app1"),
118
+				// then by longest matching prefix
119
+				taggedMetadata("hash", "3", "user/app"),
120
+			},
121
+		},
122
+	} {
123
+		repoInfo, err := reference.ParseNamed(tc.targetRepo)
124
+		if err != nil {
125
+			t.Fatalf("[%s] failed to parse reference name: %v", tc.name, err)
126
+		}
127
+		candidates := getRepositoryMountCandidates(repoInfo, []byte(tc.hmacKey), tc.maxCandidates, tc.metadata)
128
+		if len(candidates) != len(tc.candidates) {
129
+			t.Errorf("[%s] got unexpected number of candidates: %d != %d", tc.name, len(candidates), len(tc.candidates))
130
+		}
131
+		for i := 0; i < len(candidates) && i < len(tc.candidates); i++ {
132
+			if !reflect.DeepEqual(candidates[i], tc.candidates[i]) {
133
+				t.Errorf("[%s] candidate %d does not match expected: %#+v != %#+v", tc.name, i, candidates[i], tc.candidates[i])
134
+			}
135
+		}
136
+		for i := len(candidates); i < len(tc.candidates); i++ {
137
+			t.Errorf("[%s] missing expected candidate at position %d (%#+v)", tc.name, i, tc.candidates[i])
138
+		}
139
+		for i := len(tc.candidates); i < len(candidates); i++ {
140
+			t.Errorf("[%s] got unexpected candidate at position %d (%#+v)", tc.name, i, candidates[i])
141
+		}
142
+	}
143
+}
144
+
145
+func TestLayerAlreadyExists(t *testing.T) {
146
+	for _, tc := range []struct {
147
+		name                   string
148
+		metadata               []metadata.V2Metadata
149
+		targetRepo             string
150
+		hmacKey                string
151
+		maxExistenceChecks     int
152
+		checkOtherRepositories bool
153
+		remoteBlobs            map[digest.Digest]distribution.Descriptor
154
+		remoteErrors           map[digest.Digest]error
155
+		expectedDescriptor     distribution.Descriptor
156
+		expectedExists         bool
157
+		expectedError          error
158
+		expectedRequests       []string
159
+		expectedAdditions      []metadata.V2Metadata
160
+		expectedRemovals       []metadata.V2Metadata
161
+	}{
162
+		{
163
+			name:                   "empty metadata",
164
+			targetRepo:             "busybox",
165
+			maxExistenceChecks:     3,
166
+			checkOtherRepositories: true,
167
+		},
168
+		{
169
+			name:               "single not existent metadata",
170
+			targetRepo:         "busybox",
171
+			metadata:           []metadata.V2Metadata{{Digest: digest.Digest("pear"), SourceRepository: "docker.io/library/busybox"}},
172
+			maxExistenceChecks: 3,
173
+			expectedRequests:   []string{"pear"},
174
+			expectedRemovals:   []metadata.V2Metadata{{Digest: digest.Digest("pear"), SourceRepository: "docker.io/library/busybox"}},
175
+		},
176
+		{
177
+			name:               "access denied",
178
+			targetRepo:         "busybox",
179
+			maxExistenceChecks: 1,
180
+			metadata:           []metadata.V2Metadata{{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/busybox"}},
181
+			remoteErrors:       map[digest.Digest]error{digest.Digest("apple"): distribution.ErrAccessDenied},
182
+			expectedError:      distribution.ErrAccessDenied,
183
+			expectedRequests:   []string{"apple"},
184
+		},
185
+		{
186
+			name:               "not matching reposies",
187
+			targetRepo:         "busybox",
188
+			maxExistenceChecks: 3,
189
+			metadata: []metadata.V2Metadata{
190
+				{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/hello-world"},
191
+				{Digest: digest.Digest("orange"), SourceRepository: "docker.io/library/busybox/subapp"},
192
+				{Digest: digest.Digest("pear"), SourceRepository: "docker.io/busybox"},
193
+				{Digest: digest.Digest("plum"), SourceRepository: "busybox"},
194
+				{Digest: digest.Digest("banana"), SourceRepository: "127.0.0.1/busybox"},
195
+			},
196
+		},
197
+		{
198
+			name:                   "check other repositories",
199
+			targetRepo:             "busybox",
200
+			maxExistenceChecks:     10,
201
+			checkOtherRepositories: true,
202
+			metadata: []metadata.V2Metadata{
203
+				{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/hello-world"},
204
+				{Digest: digest.Digest("orange"), SourceRepository: "docker.io/library/busybox/subapp"},
205
+				{Digest: digest.Digest("pear"), SourceRepository: "docker.io/busybox"},
206
+				{Digest: digest.Digest("plum"), SourceRepository: "busybox"},
207
+				{Digest: digest.Digest("banana"), SourceRepository: "127.0.0.1/busybox"},
208
+			},
209
+			expectedRequests: []string{"plum", "pear", "apple", "orange", "banana"},
210
+		},
211
+		{
212
+			name:               "find existing blob",
213
+			targetRepo:         "busybox",
214
+			metadata:           []metadata.V2Metadata{{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/busybox"}},
215
+			maxExistenceChecks: 3,
216
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("apple"): {Digest: digest.Digest("apple")}},
217
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("apple"), MediaType: schema2.MediaTypeLayer},
218
+			expectedExists:     true,
219
+			expectedRequests:   []string{"apple"},
220
+		},
221
+		{
222
+			name:               "find existing blob with different hmac",
223
+			targetRepo:         "busybox",
224
+			metadata:           []metadata.V2Metadata{{SourceRepository: "docker.io/library/busybox", Digest: digest.Digest("apple"), HMAC: "dummyhmac"}},
225
+			maxExistenceChecks: 3,
226
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("apple"): {Digest: digest.Digest("apple")}},
227
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("apple"), MediaType: schema2.MediaTypeLayer},
228
+			expectedExists:     true,
229
+			expectedRequests:   []string{"apple"},
230
+			expectedAdditions:  []metadata.V2Metadata{{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/busybox"}},
231
+		},
232
+		{
233
+			name:               "overwrite media types",
234
+			targetRepo:         "busybox",
235
+			metadata:           []metadata.V2Metadata{{Digest: digest.Digest("apple"), SourceRepository: "docker.io/library/busybox"}},
236
+			hmacKey:            "key",
237
+			maxExistenceChecks: 3,
238
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("apple"): {Digest: digest.Digest("apple"), MediaType: "custom-media-type"}},
239
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("apple"), MediaType: schema2.MediaTypeLayer},
240
+			expectedExists:     true,
241
+			expectedRequests:   []string{"apple"},
242
+			expectedAdditions:  []metadata.V2Metadata{taggedMetadata("key", "apple", "docker.io/library/busybox")},
243
+		},
244
+		{
245
+			name:       "find existing blob among many",
246
+			targetRepo: "127.0.0.1/myapp",
247
+			hmacKey:    "key",
248
+			metadata: []metadata.V2Metadata{
249
+				taggedMetadata("someotherkey", "pear", "127.0.0.1/myapp"),
250
+				taggedMetadata("key", "apple", "127.0.0.1/myapp"),
251
+				taggedMetadata("", "plum", "127.0.0.1/myapp"),
252
+			},
253
+			maxExistenceChecks: 3,
254
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("pear"): {Digest: digest.Digest("pear")}},
255
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("pear"), MediaType: schema2.MediaTypeLayer},
256
+			expectedExists:     true,
257
+			expectedRequests:   []string{"apple", "plum", "pear"},
258
+			expectedAdditions:  []metadata.V2Metadata{taggedMetadata("key", "pear", "127.0.0.1/myapp")},
259
+			expectedRemovals: []metadata.V2Metadata{
260
+				taggedMetadata("key", "apple", "127.0.0.1/myapp"),
261
+				{Digest: digest.Digest("plum"), SourceRepository: "127.0.0.1/myapp"},
262
+			},
263
+		},
264
+		{
265
+			name:       "reach maximum existence checks",
266
+			targetRepo: "user/app",
267
+			metadata: []metadata.V2Metadata{
268
+				{Digest: digest.Digest("pear"), SourceRepository: "docker.io/user/app"},
269
+				{Digest: digest.Digest("apple"), SourceRepository: "docker.io/user/app"},
270
+				{Digest: digest.Digest("plum"), SourceRepository: "docker.io/user/app"},
271
+				{Digest: digest.Digest("banana"), SourceRepository: "docker.io/user/app"},
272
+			},
273
+			maxExistenceChecks: 3,
274
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("pear"): {Digest: digest.Digest("pear")}},
275
+			expectedExists:     false,
276
+			expectedRequests:   []string{"banana", "plum", "apple"},
277
+			expectedRemovals: []metadata.V2Metadata{
278
+				{Digest: digest.Digest("banana"), SourceRepository: "docker.io/user/app"},
279
+				{Digest: digest.Digest("plum"), SourceRepository: "docker.io/user/app"},
280
+				{Digest: digest.Digest("apple"), SourceRepository: "docker.io/user/app"},
281
+			},
282
+		},
283
+		{
284
+			name:       "zero allowed existence checks",
285
+			targetRepo: "user/app",
286
+			metadata: []metadata.V2Metadata{
287
+				{Digest: digest.Digest("pear"), SourceRepository: "docker.io/user/app"},
288
+				{Digest: digest.Digest("apple"), SourceRepository: "docker.io/user/app"},
289
+				{Digest: digest.Digest("plum"), SourceRepository: "docker.io/user/app"},
290
+				{Digest: digest.Digest("banana"), SourceRepository: "docker.io/user/app"},
291
+			},
292
+			maxExistenceChecks: 0,
293
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("pear"): {Digest: digest.Digest("pear")}},
294
+		},
295
+		{
296
+			name:       "stat single digest just once",
297
+			targetRepo: "busybox",
298
+			metadata: []metadata.V2Metadata{
299
+				taggedMetadata("key1", "pear", "docker.io/library/busybox"),
300
+				taggedMetadata("key2", "apple", "docker.io/library/busybox"),
301
+				taggedMetadata("key3", "apple", "docker.io/library/busybox"),
302
+			},
303
+			maxExistenceChecks: 3,
304
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("pear"): {Digest: digest.Digest("pear")}},
305
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("pear"), MediaType: schema2.MediaTypeLayer},
306
+			expectedExists:     true,
307
+			expectedRequests:   []string{"apple", "pear"},
308
+			expectedAdditions:  []metadata.V2Metadata{{Digest: digest.Digest("pear"), SourceRepository: "docker.io/library/busybox"}},
309
+			expectedRemovals:   []metadata.V2Metadata{taggedMetadata("key3", "apple", "docker.io/library/busybox")},
310
+		},
311
+		{
312
+			name:       "stop on first error",
313
+			targetRepo: "user/app",
314
+			hmacKey:    "key",
315
+			metadata: []metadata.V2Metadata{
316
+				taggedMetadata("key", "banana", "docker.io/user/app"),
317
+				taggedMetadata("key", "orange", "docker.io/user/app"),
318
+				taggedMetadata("key", "plum", "docker.io/user/app"),
319
+			},
320
+			maxExistenceChecks: 3,
321
+			remoteErrors:       map[digest.Digest]error{"orange": distribution.ErrAccessDenied},
322
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("apple"): {}},
323
+			expectedError:      distribution.ErrAccessDenied,
324
+			expectedRequests:   []string{"plum", "orange"},
325
+			expectedRemovals:   []metadata.V2Metadata{taggedMetadata("key", "plum", "docker.io/user/app")},
326
+		},
327
+		{
328
+			name:       "remove outdated metadata",
329
+			targetRepo: "docker.io/user/app",
330
+			metadata: []metadata.V2Metadata{
331
+				{Digest: digest.Digest("plum"), SourceRepository: "docker.io/library/busybox"},
332
+				{Digest: digest.Digest("orange"), SourceRepository: "docker.io/user/app"},
333
+			},
334
+			maxExistenceChecks: 3,
335
+			remoteErrors:       map[digest.Digest]error{"orange": distribution.ErrBlobUnknown},
336
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("plum"): {}},
337
+			expectedExists:     false,
338
+			expectedRequests:   []string{"orange"},
339
+			expectedRemovals:   []metadata.V2Metadata{{Digest: digest.Digest("orange"), SourceRepository: "docker.io/user/app"}},
340
+		},
341
+		{
342
+			name:       "missing SourceRepository",
343
+			targetRepo: "busybox",
344
+			metadata: []metadata.V2Metadata{
345
+				{Digest: digest.Digest("1")},
346
+				{Digest: digest.Digest("3")},
347
+				{Digest: digest.Digest("2")},
348
+			},
349
+			maxExistenceChecks: 3,
350
+			expectedExists:     false,
351
+			expectedRequests:   []string{"2", "3", "1"},
352
+		},
353
+
354
+		{
355
+			name:       "with and without SourceRepository",
356
+			targetRepo: "busybox",
357
+			metadata: []metadata.V2Metadata{
358
+				{Digest: digest.Digest("1")},
359
+				{Digest: digest.Digest("2"), SourceRepository: "docker.io/library/busybox"},
360
+				{Digest: digest.Digest("3")},
361
+			},
362
+			remoteBlobs:        map[digest.Digest]distribution.Descriptor{digest.Digest("1"): {Digest: digest.Digest("1")}},
363
+			maxExistenceChecks: 3,
364
+			expectedDescriptor: distribution.Descriptor{Digest: digest.Digest("1"), MediaType: schema2.MediaTypeLayer},
365
+			expectedExists:     true,
366
+			expectedRequests:   []string{"2", "3", "1"},
367
+			expectedAdditions:  []metadata.V2Metadata{{Digest: digest.Digest("1"), SourceRepository: "docker.io/library/busybox"}},
368
+			expectedRemovals: []metadata.V2Metadata{
369
+				{Digest: digest.Digest("2"), SourceRepository: "docker.io/library/busybox"},
370
+			},
371
+		},
372
+	} {
373
+		repoInfo, err := reference.ParseNamed(tc.targetRepo)
374
+		if err != nil {
375
+			t.Fatalf("[%s] failed to parse reference name: %v", tc.name, err)
376
+		}
377
+		repo := &mockRepo{
378
+			t:        t,
379
+			errors:   tc.remoteErrors,
380
+			blobs:    tc.remoteBlobs,
381
+			requests: []string{},
382
+		}
383
+		ctx := context.Background()
384
+		ms := &mockV2MetadataService{}
385
+		pd := &v2PushDescriptor{
386
+			hmacKey:           []byte(tc.hmacKey),
387
+			repoInfo:          repoInfo,
388
+			layer:             layer.EmptyLayer,
389
+			repo:              repo,
390
+			v2MetadataService: ms,
391
+			pushState:         &pushState{remoteLayers: make(map[layer.DiffID]distribution.Descriptor)},
392
+			checkedDigests:    make(map[digest.Digest]struct{}),
393
+		}
394
+
395
+		desc, exists, err := pd.layerAlreadyExists(ctx, &progressSink{t}, layer.EmptyLayer.DiffID(), tc.checkOtherRepositories, tc.maxExistenceChecks, tc.metadata)
396
+
397
+		if !reflect.DeepEqual(desc, tc.expectedDescriptor) {
398
+			t.Errorf("[%s] got unexpected descriptor: %#+v != %#+v", tc.name, desc, tc.expectedDescriptor)
399
+		}
400
+		if exists != tc.expectedExists {
401
+			t.Errorf("[%s] got unexpected exists: %t != %t", tc.name, exists, tc.expectedExists)
402
+		}
403
+		if !reflect.DeepEqual(err, tc.expectedError) {
404
+			t.Errorf("[%s] got unexpected error: %#+v != %#+v", tc.name, err, tc.expectedError)
405
+		}
406
+
407
+		if len(repo.requests) != len(tc.expectedRequests) {
408
+			t.Errorf("[%s] got unexpected number of requests: %d != %d", tc.name, len(repo.requests), len(tc.expectedRequests))
409
+		}
410
+		for i := 0; i < len(repo.requests) && i < len(tc.expectedRequests); i++ {
411
+			if repo.requests[i] != tc.expectedRequests[i] {
412
+				t.Errorf("[%s] request %d does not match expected: %q != %q", tc.name, i, repo.requests[i], tc.expectedRequests[i])
413
+			}
414
+		}
415
+		for i := len(repo.requests); i < len(tc.expectedRequests); i++ {
416
+			t.Errorf("[%s] missing expected request at position %d (%q)", tc.name, i, tc.expectedRequests[i])
417
+		}
418
+		for i := len(tc.expectedRequests); i < len(repo.requests); i++ {
419
+			t.Errorf("[%s] got unexpected request at position %d (%q)", tc.name, i, repo.requests[i])
420
+		}
421
+
422
+		if len(ms.added) != len(tc.expectedAdditions) {
423
+			t.Errorf("[%s] got unexpected number of additions: %d != %d", tc.name, len(ms.added), len(tc.expectedAdditions))
424
+		}
425
+		for i := 0; i < len(ms.added) && i < len(tc.expectedAdditions); i++ {
426
+			if ms.added[i] != tc.expectedAdditions[i] {
427
+				t.Errorf("[%s] added metadata at %d does not match expected: %q != %q", tc.name, i, ms.added[i], tc.expectedAdditions[i])
428
+			}
429
+		}
430
+		for i := len(ms.added); i < len(tc.expectedAdditions); i++ {
431
+			t.Errorf("[%s] missing expected addition at position %d (%q)", tc.name, i, tc.expectedAdditions[i])
432
+		}
433
+		for i := len(tc.expectedAdditions); i < len(ms.added); i++ {
434
+			t.Errorf("[%s] unexpected metadata addition at position %d (%q)", tc.name, i, ms.added[i])
435
+		}
436
+
437
+		if len(ms.removed) != len(tc.expectedRemovals) {
438
+			t.Errorf("[%s] got unexpected number of removals: %d != %d", tc.name, len(ms.removed), len(tc.expectedRemovals))
439
+		}
440
+		for i := 0; i < len(ms.removed) && i < len(tc.expectedRemovals); i++ {
441
+			if ms.removed[i] != tc.expectedRemovals[i] {
442
+				t.Errorf("[%s] removed metadata at %d does not match expected: %q != %q", tc.name, i, ms.removed[i], tc.expectedRemovals[i])
443
+			}
444
+		}
445
+		for i := len(ms.removed); i < len(tc.expectedRemovals); i++ {
446
+			t.Errorf("[%s] missing expected removal at position %d (%q)", tc.name, i, tc.expectedRemovals[i])
447
+		}
448
+		for i := len(tc.expectedRemovals); i < len(ms.removed); i++ {
449
+			t.Errorf("[%s] removed unexpected metadata at position %d (%q)", tc.name, i, ms.removed[i])
450
+		}
451
+	}
452
+}
453
+
454
+func taggedMetadata(key string, dgst string, sourceRepo string) metadata.V2Metadata {
455
+	meta := metadata.V2Metadata{
456
+		Digest:           digest.Digest(dgst),
457
+		SourceRepository: sourceRepo,
458
+	}
459
+
460
+	meta.HMAC = metadata.ComputeV2MetadataHMAC([]byte(key), &meta)
461
+	return meta
462
+}
463
+
464
+type mockRepo struct {
465
+	t        *testing.T
466
+	errors   map[digest.Digest]error
467
+	blobs    map[digest.Digest]distribution.Descriptor
468
+	requests []string
469
+}
470
+
471
+var _ distribution.Repository = &mockRepo{}
472
+
473
+func (m *mockRepo) Named() distreference.Named {
474
+	m.t.Fatalf("Named() not implemented")
475
+	return nil
476
+}
477
+func (m *mockRepo) Manifests(ctc context.Context, options ...distribution.ManifestServiceOption) (distribution.ManifestService, error) {
478
+	m.t.Fatalf("Manifests() not implemented")
479
+	return nil, nil
480
+}
481
+func (m *mockRepo) Tags(ctc context.Context) distribution.TagService {
482
+	m.t.Fatalf("Tags() not implemented")
483
+	return nil
484
+}
485
+func (m *mockRepo) Blobs(ctx context.Context) distribution.BlobStore {
486
+	return &mockBlobStore{
487
+		repo: m,
488
+	}
489
+}
490
+
491
+type mockBlobStore struct {
492
+	repo *mockRepo
493
+}
494
+
495
+var _ distribution.BlobStore = &mockBlobStore{}
496
+
497
+func (m *mockBlobStore) Stat(ctx context.Context, dgst digest.Digest) (distribution.Descriptor, error) {
498
+	m.repo.requests = append(m.repo.requests, dgst.String())
499
+	if err, exists := m.repo.errors[dgst]; exists {
500
+		return distribution.Descriptor{}, err
501
+	}
502
+	if desc, exists := m.repo.blobs[dgst]; exists {
503
+		return desc, nil
504
+	}
505
+	return distribution.Descriptor{}, distribution.ErrBlobUnknown
506
+}
507
+func (m *mockBlobStore) Get(ctx context.Context, dgst digest.Digest) ([]byte, error) {
508
+	m.repo.t.Fatal("Get() not implemented")
509
+	return nil, nil
510
+}
511
+
512
+func (m *mockBlobStore) Open(ctx context.Context, dgst digest.Digest) (distribution.ReadSeekCloser, error) {
513
+	m.repo.t.Fatal("Open() not implemented")
514
+	return nil, nil
515
+}
516
+
517
+func (m *mockBlobStore) Put(ctx context.Context, mediaType string, p []byte) (distribution.Descriptor, error) {
518
+	m.repo.t.Fatal("Put() not implemented")
519
+	return distribution.Descriptor{}, nil
520
+}
521
+
522
+func (m *mockBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) {
523
+	m.repo.t.Fatal("Create() not implemented")
524
+	return nil, nil
525
+}
526
+func (m *mockBlobStore) Resume(ctx context.Context, id string) (distribution.BlobWriter, error) {
527
+	m.repo.t.Fatal("Resume() not implemented")
528
+	return nil, nil
529
+}
530
+func (m *mockBlobStore) Delete(ctx context.Context, dgst digest.Digest) error {
531
+	m.repo.t.Fatal("Delete() not implemented")
532
+	return nil
533
+}
534
+func (m *mockBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r *http.Request, dgst digest.Digest) error {
535
+	m.repo.t.Fatalf("ServeBlob() not implemented")
536
+	return nil
537
+}
538
+
539
+type mockV2MetadataService struct {
540
+	added   []metadata.V2Metadata
541
+	removed []metadata.V2Metadata
542
+}
543
+
544
+var _ metadata.V2MetadataService = &mockV2MetadataService{}
545
+
546
+func (*mockV2MetadataService) GetMetadata(diffID layer.DiffID) ([]metadata.V2Metadata, error) {
547
+	return nil, nil
548
+}
549
+func (*mockV2MetadataService) GetDiffID(dgst digest.Digest) (layer.DiffID, error) {
550
+	return "", nil
551
+}
552
+func (m *mockV2MetadataService) Add(diffID layer.DiffID, metadata metadata.V2Metadata) error {
553
+	m.added = append(m.added, metadata)
554
+	return nil
555
+}
556
+func (m *mockV2MetadataService) TagAndAdd(diffID layer.DiffID, hmacKey []byte, meta metadata.V2Metadata) error {
557
+	meta.HMAC = metadata.ComputeV2MetadataHMAC(hmacKey, &meta)
558
+	m.Add(diffID, meta)
559
+	return nil
560
+}
561
+func (m *mockV2MetadataService) Remove(metadata metadata.V2Metadata) error {
562
+	m.removed = append(m.removed, metadata)
563
+	return nil
564
+}
565
+
566
+type progressSink struct {
567
+	t *testing.T
568
+}
569
+
570
+func (s *progressSink) WriteProgress(p progress.Progress) error {
571
+	s.t.Logf("progress update: %#+v", p)
572
+	return nil
573
+}