contrib/download-frozen-image-v2.sh
52379fa7
 #!/usr/bin/env bash
356dc712
 set -eo pipefail
351074ed
 
 # hello-world                      latest              ef872312fe1b        3 months ago        910 B
 # hello-world                      latest              ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9   3 months ago        910 B
 
 # debian                           latest              f6fab3b798be        10 weeks ago        85.1 MB
 # debian                           latest              f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd   10 weeks ago        85.1 MB
60ec93f7
 
 # abort early if any of the commands listed here is missing from PATH
 for cmd in curl jq go; do
 	command -v "$cmd" &> /dev/null && continue
 	echo >&2 "error: \"$cmd\" not found!"
 	exit 1
 done
b5763f8f
 
351074ed
 # Print the command-line synopsis and one worked example to stdout.
 # When called with an argument, terminate the script using it as the exit
 # status; when called without one, simply return to the caller.
 usage() {
 	printf 'usage: %s dir image[:tag][@digest] ...\n' "$0"
 	printf '       %s /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7\n' "$0"
 	if [ -n "$1" ]; then
 		exit "$1"
 	fi
 }
 
 # the first positional argument names the directory the image files are built in
 dir="$1"
 if ! shift; then
 	# no arguments were given at all
 	usage 1 >&2
 fi
 
 # a non-empty directory name and at least one image argument are both mandatory
 if [ $# -le 0 ] || [ -z "$dir" ]; then
 	usage 2 >&2
 fi
 mkdir -p "$dir"
 
27aab3ac
 # Bash 3 has no associative arrays, so instead of something like
 #   repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'
 # the tag lists are kept in "$dir"/tags-*.tmp files; start without any of them
 rm -f "$dir"/tags-*.tmp
 images=()
 manifestJsonEntries=()
 doNotGenerateManifestJson=''
351074ed
 
356dc712
 # bash v4 on Windows CI requires CRLF separator
 # (newlineIFS is the IFS value used when line-oriented jq output is split into arrays)
 newlineIFS=$'\n'
 if [ "$(go env GOHOSTOS)" = 'windows' ]; then
 	# BASH_VERSINFO[0] is bash's major version number; it replaces the former
 	# "${BASH_VERSION%%[^0.9]} | cut" parsing, whose pattern was a typo for 0-9
 	# and only produced the major version by accident
 	major="${BASH_VERSINFO[0]}"
 	if [ "$major" -ge 4 ]; then
 		newlineIFS=$'\r\n'
 	fi
 fi
 
4bbdc0b8
 # base URL that manifest and blob requests are sent to
 registryBase="https://registry-1.docker.io"
 # base URL of the token endpoint
 authBase="https://auth.docker.io"
 # value passed as the "service" parameter of token requests
 authService="registry.docker.io"
 
 # Download one blob of a repository from "$registryBase" into a file.
 #
 # Arguments:
 #   $1   bearer token sent to the registry
 #   $2   repository name (e.g. "library/debian")
 #   $3   digest of the blob
 #   $4   path of the file to write
 #   $5+  extra arguments passed to every curl invocation (e.g. -s, --progress)
 #
 # Returns non-zero when curl fails, when the registry answers with a 4xx/5xx
 # status, or when a redirect response carries no Location header.
 #
 # The first request is made without -L; a redirect is followed by hand with a
 # second request that carries no Authorization header
 # (see https://github.com/moby/moby/issues/33700).
 fetch_blob() {
 	local token="$1"; shift
 	local image="$1"; shift
 	local digest="$1"; shift
 	local targetFile="$1"; shift
 	local curlArgs=( "$@" )
 
 	# declared separately from the assignment so a curl failure is not hidden by "local"
 	local curlHeaders
 	curlHeaders="$(
 		curl -S "${curlArgs[@]}" \
 			-H "Authorization: Bearer $token" \
 			"$registryBase/v2/$image/blobs/$digest" \
 			-o "$targetFile" \
 			-D-
 	)" || return
 	curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"
 
 	# status lines look like "HTTP/1.1 307 ..." but also "HTTP/2 307" (no minor version)
 	local statusPrefix='^HTTP/[0-9]+(\.[0-9]+)? '
 	if grep -qE "${statusPrefix}3" <<<"$curlHeaders"; then
 		# the body of the redirect response is not the blob
 		rm -f "$targetFile"
 
 		local blobRedirect
 		blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
 		if [ -z "$blobRedirect" ]; then
 			echo >&2 "error: failed fetching '$image' blob '$digest'"
 			echo "$curlHeaders" | head -1 >&2
 			return 1
 		fi
 
 		curl -fSL "${curlArgs[@]}" \
 			"$blobRedirect" \
 			-o "$targetFile"
 	elif grep -qE "${statusPrefix}[45]" <<<"$curlHeaders"; then
 		# without -f curl has saved the error document as if it were the blob
 		rm -f "$targetFile"
 		echo >&2 "error: failed fetching '$image' blob '$digest'"
 		grep -E "$statusPrefix" <<<"$curlHeaders" | tail -1 >&2
 		return 1
 	fi
 }
 
0af5db51
 # handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
 #
 # Fetches the image config and every layer named by the manifest into "$dir"
 # (one directory per layer holding VERSION, json and layer.tar) and appends a
 # matching entry to manifestJsonEntries.
 #
 # Arguments:
 #   $1  the manifest JSON document
 # Globals read:    dir, image, tag, token, imageIdentifier, newlineIFS,
 #                  authBase, authService
 # Globals written: manifestJsonEntries (appended to),
 #                  imageId (set to the ID of the last layer processed)
 handle_single_manifest_v2() {
 	local manifestJson="$1"; shift
 
 	local configDigest
 	configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
 	# kept in its own local so that the global "imageId" can be set further down
 	local configId="${configDigest#*:}" # strip off "sha256:"
 
 	local configFile="$configId.json"
 	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s
 
 	local layersFs
 	layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"
 	local IFS="$newlineIFS"
 	local layers=( $layersFs )
 	unset IFS
 
 	# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
 	local starterJson='{
 		"created": "0001-01-01T00:00:00Z",
 		"container_config": {
 			"Hostname": "",
 			"Domainname": "",
 			"User": "",
 			"AttachStdin": false,
 			"AttachStdout": false,
 			"AttachStderr": false,
 			"Tty": false,
 			"OpenStdin": false,
 			"StdinOnce": false,
 			"Env": null,
 			"Cmd": null,
 			"Image": "",
 			"Volumes": null,
 			"WorkingDir": "",
 			"Entrypoint": null,
 			"OnBuild": null,
 			"Labels": null
 		}
 	}'
 
 	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
 	# loop helpers are local so they cannot clobber same-named variables of the caller
 	local i layerMeta layerMediaType layerDigest parentId parentJson addJson layerTar layerToken layerFile
 	local layerId=
 	local layerFiles=()
 	for i in "${!layers[@]}"; do
 		layerMeta="${layers[$i]}"
 
 		layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
 		layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"
 
 		# save the previous layer's ID
 		parentId="$layerId"
 		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
 		layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
 		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)
 
 		mkdir -p "$dir/$layerId"
 		echo '1.0' > "$dir/$layerId/VERSION"
 
 		if [ ! -s "$dir/$layerId/json" ]; then
 			# prepend "id" (and "parent", for every layer but the first) to the starter JSON
 			parentJson="$(printf ', parent: "%s"' "$parentId")"
 			addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")"
 			jq "$addJson + ." <<<"$starterJson" > "$dir/$layerId/json"
 		fi
 
 		case "$layerMediaType" in
 			application/vnd.docker.image.rootfs.diff.tar.gzip)
 				layerTar="$layerId/layer.tar"
 				layerFiles=( "${layerFiles[@]}" "$layerTar" )
 				# TODO figure out why "-C -" doesn't work here
 				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
 				# "HTTP/1.1 416 Requested Range Not Satisfiable"
 				if [ -f "$dir/$layerTar" ]; then
 					# TODO hackpatch for no -C support :'(
 					echo "skipping existing ${layerId:0:12}"
 					continue
 				fi
 				# request a new token right before each layer download
 				layerToken="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
 				fetch_blob "$layerToken" "$image" "$layerDigest" "$dir/$layerTar" --progress
 				;;
 
 			*)
 				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
 				exit 1
 				;;
 		esac
 	done
 
 	# set the global "$imageId" to the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
 	# NOTE: this must not be a "local", otherwise the value never reaches the code writing that file
 	imageId="$layerId"
 
 	# munge the top layer image manifest to have the appropriate image configuration for older daemons
 	local imageOldConfig
 	imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
 	jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"
 
 	# JSON array of the layer tarball paths, in manifest order
 	local layersJson
 	layersJson="$(echo '[]' | jq --raw-output ".$(for layerFile in "${layerFiles[@]}"; do echo " + [ \"$layerFile\" ]"; done)")"
 
 	local manifestJsonEntry
 	manifestJsonEntry="$(
 		echo '{}' | jq --raw-output '. + {
 			Config: "'"$configFile"'",
 			RepoTags: ["'"${image#library\/}:$tag"'"],
 			Layers: '"$layersJson"'
 		}'
 	)"
 	manifestJsonEntries=( "${manifestJsonEntries[@]}" "$manifestJsonEntry" )
 }
 
351074ed
 # Download every image named on the command line, one argument at a time.
 # Each argument has the form image[:tag][@digest]; the tag defaults to
 # "latest", and when no digest is given the manifest is requested by tag.
 while [ $# -gt 0 ]; do
 	imageTag="$1"
 	shift
 
 	# split the argument into repository, tag and digest
 	image="${imageTag%%[:@]*}"
 	reference="${imageTag#"$image"}" # "", ":tag", "@digest" or ":tag@digest"
 	digest=''
 	case "$reference" in
 		*@*)
 			digest="${reference#*@}"
 			reference="${reference%%@*}"
 			;;
 	esac
 	tag="${reference#:}"
 	tag="${tag:-latest}"
 	# "$digest" is what gets requested from the manifests endpoint
 	digest="${digest:-$tag}"
 
 	# add prefix library if passed official image
 	if [[ "$image" != *"/"* ]]; then
 		image="library/$image"
 	fi
 
 	imageFile="${image//\//_}" # "/" can't be in filenames :)
 
 	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
 
 	manifestJson="$(
 		curl -fsSL \
 			-H "Authorization: Bearer $token" \
 			-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
 			-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
 			-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
 			"$registryBase/v2/$image/manifests/$digest"
 	)"
 	if [ "${manifestJson:0:1}" != '{' ]; then
 		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
 		echo >&2 "  $manifestJson"
 		exit 1
 	fi
 
 	imageIdentifier="$image:$tag@$digest"
 
 	schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
 	case "$schemaVersion" in
 		2)
 			mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"
 
 			case "$mediaType" in
 				application/vnd.docker.distribution.manifest.v2+json)
 					handle_single_manifest_v2 "$manifestJson"
 					;;
 				application/vnd.docker.distribution.manifest.list.v2+json)
 					layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
 					IFS="$newlineIFS"
 					layers=( $layersFs )
 					unset IFS
 
 					# the architecture to pick from the list; looked up once, not per entry
 					targetArch="$(go env GOARCH)"
 					found=""
 					# parse first level multi-arch manifest
 					for i in "${!layers[@]}"; do
 						layerMeta="${layers[$i]}"
 						maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
 						if [ "$maniArch" = "$targetArch" ]; then
 							digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
 							# get second level single manifest
 							submanifestJson="$(
 								curl -fsSL \
 									-H "Authorization: Bearer $token" \
 									-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
 									-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
 									-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
 									"$registryBase/v2/$image/manifests/$digest"
 							)"
 							handle_single_manifest_v2 "$submanifestJson"
 							found="found"
 							break
 						fi
 					done
 					if [ -z "$found" ]; then
 						# report the architecture that was searched for, not the last one inspected
 						echo >&2 "error: manifest for $targetArch is not found"
 						exit 1
 					fi
 					;;
 				*)
 					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
 					exit 1
 					;;
 			esac
 			;;
 
 		1)
 			if [ -z "$doNotGenerateManifestJson" ]; then
 				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
 				echo >&2 "  this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
 				echo >&2
 				doNotGenerateManifestJson=1
 			fi
 
 			layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
 			IFS="$newlineIFS"
 			layers=( $layersFs )
 			unset IFS
 
 			# the v1Compatibility strings, in the same order as the fsLayers entries
 			history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
 			imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"
 
 			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
 			for i in "${!layers[@]}"; do
 				imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
 				layerId="$(echo "$imageJson" | jq --raw-output '.id')"
 				imageLayer="${layers[$i]}"
 
 				mkdir -p "$dir/$layerId"
 				echo '1.0' > "$dir/$layerId/VERSION"
 
 				echo "$imageJson" > "$dir/$layerId/json"
 
 				# TODO figure out why "-C -" doesn't work here
 				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
 				# "HTTP/1.1 416 Requested Range Not Satisfiable"
 				if [ -f "$dir/$layerId/layer.tar" ]; then
 					# TODO hackpatch for no -C support :'(
 					echo "skipping existing ${layerId:0:12}"
 					continue
 				fi
 				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
 				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress
 			done
 			;;
 
 		*)
 			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
 			exit 1
 			;;
 	esac
 
 	echo
 
 	# record '"tag": "id"' for this repository; a repository is added to
 	# "images" only the first time it is seen
 	if [ -s "$dir/tags-$imageFile.tmp" ]; then
 		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
 	else
 		images=( "${images[@]}" "$image" )
 	fi
 	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
 done
 
 # Assemble the "repositories" file: a JSON object that maps each repository
 # name to the '"tag": "id"' pairs collected in its tags-*.tmp file.
 firstImage=1
 {
 	printf '{'
 	for image in "${images[@]}"; do
 		imageFile="${image//\//_}" # "/" can't be in filenames :)
 		image="${image#library\/}"
 
 		# entries after the first one are preceded by a comma
 		if [ -z "$firstImage" ]; then
 			printf ','
 		fi
 		firstImage=
 		printf '\n\t"%s": { %s }' "$image" "$(cat "$dir/tags-$imageFile.tmp")"
 	done
 	printf '\n}\n'
 } > "$dir/repositories"
 
 # the per-repository tag lists have served their purpose
 rm -f "$dir"/tags-*.tmp
 
356dc712
 # manifest.json is written only when the "do not generate" flag is unset and
 # at least one entry was collected; otherwise any existing manifest.json is removed
 if [ -n "$doNotGenerateManifestJson" ] || [ "${#manifestJsonEntries[@]}" -le 0 ]; then
 	rm -f "$dir/manifest.json"
 else
 	# fold the entries into a single JSON array: [] + [ entry ] + [ entry ] ...
 	jq --raw-output ".$(for entry in "${manifestJsonEntries[@]}"; do echo " + [ $entry ]"; done)" <<<'[]' > "$dir/manifest.json"
 fi
 
351074ed
 # report where the files were written and show how they can be loaded
 printf "Download of images into '%s' complete.\n" "$dir"
 printf 'Use something like the following to load the result into a Docker daemon:\n'
 printf "  tar -cC '%s' . | docker load\n" "$dir"