Packages such as linux, linux-esx, linux-secure and linux-aws all
share the same source tarball. So, when building these packages in
parallel, the package-builder will try to download the source tarball
to the same destination (filename) on the disk, simultaneously.
Unfortunately, there is no protection against partial file downloads;
that is, the destination file is visible well before the download is
actually complete. This can result in a race condition where some of
these packages observe that the source tarball already exists, and
compute a sha1 checksum on the (partially downloaded) file and error
out, complaining that the checksum doesn't match the expected value
from that package's spec file. Linux is particularly prone to this
race, as its tarballs are quite large, thus extending this race
window.
Fix this by downloading to a temporary filename and renaming it to the
final destination filename after the download is actually complete.
(Note that it is important to make sure that the temporary file and
the final filename both exist on the same filesystem; otherwise it
doesn't fix the race completely). Also, we utilize the package name
(eg: linux-esx) in making the temporary filename unique.
Change-Id: I5dda3da4d7bd4e75f211774246adeded7a0d095b
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/5136
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
... | ... |
@@ -154,7 +154,7 @@ def buildSourcesList(yamlDir, blackListPkgs, logger, singleFile=True): |
154 | 154 |
sourceName = listSourceNames[0] |
155 | 155 |
sha1 = SPECS.getData().getSHA1(package, sourceName) |
156 | 156 |
if sha1 is not None: |
157 |
- PullSources.get(sourceName, sha1, yamlSourceDir, |
|
157 |
+ PullSources.get(package, sourceName, sha1, yamlSourceDir, |
|
158 | 158 |
constants.pullsourcesConfig, logger) |
159 | 159 |
|
160 | 160 |
if not singleFile: |
... | ... |
@@ -450,8 +450,8 @@ class PackageUtils(object): |
450 | 450 |
# Fetch/verify sources if sha1 not None. |
451 | 451 |
sha1 = SPECS.getData().getSHA1(package, source, index) |
452 | 452 |
if sha1 is not None: |
453 |
- PullSources.get(source, sha1, constants.sourcePath, constants.pullsourcesConfig, |
|
454 |
- self.logger) |
|
453 |
+ PullSources.get(package, source, sha1, constants.sourcePath, |
|
454 |
+ constants.pullsourcesConfig, self.logger) |
|
455 | 455 |
|
456 | 456 |
sourcePath = cmdUtils.findFile(source, constants.sourcePath) |
457 | 457 |
if not sourcePath: |
... | ... |
@@ -22,7 +22,7 @@ def getFileHash(filepath): |
22 | 22 |
f.close() |
23 | 23 |
return sha1.hexdigest() |
24 | 24 |
|
25 |
-def get(source, sha1, sourcesPath, configs, logger): |
|
25 |
+def get(package, source, sha1, sourcesPath, configs, logger): |
|
26 | 26 |
cmdUtils = CommandUtils() |
27 | 27 |
sourcePath = cmdUtils.findFile(source, sourcesPath) |
28 | 28 |
if sourcePath is not None and len(sourcePath) > 0: |
... | ... |
@@ -40,7 +40,7 @@ def get(source, sha1, sourcesPath, configs, logger): |
40 | 40 |
p = pullSources(config, logger) |
41 | 41 |
package_path = os.path.join(sourcesPath, source) |
42 | 42 |
try: |
43 |
- p.downloadFileHelper(source, package_path, sha1) |
|
43 |
+ p.downloadFileHelper(package, source, package_path, sha1) |
|
44 | 44 |
return |
45 | 45 |
except Exception as e: |
46 | 46 |
logger.exception(e) |
... | ... |
@@ -63,13 +63,21 @@ class pullSources: |
63 | 63 |
with open(conf_file) as jsonFile: |
64 | 64 |
self._config = json.load(jsonFile) |
65 | 65 |
|
66 |
- def downloadFile(self, filename, file_path): |
|
66 |
+ def downloadFile(self, package, filename, file_path): |
|
67 | 67 |
#form url: https://dl.bintray.com/vmware/photon_sources/1.0/<filename>. |
68 | 68 |
url = '%s/%s' % (self._config['baseurl'], filename) |
69 | 69 |
|
70 | 70 |
self.logger.info("Downloading: " + url) |
71 | 71 |
|
72 |
- with open(file_path, 'wb') as handle: |
|
72 |
+ # We need to provide atomicity for file downloads. That is, |
|
73 |
+ # the file should be visible in its canonical location only |
|
74 |
+ # when the download is complete. To achieve that, first |
|
75 |
+ # download to a temporary location (on the same filesystem) |
|
76 |
+ # and then rename it to the final destination filename. |
|
77 |
+ |
|
78 |
+ temp_file_path = file_path + "-" + package |
|
79 |
+ |
|
80 |
+ with open(temp_file_path, 'wb') as handle: |
|
73 | 81 |
response = requests.get(url, auth=self._auth, stream=True) |
74 | 82 |
|
75 | 83 |
if not response.ok: |
... | ... |
@@ -82,9 +90,15 @@ class pullSources: |
82 | 82 |
handle.write(block) |
83 | 83 |
handle.flush() |
84 | 84 |
response.close() |
85 |
+ |
|
86 |
+ if os.path.exists(file_path): |
|
87 |
+ os.remove(temp_file_path) |
|
88 |
+ else: |
|
89 |
+ os.rename(temp_file_path, file_path) |
|
90 |
+ |
|
85 | 91 |
return file_path |
86 | 92 |
|
87 |
- def downloadFileHelper(self, package_name, package_path, package_sha1=None): |
|
88 |
- self.downloadFile(package_name, package_path) |
|
89 |
- if package_sha1 != getFileHash(package_path): |
|
90 |
- raise Exception('Invalid sha1 for package %s' % package_name) |
|
93 |
+ def downloadFileHelper(self, package, filename, file_path, package_sha1=None): |
|
94 |
+ self.downloadFile(package, filename, file_path) |
|
95 |
+ if package_sha1 != getFileHash(file_path): |
|
96 |
+ raise Exception('Invalid sha1 for package %s file %s' % package, filename) |