Remove all the newly introduced parameters for passing enable_multipart
and keep it in Config() instead.
Also rename --enable-multipart to --disable-multipart and
introduce the --multipart-chunk-size=SIZE parameter.
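The hunks below assume Config() now carries the three multipart settings directly. A minimal sketch of what that might look like; the attribute names come from the diff, but the default values are illustrative assumptions only:

```python
# Hedged sketch, not part of the patch: plausible Config() defaults for
# the settings the diff reads. The values are assumptions -- 5242880 is
# the old MIN_CHUNK_SIZE constant, and 4 was the old num_threads fallback.
class Config(object):
    enable_multipart = True          # --disable-multipart flips this off
    multipart_chunk_size = 5242880   # 5MB, assumed default
    multipart_num_threads = 4        # assumed default
```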
@@ -46,10 +46,8 @@ class ThreadPool(object):
 
 class MultiPartUpload(object):
 
-    MIN_CHUNK_SIZE = 5242880 # 5MB
     MAX_CHUNK_SIZE = 5368709120 # 5GB
     MAX_CHUNKS = 100
-    MAX_FILE_SIZE = 42949672960 # 5TB
 
     def __init__(self, s3, file, uri):
        self.s3 = s3
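For context (not part of the patch): S3 itself accepts parts of 5MB to 5GB and up to 10,000 parts per upload, so MAX_CHUNKS = 100 is this code's own much stricter ceiling. Note also that the deleted MAX_FILE_SIZE comment mislabels 42949672960 bytes (40GB) as 5TB. If MAX_CHUNKS is enforced, the surviving constants imply a hard size cap:

```python
# Illustrative arithmetic on the constants this hunk keeps: the largest
# upload they admit is MAX_CHUNKS full-size chunks (assuming MAX_CHUNKS
# is enforced elsewhere, which is outside this diff).
MAX_CHUNK_SIZE = 5368709120  # 5GB
MAX_CHUNKS = 100
print(MAX_CHUNKS * MAX_CHUNK_SIZE)  # 536870912000 bytes, i.e. 500GB
```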
@@ -79,7 +77,6 @@ class MultiPartUpload(object):
         if not self.upload_id:
             raise RuntimeError("Attempting to use a multipart upload that has not been initiated.")
 
-        chunk_size = max(self.MIN_CHUNK_SIZE, chunk_size)
         id = 1
         if num_threads > 1:
             debug("MultiPart: Uploading in %d threads" % num_threads)
@@ -335,7 +335,7 @@ class S3(object):
 
         return response
 
-    def object_put(self, filename, uri, extra_headers = None, extra_label = "", multipart = False):
+    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
         # TODO TODO
         # Make it consistent with stream-oriented object_get()
         if uri.type != "s3":
@@ -353,8 +353,9 @@ class S3(object):
         if extra_headers:
             headers.update(extra_headers)
 
-        if not multipart:
-            if size > 104857600: # 100MB
+        multipart = False
+        if self.config.enable_multipart:
+            if size > 2 * self.config.multipart_chunk_size:
                 multipart = True
 
         if multipart:
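The upload path choice is now purely config-driven: anything larger than twice the configured chunk size goes multipart, everything else takes the traditional single PUT. Extracted as a standalone sketch (the function is mine; the 5MB default is the assumption from above):

```python
# Illustrative only: the decision object_put now makes.
def should_use_multipart(size, enable_multipart, chunk_size=5242880):
    return enable_multipart and size > 2 * chunk_size

print(should_use_multipart(20 * 1024 * 1024, True))   # True: 20MB > 2 * 5MB
print(should_use_multipart(8 * 1024 * 1024, True))    # False: single PUT
print(should_use_multipart(20 * 1024 * 1024, False))  # False: --disable-multipart
```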
@@ -744,18 +745,11 @@ class S3(object):
         upload = MultiPartUpload(self, file, uri)
         bucket, key, upload_id = upload.initiate_multipart_upload()
 
-        num_threads = self.config.multipart_num_threads or 4
+        num_threads = self.config.multipart_num_threads
+        chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
 
-        if size > MultiPartUpload.MAX_FILE_SIZE:
-            raise RuntimeError("File is too large (%i bytes, max %i)" % (size, MultiPartUpload.MAX_FILE_SIZE))
-        elif size > 107374182400: # 100GB
-            chunk_size = size / 10000
-        elif size > 10737418240: # 10GB
-            chunk_size = size / 1000
-        elif size > 1073741824: # 1GB
-            chunk_size = size / 100
-        else:
-            chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
+        if chunk_size > MultiPartUpload.MAX_CHUNK_SIZE:
+            raise RuntimeError("Chunk size is too large (%i bytes, max %i). Please adjust with --multipart-chunk-size=SIZE." % (chunk_size, MultiPartUpload.MAX_CHUNK_SIZE))
 
         file.seek(0)
         upload.upload_all_parts(num_threads, chunk_size)
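One wrinkle worth flagging: the fallback above still names MultiPartUpload.MIN_CHUNK_SIZE, which the first hunk deletes, so the `or` branch would raise AttributeError whenever multipart_chunk_size is unset. Presumably Config() always supplies a value; a self-contained alternative might keep the old 5MB floor as a module default:

```python
# Hedged sketch, not part of the patch: replace the reference to the
# deleted constant with a module-level default holding its old value.
DEFAULT_CHUNK_SIZE = 5242880  # 5MB, the value MIN_CHUNK_SIZE used to hold

def effective_chunk_size(configured):
    """Fall back to the 5MB floor when no chunk size is configured."""
    return configured or DEFAULT_CHUNK_SIZE
```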
@@ -241,7 +241,7 @@ def cmd_bucket_delete(args):
         _bucket_delete_one(uri)
         output(u"Bucket '%s' removed" % uri.uri())
 
-def cmd_object_put(args, multipart):
+def cmd_object_put(args):
     cfg = Config()
     s3 = S3(cfg)
 
@@ -296,7 +296,7 @@ def cmd_object_put(args, multipart):
         if Config().encrypt:
             exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
         try:
-            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label, multipart = multipart)
+            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label)
         except S3UploadError, e:
             error(u"Upload of '%s' failed too many times. Skipping that file." % full_name_orig)
             continue
@@ -1526,7 +1526,9 @@ def main():
     optparser.add_option( "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding)
     optparser.add_option( "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. Use with caution!")
 
-    optparser.add_option( "--enable-multipart", dest="enable_multipart", action="store_true", default=False, help="Force multipart upload on files < 100MB")
+    optparser.add_option( "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than --multipart-chunk-size")
+    optparser.add_option( "--multipart-chunk-size", dest="multipart_chunk_size", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than 2*SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Bytes.")
+
     optparser.add_option( "--list-md5", dest="list_md5", action="store_true", help="Include MD5 sums in bucket listings (only for 'ls' command).")
     optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
 
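One subtlety: action="store" hands the option value back as a string, so unless Config() coerces it on assignment, the later `2 * self.config.multipart_chunk_size` would be Python 2 string repetition rather than arithmetic. A minimal, self-contained sketch of the coercion this presumably relies on (whether s3cmd's Config.update_option already performs it is outside this diff):

```python
from optparse import OptionParser

# Sketch only: coerce the raw optparse string to an int before it is
# used in size comparisons. Passing type="int" to add_option would be
# an equivalent (assumed, not in the patch) alternative.
parser = OptionParser()
parser.add_option("--multipart-chunk-size", dest="multipart_chunk_size",
                  action="store", metavar="SIZE")
options, args = parser.parse_args(["--multipart-chunk-size", "5242880"])
chunk_size = int(options.multipart_chunk_size)
print(chunk_size)  # 5242880 as an int
```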
@@ -1730,10 +1732,7 @@ def main():
         sys.exit(1)
 
     try:
-        if cmd_func == cmd_object_put:
-            cmd_func(args, options.enable_multipart)
-        else:
-            cmd_func(args)
+        cmd_func(args)
     except S3Error, e:
         error(u"S3 error: %s" % e)
         sys.exit(1)