Browse code

Cleanup: pass enable_multipart via Config()

Remove all the newly introduced parameters for passing enable_multipart
and keep it in Config() instead.

Also rename --enable-multipart to --disable-multipart and
introduce a --multipart-chunk-size=SIZE parameter.

Michal Ludvig authored on 2012/01/02 20:40:38
Showing 4 changed files
... ...
@@ -63,6 +63,7 @@ class Config(object):
63 63
     default_mime_type = "binary/octet-stream"
64 64
     guess_mime_type = False
65 65
     mime_type = ""
66
+    enable_multipart = True
66 67
     multipart_num_threads = 4
67 68
     multipart_chunk_size = 5242880
68 69
     # List of checks to be performed for 'sync'
... ...
@@ -46,10 +46,8 @@ class ThreadPool(object):
46 46
 
47 47
 class MultiPartUpload(object):
48 48
 
49
-    MIN_CHUNK_SIZE = 5242880 # 5MB
50 49
     MAX_CHUNK_SIZE = 5368709120 # 5GB
51 50
     MAX_CHUNKS = 100
52
-    MAX_FILE_SIZE = 42949672960 # 5TB
53 51
 
54 52
     def __init__(self, s3, file, uri):
55 53
         self.s3 = s3
... ...
@@ -79,7 +77,6 @@ class MultiPartUpload(object):
79 79
         if not self.upload_id:
80 80
             raise RuntimeError("Attempting to use a multipart upload that has not been initiated.")
81 81
 
82
-        chunk_size = max(self.MIN_CHUNK_SIZE, chunk_size)
83 82
         id = 1
84 83
         if num_threads > 1:
85 84
             debug("MultiPart: Uploading in %d threads" % num_threads)
... ...
@@ -335,7 +335,7 @@ class S3(object):
335 335
 
336 336
         return response
337 337
 
338
-    def object_put(self, filename, uri, extra_headers = None, extra_label = "", multipart = False):
338
+    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
339 339
         # TODO TODO
340 340
         # Make it consistent with stream-oriented object_get()
341 341
         if uri.type != "s3":
... ...
@@ -353,8 +353,9 @@ class S3(object):
353 353
         if extra_headers:
354 354
             headers.update(extra_headers)
355 355
 
356
-        if not multipart:
357
-            if size > 104857600: # 100MB
356
+        multipart = False
357
+        if self.config.enable_multipart:
358
+            if size > 2 * self.config.multipart_chunk_size:
358 359
                 multipart = True
359 360
 
360 361
         if multipart:
... ...
@@ -744,18 +745,11 @@ class S3(object):
744 744
         upload = MultiPartUpload(self, file, uri)
745 745
         bucket, key, upload_id = upload.initiate_multipart_upload()
746 746
 
747
-        num_threads = self.config.multipart_num_threads or 4
747
+        num_threads = self.config.multipart_num_threads
748
+        chunk_size = self.config.multipart_chunk_size or 5242880 # 5MB default
748 749
 
749
-        if size > MultiPartUpload.MAX_FILE_SIZE:
750
-            raise RuntimeError("File is too large (%i bytes, max %i)" % (size, MultiPartUpload.MAX_FILE_SIZE))
751
-        elif size > 107374182400: # 100GB
752
-            chunk_size = size / 10000
753
-        elif size > 10737418240: # 10GB
754
-            chunk_size = size / 1000
755
-        elif size > 1073741824: # 1GB
756
-            chunk_size = size / 100
757
-        else:
758
-            chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
750
+        if chunk_size > MultiPartUpload.MAX_CHUNK_SIZE:
751
+            raise RuntimeError("Chunk size is too large (%i bytes, max %i). Please adjust with --multipart-chunk-size=SIZE." % (chunk_size, MultiPartUpload.MAX_CHUNK_SIZE))
759 752
 
760 753
         file.seek(0)
761 754
         upload.upload_all_parts(num_threads, chunk_size)
... ...
@@ -241,7 +241,7 @@ def cmd_bucket_delete(args):
241 241
         _bucket_delete_one(uri)
242 242
         output(u"Bucket '%s' removed" % uri.uri())
243 243
 
244
-def cmd_object_put(args, multipart):
244
+def cmd_object_put(args):
245 245
     cfg = Config()
246 246
     s3 = S3(cfg)
247 247
 
... ...
@@ -296,7 +296,7 @@ def cmd_object_put(args, multipart):
296 296
         if Config().encrypt:
297 297
             exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
298 298
         try:
299
-            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label, multipart = multipart)
299
+            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label)
300 300
         except S3UploadError, e:
301 301
             error(u"Upload of '%s' failed too many times. Skipping that file." % full_name_orig)
302 302
             continue
... ...
@@ -1526,7 +1526,9 @@ def main():
1526 1526
     optparser.add_option(      "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding)
1527 1527
     optparser.add_option(      "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. Use with caution!")
1528 1528
 
1529
-    optparser.add_option(      "--enable-multipart", dest="enable_multipart", action="store_true", default=False, help="Force multipart upload on files < 100MB")
1529
+    optparser.add_option(      "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than 2 * --multipart-chunk-size")
1530
+    optparser.add_option(      "--multipart-chunk-size", dest="multipart_chunk_size", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than 2*SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Bytes.")
1531
+
1530 1532
     optparser.add_option(      "--list-md5", dest="list_md5", action="store_true", help="Include MD5 sums in bucket listings (only for 'ls' command).")
1531 1533
     optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
1532 1534
 
... ...
@@ -1730,10 +1732,7 @@ def main():
1730 1730
         sys.exit(1)
1731 1731
 
1732 1732
     try:
1733
-        if cmd_func == cmd_object_put:
1734
-            cmd_func(args, options.enable_multipart)
1735
-        else:
1736
-            cmd_func(args)
1733
+        cmd_func(args)
1737 1734
     except S3Error, e:
1738 1735
         error(u"S3 error: %s" % e)
1739 1736
         sys.exit(1)