Browse code

Properly handle multipart chunk sizes

Michal Ludvig authored on 2012/01/02 21:48:49
Showing 3 changed files
... ...
@@ -46,8 +46,10 @@ class ThreadPool(object):
46 46
 
47 47
 class MultiPartUpload(object):
48 48
 
49
-    MAX_CHUNK_SIZE = 5368709120 # 5GB
49
+    MIN_CHUNK_SIZE_MB = 5       # 5MB
50
+    MAX_CHUNK_SIZE_MB = 5120    # 5GB
50 51
     MAX_CHUNKS = 100
52
+    MAX_FILE_SIZE = 42949672960 # 40GB (NOTE: a 5TB limit would be 5497558138880)
51 53
 
52 54
     def __init__(self, s3, file, uri):
53 55
         self.s3 = s3
... ...
@@ -355,7 +355,7 @@ class S3(object):
355 355
 
356 356
         multipart = False
357 357
         if self.config.enable_multipart:
358
-            if size > 2 * self.config.multipart_chunk_size:
358
+            if size > self.config.multipart_chunk_size:
359 359
                 multipart = True
360 360
 
361 361
         if multipart:
... ...
@@ -746,10 +746,7 @@ class S3(object):
746 746
         bucket, key, upload_id = upload.initiate_multipart_upload()
747 747
 
748 748
         num_threads = self.config.multipart_num_threads
749
-        chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
750
-
751
-        if chunk_size > MultiPartUpload.MAX_CHUNK_SIZE:
752
-            raise RuntimeError("Chunk size is too large (%i bytes, max %i). Please adjust with --multipart-chunk-size=SIZE." % (size, MultiPartUpload.MAX_CHUNK_SIZE))
749
+        chunk_size = self.config.multipart_chunk_size
753 750
 
754 751
         file.seek(0)
755 752
         upload.upload_all_parts(num_threads, chunk_size)
... ...
@@ -1527,7 +1527,7 @@ def main():
1527 1527
     optparser.add_option(      "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. Use with caution!")
1528 1528
 
1529 1529
     optparser.add_option(      "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than --multipart-chunk-size")
1530
-    optparser.add_option(      "--multipart-chunk-size", dest="multipart_chunk_size", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than 2*SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Bytes.")
1530
+    optparser.add_option(      "--multipart-chunk-size", dest="multipart_chunk_size", type="int", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Mega-Bytes, default chunk size is %defaultMB, minimum allowed chunk size is 5MB, maximum is 5GB.")
1531 1531
 
1532 1532
     optparser.add_option(      "--list-md5", dest="list_md5", action="store_true", help="Include MD5 sums in bucket listings (only for 'ls' command).")
1533 1533
     optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
... ...
@@ -1652,6 +1652,12 @@ def main():
1652 1652
     cfg.update_option("enable", options.enable)
1653 1653
     cfg.update_option("acl_public", options.acl_public)
1654 1654
 
1655
+    ## Check multipart chunk constraints
1656
+    if cfg.multipart_chunk_size < MultiPartUpload.MIN_CHUNK_SIZE_MB:
1657
+        raise ParameterError("Chunk size %d MB is too small, must be >= %d MB. Please adjust --multipart-chunk-size" % (cfg.multipart_chunk_size, MultiPartUpload.MIN_CHUNK_SIZE_MB))
1658
+    if cfg.multipart_chunk_size > MultiPartUpload.MAX_CHUNK_SIZE_MB:
1659
+        raise ParameterError("Chunk size %d MB is too large, must be <= %d MB. Please adjust --multipart-chunk-size" % (cfg.multipart_chunk_size, MultiPartUpload.MAX_CHUNK_SIZE_MB))
1660
+
1655 1661
     ## CloudFront's cf_enable and Config's enable share the same --enable switch
1656 1662
     options.cf_enable = options.enable
1657 1663
 
... ...
@@ -1789,6 +1795,7 @@ if __name__ == '__main__':
1789 1789
         from S3.CloudFront import Cmd as CfCmd
1790 1790
         from S3.CloudFront import CloudFront
1791 1791
         from S3.FileLists import *
1792
+        from S3.MultiPart import MultiPartUpload
1792 1793
 
1793 1794
         main()
1794 1795
         sys.exit(0)