Browse code

Cleanup: pass enable_multipart via Config()

Remove all the newly introduced parameters for passing enable_multipart
and keep it in Config() instead.

Also rename --enable-multipart to --disable-multipart and
introduce a --multipart-chunk-size=SIZE parameter.

Michal Ludvig authored on 2012/01/02 20:40:38
Showing 4 changed files
... ...
@@ -63,6 +63,7 @@ class Config(object):
63 63
     default_mime_type = "binary/octet-stream"
64 64
     guess_mime_type = False
65 65
     mime_type = ""
66
+    enable_multipart = True
66 67
     multipart_num_threads = 4
67 68
     multipart_chunk_size = 5242880
68 69
     # List of checks to be performed for 'sync'
... ...
@@ -46,10 +46,8 @@ class ThreadPool(object):
46 46
 
47 47
 class MultiPartUpload(object):
48 48
 
49
-    MIN_CHUNK_SIZE = 5242880 # 5MB
50 49
     MAX_CHUNK_SIZE = 5368709120 # 5GB
51 50
     MAX_CHUNKS = 100
52
-    MAX_FILE_SIZE = 42949672960 # 5TB
53 51
 
54 52
     def __init__(self, s3, file, uri):
55 53
         self.s3 = s3
... ...
@@ -79,7 +77,6 @@ class MultiPartUpload(object):
79 79
         if not self.upload_id:
80 80
             raise RuntimeError("Attempting to use a multipart upload that has not been initiated.")
81 81
 
82
-        chunk_size = max(self.MIN_CHUNK_SIZE, chunk_size)
83 82
         id = 1
84 83
         if num_threads > 1:
85 84
             debug("MultiPart: Uploading in %d threads" % num_threads)
... ...
@@ -335,7 +335,7 @@ class S3(object):
335 335
 
336 336
         return response
337 337
 
338
-    def object_put(self, filename, uri, extra_headers = None, extra_label = "", multipart = False):
338
+    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
339 339
         # TODO TODO
340 340
         # Make it consistent with stream-oriented object_get()
341 341
         if uri.type != "s3":
... ...
@@ -353,8 +353,9 @@ class S3(object):
353 353
         if extra_headers:
354 354
             headers.update(extra_headers)
355 355
 
356
-        if not multipart:
357
-            if size > 104857600: # 100MB
356
+        multipart = False
357
+        if self.config.enable_multipart:
358
+            if size > 2 * self.config.multipart_chunk_size:
358 359
                 multipart = True
359 360
 
360 361
         if multipart:
... ...
@@ -744,18 +745,11 @@ class S3(object):
744 744
         upload = MultiPartUpload(self, file, uri)
745 745
         bucket, key, upload_id = upload.initiate_multipart_upload()
746 746
 
747
-        num_threads = self.config.multipart_num_threads or 4
747
+        num_threads = self.config.multipart_num_threads
748
+        chunk_size = self.config.multipart_chunk_size or 5242880 # 5MB default
748 749
 
749
-        if size > MultiPartUpload.MAX_FILE_SIZE:
750
-            raise RuntimeError("File is too large (%i bytes, max %i)" % (size, MultiPartUpload.MAX_FILE_SIZE))
751
-        elif size > 107374182400: # 100GB
752
-            chunk_size = size / 10000
753
-        elif size > 10737418240: # 10GB
754
-            chunk_size = size / 1000
755
-        elif size > 1073741824: # 1GB
756
-            chunk_size = size / 100
757
-        else:
758
-            chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
750
+        if chunk_size > MultiPartUpload.MAX_CHUNK_SIZE:
751
+            raise RuntimeError("Chunk size is too large (%i bytes, max %i). Please adjust with --multipart-chunk-size=SIZE." % (chunk_size, MultiPartUpload.MAX_CHUNK_SIZE))
759 752
 
760 753
         file.seek(0)
761 754
         upload.upload_all_parts(num_threads, chunk_size)
... ...
@@ -241,7 +241,7 @@ def cmd_bucket_delete(args):
241 241
         _bucket_delete_one(uri)
242 242
         output(u"Bucket '%s' removed" % uri.uri())
243 243
 
244
-def cmd_object_put(args, multipart):
244
+def cmd_object_put(args):
245 245
     cfg = Config()
246 246
     s3 = S3(cfg)
247 247
 
... ...
@@ -296,7 +296,7 @@ def cmd_object_put(args, multipart):
296 296
         if Config().encrypt:
297 297
             exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig)
298 298
         try:
299
-            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label, multipart = multipart)
299
+            response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label)
300 300
         except S3UploadError, e:
301 301
             error(u"Upload of '%s' failed too many times. Skipping that file." % full_name_orig)
302 302
             continue
... ...
@@ -1526,7 +1526,9 @@ def main():
1526 1526
     optparser.add_option(      "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding)
1527 1527
     optparser.add_option(      "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. Use with caution!")
1528 1528
 
1529
-    optparser.add_option(      "--enable-multipart", dest="enable_multipart", action="store_true", default=False, help="Force multipart upload on files < 100MB")
1529
+    optparser.add_option(      "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than 2 * --multipart-chunk-size")
1530
+    optparser.add_option(      "--multipart-chunk-size", dest="multipart_chunk_size", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than 2*SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Bytes.")
1531
+
1530 1532
     optparser.add_option(      "--list-md5", dest="list_md5", action="store_true", help="Include MD5 sums in bucket listings (only for 'ls' command).")
1531 1533
     optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")
1532 1534
 
... ...
@@ -1730,10 +1732,7 @@ def main():
1730 1730
         sys.exit(1)
1731 1731
 
1732 1732
     try:
1733
-        if cmd_func == cmd_object_put:
1734
-            cmd_func(args, options.enable_multipart)
1735
-        else:
1736
-            cmd_func(args)
1733
+        cmd_func(args)
1737 1734
     except S3Error, e:
1738 1735
         error(u"S3 error: %s" % e)
1739 1736
         sys.exit(1)