| ... | ... |
@@ -13,12 +13,14 @@ from .S3Uri import S3UriS3 |
| 13 | 13 |
from .Utils import (getTextFromXml, getTreeFromXml, formatSize, |
| 14 | 14 |
calculateChecksum, parseNodes, s3_quote) |
| 15 | 15 |
|
| 16 |
+SIZE_1MB = 1024 * 1024 |
|
| 17 |
+ |
|
| 16 | 18 |
|
| 17 | 19 |
class MultiPartUpload(object): |
| 18 | 20 |
"""Supports MultiPartUpload and MultiPartUpload(Copy) operation""" |
| 19 | 21 |
MIN_CHUNK_SIZE_MB = 5 # 5MB |
| 20 |
- MAX_CHUNK_SIZE_MB = 5120 # 5GB |
|
| 21 |
- MAX_FILE_SIZE = 42949672960 # 5TB |
|
| 22 |
+ MAX_CHUNK_SIZE_MB = 5 * 1024 # 5GB |
|
| 23 |
+ MAX_FILE_SIZE = 5 * 1024 * 1024 * SIZE_1MB # 5TB |
|
| 22 | 24 |
|
| 23 | 25 |
def __init__(self, s3, src, dst_uri, headers_baseline=None, |
| 24 | 26 |
src_size=None): |
| ... | ... |
@@ -36,11 +38,11 @@ class MultiPartUpload(object): |
| 36 | 36 |
if not src_size: |
| 37 | 37 |
raise ParameterError("Source size is missing for "
|
| 38 | 38 |
"MultipartUploadCopy operation") |
| 39 |
- c_size = self.s3.config.multipart_copy_chunk_size_mb * 1024 * 1024 |
|
| 39 |
+ c_size = self.s3.config.multipart_copy_chunk_size_mb * SIZE_1MB |
|
| 40 | 40 |
else: |
| 41 | 41 |
# Source is a file_stream to upload |
| 42 | 42 |
self.file_stream = src |
| 43 |
- c_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024 |
|
| 43 |
+ c_size = self.s3.config.multipart_chunk_size_mb * SIZE_1MB |
|
| 44 | 44 |
|
| 45 | 45 |
self.chunk_size = c_size |
| 46 | 46 |
self.upload_id = self.initiate_multipart_upload() |
| ... | ... |
@@ -127,8 +127,9 @@ def mime_magic(file): |
| 127 | 127 |
result = (None, None) |
| 128 | 128 |
return result |
| 129 | 129 |
|
| 130 |
-EXPECT_CONTINUE_TIMEOUT = 2 |
|
| 131 | 130 |
|
| 131 |
+EXPECT_CONTINUE_TIMEOUT = 2 |
|
| 132 |
+SIZE_1MB = 1024 * 1024 |
|
| 132 | 133 |
|
| 133 | 134 |
__all__ = [] |
| 134 | 135 |
class S3Request(object): |
| ... | ... |
@@ -681,9 +682,9 @@ class S3(object): |
| 681 | 681 |
if not self.config.enable_multipart and filename == "-": |
| 682 | 682 |
raise ParameterError("Multi-part upload is required to upload from stdin")
|
| 683 | 683 |
if self.config.enable_multipart: |
| 684 |
- if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-": |
|
| 684 |
+ if size > self.config.multipart_chunk_size_mb * SIZE_1MB or filename == "-": |
|
| 685 | 685 |
multipart = True |
| 686 |
- if size > self.config.multipart_max_chunks * self.config.multipart_chunk_size_mb * 1024 * 1024: |
|
| 686 |
+ if size > self.config.multipart_max_chunks * self.config.multipart_chunk_size_mb * SIZE_1MB: |
|
| 687 | 687 |
raise ParameterError("Chunk size %d MB results in more than %d chunks. Please increase --multipart-chunk-size-mb" % \
|
| 688 | 688 |
(self.config.multipart_chunk_size_mb, self.config.multipart_max_chunks)) |
| 689 | 689 |
if multipart: |
| ... | ... |
@@ -816,7 +817,7 @@ class S3(object): |
| 816 | 816 |
return headers |
| 817 | 817 |
|
| 818 | 818 |
def object_copy(self, src_uri, dst_uri, extra_headers=None, |
| 819 |
- extra_label=""): |
|
| 819 |
+ src_size=None, extra_label=""): |
|
| 820 | 820 |
if src_uri.type != "s3": |
| 821 | 821 |
raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
|
| 822 | 822 |
if dst_uri.type != "s3": |
| ... | ... |
@@ -830,7 +831,7 @@ class S3(object): |
| 830 | 830 |
if exc.status != 501: |
| 831 | 831 |
raise exc |
| 832 | 832 |
acl = None |
| 833 |
- headers = SortedDict(ignore_case = True) |
|
| 833 |
+ headers = SortedDict(ignore_case=True) |
|
| 834 | 834 |
|
| 835 | 835 |
if self.config.acl_public: |
| 836 | 836 |
headers["x-amz-acl"] = "public-read" |
| ... | ... |
@@ -850,15 +851,18 @@ class S3(object): |
| 850 | 850 |
if extra_headers: |
| 851 | 851 |
headers.update(extra_headers) |
| 852 | 852 |
|
| 853 |
- ## Multipart decision - only do multipart copy for remote s3 files > 5gb |
|
| 853 |
+ # Multipart decision. Only do multipart copy for remote s3 files |
|
| 854 |
+ # bigger than the multipart copy threshold. |
|
| 854 | 855 |
if self.config.enable_multipart: |
| 855 |
- # get size of remote src only if multipart is enabled |
|
| 856 |
- src_info = self.object_info(src_uri) |
|
| 857 |
- size = int(src_info["headers"]["content-length"]) |
|
| 856 |
+ # get size of remote src only if multipart is enabled and no size |
|
| 857 |
+ # info was provided |
|
| 858 |
+ if src_size is None: |
|
| 859 |
+ src_info = self.object_info(src_uri) |
|
| 860 |
+ src_size = int(src_info["headers"]["content-length"]) |
|
| 858 | 861 |
|
| 859 |
- if size > self.config.multipart_copy_chunk_size_mb * 1024 * 1024: |
|
| 862 |
+ if src_size > self.config.multipart_copy_chunk_size_mb * SIZE_1MB: |
|
| 860 | 863 |
# Multipart requests are quite different... drop here |
| 861 |
- return self.copy_file_multipart(src_uri, dst_uri, size, |
|
| 864 |
+ return self.copy_file_multipart(src_uri, dst_uri, src_size, |
|
| 862 | 865 |
headers, extra_label) |
| 863 | 866 |
|
| 864 | 867 |
## Not multipart... |
| ... | ... |
@@ -872,7 +876,7 @@ class S3(object): |
| 872 | 872 |
headers=headers) |
| 873 | 873 |
response = self.send_request(request) |
| 874 | 874 |
if response["data"] and getRootTagName(response["data"]) == "Error": |
| 875 |
- #http://doc.s3.amazonaws.com/proposals/copy.html |
|
| 875 |
+ # http://doc.s3.amazonaws.com/proposals/copy.html |
|
| 876 | 876 |
# Error during copy, status will be 200, so force error code 500 |
| 877 | 877 |
response["status"] = 500 |
| 878 | 878 |
error("Server error during the COPY operation. Overwrite response "
|
| ... | ... |
@@ -890,7 +894,7 @@ class S3(object): |
| 890 | 890 |
return response |
| 891 | 891 |
|
| 892 | 892 |
def object_modify(self, src_uri, dst_uri, extra_headers=None, |
| 893 |
- extra_label=""): |
|
| 893 |
+ src_size=None, extra_label=""): |
|
| 894 | 894 |
|
| 895 | 895 |
if src_uri.type != "s3": |
| 896 | 896 |
raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
|
| ... | ... |
@@ -957,12 +961,13 @@ class S3(object): |
| 957 | 957 |
return response |
| 958 | 958 |
|
| 959 | 959 |
def object_move(self, src_uri, dst_uri, extra_headers=None, |
| 960 |
- extra_label=""): |
|
| 961 |
- response_copy = self.object_copy(src_uri, dst_uri, extra_headers) |
|
| 960 |
+ src_size=None, extra_label=""): |
|
| 961 |
+ response_copy = self.object_copy(src_uri, dst_uri, extra_headers, |
|
| 962 |
+ src_size, extra_label) |
|
| 962 | 963 |
debug("Object %s copied to %s" % (src_uri, dst_uri))
|
| 963 | 964 |
if not response_copy["data"] \ |
| 964 | 965 |
or getRootTagName(response_copy["data"]) \ |
| 965 |
- in ["CopyObjectResult", "CompleteMultipartUploadResult"]: |
|
| 966 |
+ in ["CopyObjectResult", "CompleteMultipartUploadResult"]: |
|
| 966 | 967 |
self.object_delete(src_uri) |
| 967 | 968 |
debug("Object '%s' deleted", src_uri)
|
| 968 | 969 |
else: |
| ... | ... |
@@ -831,19 +831,23 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 831 | 831 |
cfg = Config() |
| 832 | 832 |
if action_str == 'modify': |
| 833 | 833 |
if len(args) < 1: |
| 834 |
- raise ParameterError("Expecting one or more S3 URIs for " + action_str)
|
|
| 834 |
+ raise ParameterError("Expecting one or more S3 URIs for "
|
|
| 835 |
+ + action_str) |
|
| 835 | 836 |
destination_base = None |
| 836 | 837 |
else: |
| 837 | 838 |
if len(args) < 2: |
| 838 |
- raise ParameterError("Expecting two or more S3 URIs for " + action_str)
|
|
| 839 |
+ raise ParameterError("Expecting two or more S3 URIs for "
|
|
| 840 |
+ + action_str) |
|
| 839 | 841 |
dst_base_uri = S3Uri(args.pop()) |
| 840 | 842 |
if dst_base_uri.type != "s3": |
| 841 |
- raise ParameterError("Destination must be S3 URI. To download a file use 'get' or 'sync'.")
|
|
| 843 |
+ raise ParameterError("Destination must be S3 URI. To download a "
|
|
| 844 |
+ "file use 'get' or 'sync'.") |
|
| 842 | 845 |
destination_base = dst_base_uri.uri() |
| 843 | 846 |
|
| 844 | 847 |
scoreboard = ExitScoreboard() |
| 845 | 848 |
|
| 846 |
- remote_list, exclude_list, remote_total_size = fetch_remote_list(args, require_attribs = False) |
|
| 849 |
+ remote_list, exclude_list, remote_total_size = \ |
|
| 850 |
+ fetch_remote_list(args, require_attribs=False) |
|
| 847 | 851 |
|
| 848 | 852 |
remote_count = len(remote_list) |
| 849 | 853 |
|
| ... | ... |
@@ -854,7 +858,9 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 854 | 854 |
# so we don't need to test for it here. |
| 855 | 855 |
if not destination_base.endswith('/') \
|
| 856 | 856 |
and (len(remote_list) > 1 or cfg.recursive): |
| 857 |
- raise ParameterError("Destination must be a directory and end with '/' when acting on a folder content or on multiple sources.")
|
|
| 857 |
+ raise ParameterError("Destination must be a directory and end with"
|
|
| 858 |
+ " '/' when acting on a folder content or on " |
|
| 859 |
+ "multiple sources.") |
|
| 858 | 860 |
|
| 859 | 861 |
if cfg.recursive: |
| 860 | 862 |
for key in remote_list: |
| ... | ... |
@@ -873,7 +879,9 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 873 | 873 |
for key in exclude_list: |
| 874 | 874 |
output(u"exclude: %s" % key) |
| 875 | 875 |
for key in remote_list: |
| 876 |
- output(u"%s: '%s' -> '%s'" % (action_str, remote_list[key]['object_uri_str'], remote_list[key]['dest_name'])) |
|
| 876 |
+ output(u"%s: '%s' -> '%s'" % (action_str, |
|
| 877 |
+ remote_list[key]['object_uri_str'], |
|
| 878 |
+ remote_list[key]['dest_name'])) |
|
| 877 | 879 |
|
| 878 | 880 |
warning(u"Exiting now because of --dry-run") |
| 879 | 881 |
return EX_OK |
| ... | ... |
@@ -886,10 +894,12 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 886 | 886 |
item = remote_list[key] |
| 887 | 887 |
src_uri = S3Uri(item['object_uri_str']) |
| 888 | 888 |
dst_uri = S3Uri(item['dest_name']) |
| 889 |
+ src_size = item.get('size')
|
|
| 889 | 890 |
|
| 890 | 891 |
extra_headers = copy(cfg.extra_headers) |
| 891 | 892 |
try: |
| 892 | 893 |
response = process_fce(src_uri, dst_uri, extra_headers, |
| 894 |
+ src_size=src_size, |
|
| 893 | 895 |
extra_label=seq_label) |
| 894 | 896 |
output(message % {"src": src_uri, "dst": dst_uri,
|
| 895 | 897 |
"extra": seq_label}) |
| ... | ... |
@@ -902,12 +912,14 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 902 | 902 |
warning(u"Key not found %s" % item['object_uri_str']) |
| 903 | 903 |
else: |
| 904 | 904 |
scoreboard.failed() |
| 905 |
- if cfg.stop_on_error: break |
|
| 905 |
+ if cfg.stop_on_error: |
|
| 906 |
+ break |
|
| 906 | 907 |
return scoreboard.rc() |
| 907 | 908 |
|
| 908 | 909 |
def cmd_cp(args): |
| 909 | 910 |
s3 = S3(Config()) |
| 910 |
- return subcmd_cp_mv(args, s3.object_copy, "copy", u"remote copy: '%(src)s' -> '%(dst)s' %(extra)s") |
|
| 911 |
+ return subcmd_cp_mv(args, s3.object_copy, "copy", |
|
| 912 |
+ u"remote copy: '%(src)s' -> '%(dst)s' %(extra)s") |
|
| 911 | 913 |
|
| 912 | 914 |
def cmd_modify(args): |
| 913 | 915 |
s3 = S3(Config()) |
| ... | ... |
@@ -916,7 +928,8 @@ def cmd_modify(args): |
| 916 | 916 |
|
| 917 | 917 |
def cmd_mv(args): |
| 918 | 918 |
s3 = S3(Config()) |
| 919 |
- return subcmd_cp_mv(args, s3.object_move, "move", u"move: '%(src)s' -> '%(dst)s' %(extra)s") |
|
| 919 |
+ return subcmd_cp_mv(args, s3.object_move, "move", |
|
| 920 |
+ u"move: '%(src)s' -> '%(dst)s' %(extra)s") |
|
| 920 | 921 |
|
| 921 | 922 |
def cmd_info(args): |
| 922 | 923 |
cfg = Config() |
| ... | ... |
@@ -1112,10 +1125,12 @@ def cmd_sync_remote2remote(args): |
| 1112 | 1112 |
item = src_list[file] |
| 1113 | 1113 |
src_uri = S3Uri(item['object_uri_str']) |
| 1114 | 1114 |
dst_uri = S3Uri(item['target_uri']) |
| 1115 |
+ src_size = item.get('size')
|
|
| 1115 | 1116 |
seq_label = "[%d of %d]" % (seq, src_count) |
| 1116 | 1117 |
extra_headers = copy(cfg.extra_headers) |
| 1117 | 1118 |
try: |
| 1118 | 1119 |
response = s3.object_copy(src_uri, dst_uri, extra_headers, |
| 1120 |
+ src_size=src_size, |
|
| 1119 | 1121 |
extra_label=seq_label) |
| 1120 | 1122 |
output(u"remote copy: '%s' -> '%s' %s" % |
| 1121 | 1123 |
(src_uri, dst_uri, seq_label)) |
| ... | ... |
@@ -1561,13 +1576,15 @@ def remote_copy(s3, copy_pairs, destination_base, uploaded_objects_list=None): |
| 1561 | 1561 |
debug(u"Remote Copying from %s to %s" % (dst1, dst2)) |
| 1562 | 1562 |
dst1_uri = S3Uri(destination_base + dst1) |
| 1563 | 1563 |
dst2_uri = S3Uri(destination_base + dst2) |
| 1564 |
+ src_obj_size = src_obj.get(u'size', 0) |
|
| 1564 | 1565 |
seq_label = "[%d of %d]" % (seq, src_count) |
| 1565 | 1566 |
extra_headers = copy(cfg.extra_headers) |
| 1566 | 1567 |
try: |
| 1567 | 1568 |
s3.object_copy(dst1_uri, dst2_uri, extra_headers, |
| 1569 |
+ src_size=src_obj_size, |
|
| 1568 | 1570 |
extra_label=seq_label) |
| 1569 | 1571 |
output(u"remote copy: '%s' -> '%s' %s" % (dst1, dst2, seq_label)) |
| 1570 |
- saved_bytes += src_obj.get(u'size', 0) |
|
| 1572 |
+ saved_bytes += src_obj_size |
|
| 1571 | 1573 |
if uploaded_objects_list is not None: |
| 1572 | 1574 |
uploaded_objects_list.append(dst2) |
| 1573 | 1575 |
except Exception: |