Browse code

Refactored object_copy to avoid duplicated code in object_modify

Florent Viard authored on 2020/04/17 05:57:30
Showing 1 changed file
... ...
@@ -132,6 +132,7 @@ EXPECT_CONTINUE_TIMEOUT = 2
132 132
 SIZE_1MB = 1024 * 1024
133 133
 
134 134
 __all__ = []
135
+
135 136
 class S3Request(object):
136 137
     region_map = {}
137 138
     ## S3 sometimes sends HTTP-301, HTTP-307 response
... ...
@@ -820,7 +821,7 @@ class S3(object):
820 820
         return headers
821 821
 
822 822
     def object_copy(self, src_uri, dst_uri, extra_headers=None,
823
-                    src_size=None, extra_label=""):
823
+                    src_size=None, extra_label="", replace_meta=False):
824 824
         if src_uri.type != "s3":
825 825
             raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
826 826
         if dst_uri.type != "s3":
... ...
@@ -836,16 +837,34 @@ class S3(object):
836 836
                 acl = None
837 837
 
838 838
         multipart = False
839
+
840
+        headers = None
841
+        if replace_meta:
842
+            src_info = self.object_info(src_uri)
843
+            headers = src_info['headers']
844
+            src_size = int(headers["content-length"])
845
+
839 846
         if self.config.enable_multipart:
840 847
             # Get size of remote source only if multipart is enabled and that no
841 848
             # size info was provided
842
-            src_headers = None
849
+            src_headers = headers
843 850
             if src_size is None:
844 851
                 src_info = self.object_info(src_uri)
845 852
                 src_headers = src_info['headers']
846 853
                 src_size = int(src_headers["content-length"])
847 854
 
848
-            if src_size > self.config.multipart_copy_chunk_size_mb * SIZE_1MB:
855
+            # If we are over the grand maximum size for a normal copy/modify
856
+            # (> 5GB) go nuclear and use multipart copy as the only option to
857
+            # modify an object.
858
+            # Reason is an aws s3 design bug. See:
859
+            # https://github.com/aws/aws-sdk-java/issues/367
860
+            if src_uri is dst_uri:
861
+                # optimisation in the case of modify
862
+                threshold = MultiPartUpload.MAX_CHUNK_SIZE_MB * SIZE_1MB
863
+            else:
864
+                threshold = self.config.multipart_copy_chunk_size_mb * SIZE_1MB
865
+
866
+            if src_size > threshold:
849 867
                 # Sadly, s3 is badly done as metadata will not be copied in
850 868
                 # multipart copy unlike what is done in the case of direct
851 869
                 # copy.
... ...
@@ -855,11 +874,13 @@ class S3(object):
855 855
                     src_info = self.object_info(src_uri)
856 856
                     src_headers = src_info['headers']
857 857
                     src_size = int(src_headers["content-length"])
858
-                self._sanitize_headers(src_headers)
859
-                headers = SortedDict(src_headers, ignore_case=True)
858
+                headers = src_headers
860 859
                 multipart = True
861 860
 
862
-        if not multipart:
861
+        if headers:
862
+            self._sanitize_headers(headers)
863
+            headers = SortedDict(headers, ignore_case=True)
864
+        else:
863 865
             headers = SortedDict(ignore_case=True)
864 866
 
865 867
         if self.config.acl_public:
... ...
@@ -883,11 +904,15 @@ class S3(object):
883 883
         if self.config.mime_type:
884 884
             headers["content-type"] = self.config.mime_type
885 885
 
886
-        headers['x-amz-metadata-directive'] = "COPY"
886
+        # "COPY" or "REPLACE"
887
+        if not replace_meta:
888
+            headers['x-amz-metadata-directive'] = "COPY"
889
+        else:
890
+            headers['x-amz-metadata-directive'] = "REPLACE"
887 891
 
888 892
         if multipart:
889 893
             # Multipart decision. Only do multipart copy for remote s3 files
890
-            # bigger than the multipart copy threshlod.
894
+            # bigger than the multipart copy threshold.
891 895
 
892 896
             # Multipart requests are quite different... delegate
893 897
             response = self.copy_file_multipart(src_uri, dst_uri, src_size,
... ...
@@ -922,86 +947,9 @@ class S3(object):
922 922
 
923 923
     def object_modify(self, src_uri, dst_uri, extra_headers=None,
924 924
                       src_size=None, extra_label=""):
925
-
926
-        if src_uri.type != "s3":
927
-            raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
928
-        if dst_uri.type != "s3":
929
-            raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
930
-
931
-        info_response = self.object_info(src_uri)
932
-        headers = info_response['headers']
933
-        src_size = int(headers["content-length"])
934
-        headers = self._sanitize_headers(headers)
935
-
936
-        # If we are over the grand maximum size for a normal copy/modify
937
-        # (> 5GB) go nuclear, and use multipart copy as the only option to
938
-        # modify an object.
939
-        # We are sure that copy will run multipart has we are bigger than
940
-        # the maximum size for non multipart.
941
-        # Reason is aws s3 design bug. See:
942
-        # https://github.com/aws/aws-sdk-java/issues/367
943
-        if src_size > MultiPartUpload.MAX_CHUNK_SIZE_MB * SIZE_1MB \
944
-           and self.config.enable_multipart:
945
-            return self.object_copy(src_uri, src_uri, extra_headers, src_size,
946
-                                    extra_label)
947
-
948
-        if self.config.acl_public is None:
949
-            try:
950
-                acl = self.get_acl(src_uri)
951
-            except S3Error as exc:
952
-                # Ignore the exception and don't fail the modify
953
-                # if the server doesn't support setting ACLs
954
-                if exc.status != 501:
955
-                    raise exc
956
-                acl = None
957
-
958
-        headers['x-amz-copy-source'] = s3_quote("/%s/%s" % (src_uri.bucket(),
959
-                                                            src_uri.object()),
960
-                                                quote_backslashes=False,
961
-                                                unicode_output=True)
962
-        headers['x-amz-metadata-directive'] = "REPLACE"
963
-
964
-        # cannot change between standard and reduced redundancy with a REPLACE.
965
-
966
-        if self.config.acl_public:
967
-            headers["x-amz-acl"] = "public-read"
968
-
969
-        ## Set server side encryption
970
-        if self.config.server_side_encryption:
971
-            headers["x-amz-server-side-encryption"] = "AES256"
972
-
973
-        ## Set kms headers
974
-        if self.config.kms_key:
975
-            headers['x-amz-server-side-encryption'] = 'aws:kms'
976
-            headers['x-amz-server-side-encryption-aws-kms-key-id'] = \
977
-                self.config.kms_key
978
-
979
-        if extra_headers:
980
-            headers.update(extra_headers)
981
-
982
-        if self.config.mime_type:
983
-            headers["content-type"] = self.config.mime_type
984
-
985
-        request = self.create_request("OBJECT_PUT", uri=src_uri,
986
-                                      headers=headers)
987
-        response = self.send_request(request)
988
-        if response["data"] and getRootTagName(response["data"]) == "Error":
989
-            #http://doc.s3.amazonaws.com/proposals/copy.html
990
-            # Error during modify, status will be 200, so force error code 500
991
-            response["status"] = 500
992
-            error("Server error during the MODIFY operation. Overwrite "
993
-                  "response status to 500")
994
-            raise S3Error(response)
995
-
996
-        if self.config.acl_public is None and acl:
997
-            try:
998
-                self.set_acl(src_uri, acl)
999
-            except S3Error as exc:
1000
-                # Ignore the exception and don't fail the modify
1001
-                # if the server doesn't support setting ACLs
1002
-                if exc.status != 501:
1003
-                    raise exc
1004
-        return response
925
+        # dst_uri = src_uri: multipart is then used only in the worst case
926
+        return self.object_copy(src_uri, src_uri, extra_headers, src_size,
927
+                                extra_label, replace_meta=True)
1005 928
 
1006 929
     def object_move(self, src_uri, dst_uri, extra_headers=None,
1007 930
                     src_size=None, extra_label=""):