Browse code

Dynamically increase the chunk size depending on the file size

Jerome Leclanche authored on 2011/05/31 19:13:19
Showing 2 changed files
... ...
@@ -70,7 +70,7 @@ class MultiPartUpload(object):
70 70
 		self.upload_id = upload_id
71 71
 		return s3, key, upload_id
72 72
 	
73
-	def upload_all_parts(self, num_threads = 4, chunk_size = MIN_CHUNK_SIZE):
73
+	def upload_all_parts(self, num_threads, chunk_size):
74 74
 		"""
75 75
 		Execute a full multipart upload on a file
76 76
 		Returns the id/etag dict
... ...
@@ -332,7 +332,7 @@ class S3(object):
332 332
 		
333 333
 		if multipart:
334 334
 			# Multipart requests are quite different... drop here
335
-			return self.send_file_multipart(file, headers, uri)
335
+			return self.send_file_multipart(file, headers, uri, size)
336 336
 		
337 337
 		headers["content-length"] = size
338 338
 		content_type = None
... ...
@@ -713,16 +713,28 @@ class S3(object):
713 713
 
714 714
 		return response
715 715
 	
716
-	def send_file_multipart(self, file, headers, uri):
716
+	def send_file_multipart(self, file, headers, uri, size):
717 717
 		upload = MultiPartUpload(self, file, uri)
718 718
 		bucket, key, upload_id = upload.initiate_multipart_upload()
719 719
 		
720
+		num_threads = self.config.multipart_num_threads or 4
721
+		
722
+		if size > MultiPartUpload.MAX_FILE_SIZE:
723
+			raise RuntimeError("File is too large (%i bytes, max %i)" % (size, MultiPartUpload.MAX_FILE_SIZE))
724
+		elif size > 107374182400: # 100GB
725
+			chunk_size = size / 10000
726
+		elif size > 10737418240: # 10GB
727
+			chunk_size = size / 1000
728
+		elif size > 1073741824: # 1GB
729
+			chunk_size = size / 100
730
+		else:
731
+			chunk_size = self.config.multipart_chunk_size or MultiPartUpload.MIN_CHUNK_SIZE
732
+		
720 733
 		file.seek(0)
721 734
 		upload.upload_all_parts()
722
-		response = upload.complete_multipart_upload()
735
+		response = upload.complete_multipart_upload(num_threads, chunk_size)
723 736
 		response["speed"] = 0 # XXX
724 737
 		return response
725
-		exit() # TODO return response
726 738
 	
727 739
 	def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries):
728 740
 		method_string, resource, headers = request.get_triplet()