Browse code

batch_mode-ectomy to fix recursive bucket_list

batch_mode was introduced to let deletes happen using the
BATCH_DELETE operator. Unfortunately, it had an unintended side
effect: because a truncated listing was never continued in batch
mode, only the first 1000 files in a bucket were retrieved during
a del --recursive operation. Furthermore, since 6f91f384, we no
longer need the batch_mode logic, as we now deal with slices of
the remote_list.

This patch specifically removes the conditional as follows:

- truncated = _list_truncated(response["data"]) and not batch_mode
+ truncated = _list_truncated(response["data"])

restoring the pre-batch_mode behavior, so that a truncated response
is followed up again and the entire recursive listing is retrieved.

This patch also deletes all remaining references to batch_mode, as
it is no longer used anywhere after the above change.

Matt Domsch authored on 2014/04/21 11:20:53
Showing 3 changed files
... ...
@@ -318,7 +318,7 @@ def fetch_local_list(args, is_src = False, recursive = None):
318 318
     _maintain_cache(cache, local_list)
319 319
     return local_list, single_file, exclude_list
320 320
 
321
-def fetch_remote_list(args, require_attribs = False, recursive = None, batch_mode = False, uri_params = {}):
321
+def fetch_remote_list(args, require_attribs = False, recursive = None, uri_params = {}):
322 322
     def _get_remote_attribs(uri, remote_item):
323 323
         response = S3(cfg).object_info(uri)
324 324
         remote_item.update({
... ...
@@ -333,7 +333,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None, batch_mod
333 333
         except KeyError:
334 334
             pass
335 335
 
336
-    def _get_filelist_remote(remote_uri, recursive = True, batch_mode = False):
336
+    def _get_filelist_remote(remote_uri, recursive = True):
337 337
         ## If remote_uri ends with '/' then all remote files will have
338 338
         ## the remote_uri prefix removed in the relative path.
339 339
         ## If, on the other hand, the remote_uri ends with something else
... ...
@@ -354,7 +354,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None, batch_mod
354 354
 
355 355
         s3 = S3(Config())
356 356
         response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(),
357
-                                  recursive = recursive, batch_mode = batch_mode, uri_params = uri_params)
357
+                                  recursive = recursive, uri_params = uri_params)
358 358
 
359 359
         rem_base_original = rem_base = remote_uri.object()
360 360
         remote_uri_original = remote_uri
... ...
@@ -414,7 +414,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None, batch_mod
414 414
 
415 415
     if recursive:
416 416
         for uri in remote_uris:
417
-            objectlist = _get_filelist_remote(uri, batch_mode = batch_mode)
417
+            objectlist = _get_filelist_remote(uri, recursive = True)
418 418
             for key in objectlist:
419 419
                 remote_list[key] = objectlist[key]
420 420
                 remote_list.record_md5(key, objectlist.get_md5(key))
... ...
@@ -238,7 +238,7 @@ class S3(object):
238 238
         response["list"] = getListFromXml(response["data"], "Bucket")
239 239
         return response
240 240
 
241
-    def bucket_list(self, bucket, prefix = None, recursive = None, batch_mode = False, uri_params = {}):
241
+    def bucket_list(self, bucket, prefix = None, recursive = None, uri_params = {}):
242 242
         def _list_truncated(data):
243 243
             ## <IsTruncated> can either be "true" or "false" or be missing completely
244 244
             is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
... ...
@@ -258,7 +258,7 @@ class S3(object):
258 258
             response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
259 259
             current_list = _get_contents(response["data"])
260 260
             current_prefixes = _get_common_prefixes(response["data"])
261
-            truncated = _list_truncated(response["data"]) and not batch_mode
261
+            truncated = _list_truncated(response["data"])
262 262
             if truncated:
263 263
                 if current_list:
264 264
                     uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"])
... ...
@@ -571,12 +571,10 @@ def subcmd_batch_del(uri_str = None, bucket = None, remote_list = None):
571 571
     if len([item for item in [uri_str, bucket, remote_list] if item]) != 1:
572 572
         raise ValueError("One and only one of 'uri_str', 'bucket', 'remote_list' can be specified.")
573 573
 
574
-    batch_mode = False
575 574
     if bucket:
576 575
         uri_str = "s3://%s" % bucket
577 576
     if not remote_list:
578
-        batch_mode = True
579
-        remote_list, exclude_list = fetch_remote_list(uri_str, require_attribs = False, batch_mode = True)
577
+        remote_list, exclude_list = fetch_remote_list(uri_str, require_attribs = False)
580 578
 
581 579
     if len(remote_list) == 0:
582 580
         warning(u"Remote list is empty.")