git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@467 830e0280-6d2a-0410-9c65-932aecc39d9d
| 4 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,339 @@ |
| 0 |
+## Create and compare lists of files/objects |
|
| 1 |
+## Author: Michal Ludvig <michal@logix.cz> |
|
| 2 |
+## http://www.logix.cz/michal |
|
| 3 |
+## License: GPL Version 2 |
|
| 4 |
+ |
|
| 5 |
+from S3 import S3 |
|
| 6 |
+from Config import Config |
|
| 7 |
+from S3Uri import S3Uri |
|
| 8 |
+from SortedDict import SortedDict |
|
| 9 |
+from Utils import * |
|
| 10 |
+ |
|
| 11 |
+from logging import debug, info, warning, error |
|
| 12 |
+ |
|
| 13 |
+import os |
|
| 14 |
+import glob |
|
| 15 |
+ |
|
| 16 |
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"] |
|
| 17 |
+ |
|
| 18 |
def _fswalk_follow_symlinks(path):
    '''
    Generate os.walk() tuples for path and for symlinked directories below it.

    A first os.walk() pass over path collects every directory entry that is
    a symbolic link. Each such link becomes an additional walk root, unless
    its resolved target was already recorded for another link; in that case
    a warning is logged and the link is skipped.

    Afterwards path and every collected link are walked with os.walk() and
    the resulting (dirpath, dirnames, filenames) tuples are yielded. The
    roots are kept in a set, so the order in which they are walked is not
    defined.

    Written to work on python2.4 and later.
    '''
    assert os.path.isdir(path) # only designed for directory argument
    roots = set([path])
    seen_targets = set()
    for parent, subdirs, _unused_files in os.walk(path):
        for name in subdirs:
            candidate = os.path.join(parent, name)
            resolved = os.path.realpath(candidate)
            if not os.path.islink(candidate):
                continue
            if resolved in seen_targets:
                ## A second link to an already collected target
                warning("Skipping recursively symlinked directory %s" % name)
                continue
            roots.add(candidate)
            seen_targets.add(resolved)
    for root in roots:
        for entry in os.walk(root):
            yield entry
|
| 40 |
+ |
|
| 41 |
def _fswalk(path, follow_symlinks):
    '''
    Directory tree generator

    path (str) is the root of the directory tree to walk

    follow_symlinks (bool) selects the walker: when true the tree is walked
    by _fswalk_follow_symlinks(), which also walks symbolically linked
    directories, otherwise by plain os.walk()

    Returns an iterator of (dirpath, dirnames, filenames) tuples.
    '''
    if not follow_symlinks:
        return os.walk(path)
    return _fswalk_follow_symlinks(path)
|
| 52 |
+ |
|
| 53 |
def filter_exclude_include(src_list):
    '''
    Apply the --exclude / --include patterns to a file list.

    src_list is a dict-like object keyed by file name. An entry is moved
    out of src_list when its name is matched by one of cfg.exclude and by
    none of cfg.include (an include match rescues an excluded entry).
    Patterns are anything offering a search() method - presumably compiled
    regular expressions; cfg.debug_exclude / cfg.debug_include are looked
    up by pattern for the debug output only.

    src_list is modified in place.

    Returns (src_list, exclude_list), exclude_list being a new SortedDict
    holding the entries that were removed.
    '''
    info(u"Applying --exclude/--include")
    cfg = Config()
    exclude_list = SortedDict(ignore_case = False)
    for name in src_list.keys():
        debug(u"CHECK: %s" % name)
        keep = True
        for pattern in cfg.exclude:
            if pattern.search(name):
                keep = False
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[pattern]))
                break
        if not keep:
            ## Only excluded names need to be tested against --include
            for pattern in cfg.include:
                if pattern.search(name):
                    keep = True
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[pattern]))
                    break
        if keep:
            debug(u"PASS: %s" % (name))
            continue
        ## Excluded and not rescued by --include - move it over
        debug(u"EXCLUDE: %s" % name)
        exclude_list[name] = src_list[name]
        del(src_list[name])
    return src_list, exclude_list
|
| 81 |
+ |
|
| 82 |
def fetch_local_list(args, recursive = None):
    '''
    Build the list of local files named by args.

    args is a single local path/URI or a list/tuple of them. Each one is
    parsed with S3Uri and must be of type 'file'.

    recursive tells whether directories are acceptable arguments. When it
    is None the value of cfg.recursive is used instead.

    Returns (local_list, single_file). local_list is a SortedDict mapping
    the relative, '/'-separated file name to a dict with the keys
    'full_name_unicode', 'full_name', 'size' and 'mtime'. single_file is
    only True when the last processed argument was a plain file (not a
    directory) and local_list holds no more than one entry.

    Raises ParameterError for an argument that is not a local path and for
    a directory argument when recursive is false.
    '''
    def _get_filelist_local(local_uri):
        '''
        List the regular files at / below one local URI.

        Returns (loc_list, single_file), single_file being True when
        local_uri is not a directory.
        '''
        info(u"Compiling list of local files...")
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            filelist = _fswalk(local_path, cfg.follow_symlinks)
            single_file = False
        else:
            ## Fake a one-entry os.walk() result for the single file
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        loc_list = SortedDict(ignore_case = False)
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name) and not cfg.follow_symlinks:
                    continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                ## os.stat() and not os.lstat(): a symlink only gets here
                ## when follow_symlinks is set, and then the size and mtime
                ## of the link target (i.e. of the data that is going to be
                ## transferred) are wanted, not those of the link itself.
                ## For anything that is not a symlink both calls agree.
                sr = os.stat(full_name)
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    ## TODO: Possibly more to save here...
                }
        return loc_list, single_file

    cfg = Config()
    local_uris = []
    local_list = SortedDict(ignore_case = False)
    single_file = False

    if not isinstance(args, (list, tuple)):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    ## Validate all arguments before touching the filesystem listing
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(uri)
        local_list.update(list_for_uri)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    return local_list, single_file
|
| 155 |
+ |
|
| 156 |
def fetch_remote_list(args, require_attribs = False, recursive = None):
    '''
    Build the list of remote objects named by args.

    args is a single S3 URI or a list/tuple of them. Each one is parsed
    with S3Uri and must be of type 's3'.

    recursive: when true every URI is expanded through a bucket listing.
    When false a URI containing '*' or '?' is expanded through a bucket
    listing filtered with fnmatch, and any other URI is taken literally.
    None means "use cfg.recursive".

    require_attribs only matters for literal (non-recursive, non-wildcard)
    URIs: when true the object is queried with object_info() so that
    'size', 'md5' and 'timestamp' are filled in as well.

    Returns a SortedDict keyed by relative object name. Entries coming from
    a bucket listing hold 'size', 'timestamp', 'md5', 'object_key',
    'object_uri_str' and 'base_uri'; literal entries hold 'base_uri',
    'object_uri_str' and 'object_key' plus the attributes above when
    require_attribs is set.

    Raises ParameterError for a non-S3 argument and for a literal URI that
    has no object name part.
    '''
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            ## Cut the base back to the last '/' so that the final path
            ## component stays part of the relative names
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = SortedDict(ignore_case = False)
        break_now = False
        for item in response['list']:
            ## S3 keys are always '/'-separated, whatever os.path.sep is
            if item['Key'] == rem_base_original and not item['Key'].endswith('/'):
                ## We asked for one file and we got that file :-)
                key = os.path.basename(item['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                ## Remove whatever has already been put to rem_list
                rem_list = SortedDict(ignore_case = False)
            else:
                key = item['Key'][rem_base_len:]      ## Beware - this may be '' if item['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(item['Size']),
                'timestamp' : dateS3toUnix(item['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : item['ETag'][1:-1],
                'object_key' : item['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
            }
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = SortedDict(ignore_case = False)

    if not isinstance(args, (list, tuple)):
        args = [args]

    if recursive is None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            ## str.find() returns -1 for "not found", so only the positions
            ## that were really found may take part in picking the first one.
            wildcard_positions = [pos for pos in (uri_str.find('*'), uri_str.find('?')) if pos > -1]
            if wildcard_positions:
                first_wildcard = min(wildcard_positions)
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
                    })
                remote_list[key] = remote_item
    return remote_list
|
| 268 |
+ |
|
| 269 |
def compare_filelists(src_list, dst_list, src_remote, dst_remote):
    '''
    Work out which source files need to be transferred.

    src_list / dst_list are dict-like file lists keyed by relative name;
    src_remote / dst_remote tell whether the respective side is remote
    (local -> local is not supported and trips the assertion).

    Names present on both sides are compared:
    - with cfg.skip_existing the pair is considered equal right away
    - otherwise 'size' and then 'md5' are compared, each only when listed
      in cfg.sync_checks. The md5 of a local file is computed with
      hash_file_md5(), that of a remote one is taken from its entry.
    Equal entries are moved from src_list to the returned exists_list. In
    either case the name is dropped from dst_list. If hashing fails with
    IOError/OSError the name is dropped from both lists with a warning.

    Both lists are modified in place. What remains in src_list is to be
    transferred, what remains in dst_list exists on the destination only.

    Returns (src_list, dst_list, exists_list).
    '''
    def _side(is_remote):
        if is_remote:
            return "remote"
        return "local"

    # We don't support local->local sync, use 'rsync' or something like that instead ;-)
    assert(src_remote != False or dst_remote != False)

    info(u"Verifying attributes...")
    cfg = Config()
    exists_list = SortedDict(ignore_case = False)

    debug("Comparing filelists (direction: %s -> %s)" % (_side(src_remote), _side(dst_remote)))
    debug("src_list.keys: %s" % src_list.keys())
    debug("dst_list.keys: %s" % dst_list.keys())

    for name in src_list.keys():
        debug(u"CHECK: %s" % name)
        if not dst_list.has_key(name):
            ## Source only - stays in src_list and gets transferred
            continue

        src_item = src_list[name]
        dst_item = dst_list[name]

        ## Was --skip-existing requested?
        if cfg.skip_existing:
            debug(u"IGNR: %s (used --skip-existing)" % (name))
            exists_list[name] = src_item
            del(src_list[name])
            ## Remove from destination-list, all that is left there will be deleted
            del(dst_list[name])
            continue

        differs = False

        ## Check size first
        if 'size' in cfg.sync_checks and dst_item['size'] != src_item['size']:
            debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (name, src_item['size'], dst_item['size']))
            differs = True

        if not differs and 'md5' in cfg.sync_checks:
            ## ... same size, check MD5
            try:
                if src_remote == False and dst_remote == True:
                    src_md5 = hash_file_md5(src_item['full_name'])
                    dst_md5 = dst_item['md5']
                elif src_remote == True and dst_remote == False:
                    src_md5 = src_item['md5']
                    dst_md5 = hash_file_md5(dst_item['full_name'])
                elif src_remote == True and dst_remote == True:
                    src_md5 = src_item['md5']
                    dst_md5 = dst_item['md5']
            except (IOError, OSError):
                # MD5 sum verification failed - ignore that file altogether
                debug(u"IGNR: %s (disappeared)" % (name))
                warning(u"%s: file disappeared, ignoring." % (name))
                del(src_list[name])
                del(dst_list[name])
                continue

            if src_md5 != dst_md5:
                ## Checksums are different.
                differs = True
                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (name, src_md5, dst_md5))

        if not differs:
            ## Remove from source-list, all that is left there will be transferred
            debug(u"IGNR: %s (transfer not needed)" % name)
            exists_list[name] = src_item
            del(src_list[name])

        ## Remove from destination-list, all that is left there will be deleted
        del(dst_list[name])

    return src_list, dst_list, exists_list
|
| 337 |
+ |
|
| 338 |
+ |
| ... | ... |
@@ -37,41 +37,6 @@ def check_args_type(args, type, verbose_type): |
| 37 | 37 |
if S3Uri(arg).type != type: |
| 38 | 38 |
raise ParameterError("Expecting %s instead of '%s'" % (verbose_type, arg))
|
| 39 | 39 |
|
| 40 |
-def _fswalk_follow_symlinks(path): |
|
| 41 |
- ''' |
|
| 42 |
- Walk filesystem, following symbolic links (but without recursion), on python2.4 and later |
|
| 43 |
- |
|
| 44 |
- If a recursive directory link is detected, emit a warning and skip. |
|
| 45 |
- ''' |
|
| 46 |
- assert os.path.isdir(path) # only designed for directory argument |
|
| 47 |
- walkdirs = set([path]) |
|
| 48 |
- targets = set() |
|
| 49 |
- for dirpath, dirnames, filenames in os.walk(path): |
|
| 50 |
- for dirname in dirnames: |
|
| 51 |
- current = os.path.join(dirpath, dirname) |
|
| 52 |
- target = os.path.realpath(current) |
|
| 53 |
- if os.path.islink(current): |
|
| 54 |
- if target in targets: |
|
| 55 |
- warning("Skipping recursively symlinked directory %s" % dirname)
|
|
| 56 |
- else: |
|
| 57 |
- walkdirs.add(current) |
|
| 58 |
- targets.add(target) |
|
| 59 |
- for walkdir in walkdirs: |
|
| 60 |
- for value in os.walk(walkdir): |
|
| 61 |
- yield value |
|
| 62 |
- |
|
| 63 |
-def fswalk(path, follow_symlinks): |
|
| 64 |
- ''' |
|
| 65 |
- Directory tree generator |
|
| 66 |
- |
|
| 67 |
- path (str) is the root of the directory tree to walk |
|
| 68 |
- |
|
| 69 |
- follow_symlinks (bool) indicates whether to descend into symbolically linked directories |
|
| 70 |
- ''' |
|
| 71 |
- if follow_symlinks: |
|
| 72 |
- return _fswalk_follow_symlinks(path) |
|
| 73 |
- return os.walk(path) |
|
| 74 |
- |
|
| 75 | 40 |
def cmd_du(args): |
| 76 | 41 |
s3 = S3(Config()) |
| 77 | 42 |
if len(args) > 0: |
| ... | ... |
@@ -222,100 +187,6 @@ def cmd_bucket_delete(args): |
| 222 | 222 |
_bucket_delete_one(uri) |
| 223 | 223 |
output(u"Bucket '%s' removed" % uri.uri()) |
| 224 | 224 |
|
| 225 |
-def fetch_local_list(args, recursive = None): |
|
| 226 |
- local_uris = [] |
|
| 227 |
- local_list = SortedDict(ignore_case = False) |
|
| 228 |
- single_file = False |
|
| 229 |
- |
|
| 230 |
- if type(args) not in (list, tuple): |
|
| 231 |
- args = [args] |
|
| 232 |
- |
|
| 233 |
- if recursive == None: |
|
| 234 |
- recursive = cfg.recursive |
|
| 235 |
- |
|
| 236 |
- for arg in args: |
|
| 237 |
- uri = S3Uri(arg) |
|
| 238 |
- if not uri.type == 'file': |
|
| 239 |
- raise ParameterError("Expecting filename or directory instead of: %s" % arg)
|
|
| 240 |
- if uri.isdir() and not recursive: |
|
| 241 |
- raise ParameterError("Use --recursive to upload a directory: %s" % arg)
|
|
| 242 |
- local_uris.append(uri) |
|
| 243 |
- |
|
| 244 |
- for uri in local_uris: |
|
| 245 |
- list_for_uri, single_file = _get_filelist_local(uri) |
|
| 246 |
- local_list.update(list_for_uri) |
|
| 247 |
- |
|
| 248 |
- ## Single file is True if and only if the user |
|
| 249 |
- ## specified one local URI and that URI represents |
|
| 250 |
- ## a FILE. Ie it is False if the URI was of a DIR |
|
| 251 |
- ## and that dir contained only one FILE. That's not |
|
| 252 |
- ## a case of single_file==True. |
|
| 253 |
- if len(local_list) > 1: |
|
| 254 |
- single_file = False |
|
| 255 |
- |
|
| 256 |
- return local_list, single_file |
|
| 257 |
- |
|
| 258 |
-def fetch_remote_list(args, require_attribs = False, recursive = None): |
|
| 259 |
- remote_uris = [] |
|
| 260 |
- remote_list = SortedDict(ignore_case = False) |
|
| 261 |
- |
|
| 262 |
- if type(args) not in (list, tuple): |
|
| 263 |
- args = [args] |
|
| 264 |
- |
|
| 265 |
- if recursive == None: |
|
| 266 |
- recursive = cfg.recursive |
|
| 267 |
- |
|
| 268 |
- for arg in args: |
|
| 269 |
- uri = S3Uri(arg) |
|
| 270 |
- if not uri.type == 's3': |
|
| 271 |
- raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
|
|
| 272 |
- remote_uris.append(uri) |
|
| 273 |
- |
|
| 274 |
- if recursive: |
|
| 275 |
- for uri in remote_uris: |
|
| 276 |
- objectlist = _get_filelist_remote(uri) |
|
| 277 |
- for key in objectlist: |
|
| 278 |
- remote_list[key] = objectlist[key] |
|
| 279 |
- else: |
|
| 280 |
- for uri in remote_uris: |
|
| 281 |
- uri_str = str(uri) |
|
| 282 |
- ## Wildcards used in remote URI? |
|
| 283 |
- ## If yes we'll need a bucket listing... |
|
| 284 |
- if uri_str.find('*') > -1 or uri_str.find('?') > -1:
|
|
| 285 |
- first_wildcard = uri_str.find('*')
|
|
| 286 |
- first_questionmark = uri_str.find('?')
|
|
| 287 |
- if first_questionmark > -1 and first_questionmark < first_wildcard: |
|
| 288 |
- first_wildcard = first_questionmark |
|
| 289 |
- prefix = uri_str[:first_wildcard] |
|
| 290 |
- rest = uri_str[first_wildcard+1:] |
|
| 291 |
- ## Only request recursive listing if the 'rest' of the URI, |
|
| 292 |
- ## i.e. the part after first wildcard, contains '/' |
|
| 293 |
- need_recursion = rest.find('/') > -1
|
|
| 294 |
- objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion) |
|
| 295 |
- for key in objectlist: |
|
| 296 |
- ## Check whether the 'key' matches the requested wildcards |
|
| 297 |
- if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str): |
|
| 298 |
- remote_list[key] = objectlist[key] |
|
| 299 |
- else: |
|
| 300 |
- ## No wildcards - simply append the given URI to the list |
|
| 301 |
- key = os.path.basename(uri.object()) |
|
| 302 |
- if not key: |
|
| 303 |
- raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri()) |
|
| 304 |
- remote_item = {
|
|
| 305 |
- 'base_uri': uri, |
|
| 306 |
- 'object_uri_str': unicode(uri), |
|
| 307 |
- 'object_key': uri.object() |
|
| 308 |
- } |
|
| 309 |
- if require_attribs: |
|
| 310 |
- response = S3(cfg).object_info(uri) |
|
| 311 |
- remote_item.update({
|
|
| 312 |
- 'size': int(response['headers']['content-length']), |
|
| 313 |
- 'md5': response['headers']['etag'].strip('"\''),
|
|
| 314 |
- 'timestamp' : Utils.dateRFC822toUnix(response['headers']['date']) |
|
| 315 |
- }) |
|
| 316 |
- remote_list[key] = remote_item |
|
| 317 |
- return remote_list |
|
| 318 |
- |
|
| 319 | 225 |
def cmd_object_put(args): |
| 320 | 226 |
cfg = Config() |
| 321 | 227 |
s3 = S3(cfg) |
| ... | ... |
@@ -334,7 +205,7 @@ def cmd_object_put(args): |
| 334 | 334 |
|
| 335 | 335 |
local_list, single_file_local = fetch_local_list(args) |
| 336 | 336 |
|
| 337 |
- local_list, exclude_list = _filelist_filter_exclude_include(local_list) |
|
| 337 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
| 338 | 338 |
|
| 339 | 339 |
local_count = len(local_list) |
| 340 | 340 |
|
| ... | ... |
@@ -437,7 +308,7 @@ def cmd_object_get(args): |
| 437 | 437 |
raise ParameterError("Nothing to download. Expecting S3 URI.")
|
| 438 | 438 |
|
| 439 | 439 |
remote_list = fetch_remote_list(args, require_attribs = False) |
| 440 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
| 440 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
| 441 | 441 |
|
| 442 | 442 |
remote_count = len(remote_list) |
| 443 | 443 |
|
| ... | ... |
@@ -538,7 +409,7 @@ def subcmd_object_del_uri(uri_str, recursive = None): |
| 538 | 538 |
recursive = cfg.recursive |
| 539 | 539 |
|
| 540 | 540 |
remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive) |
| 541 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
| 541 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
| 542 | 542 |
|
| 543 | 543 |
remote_count = len(remote_list) |
| 544 | 544 |
|
| ... | ... |
@@ -567,7 +438,7 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
| 567 | 567 |
destination_base = dst_base_uri.uri() |
| 568 | 568 |
|
| 569 | 569 |
remote_list = fetch_remote_list(args, require_attribs = False) |
| 570 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
| 570 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
| 571 | 571 |
|
| 572 | 572 |
remote_count = len(remote_list) |
| 573 | 573 |
|
| ... | ... |
@@ -651,195 +522,6 @@ def cmd_info(args): |
| 651 | 651 |
else: |
| 652 | 652 |
raise |
| 653 | 653 |
|
| 654 |
-def _get_filelist_local(local_uri): |
|
| 655 |
- info(u"Compiling list of local files...") |
|
| 656 |
- if local_uri.isdir(): |
|
| 657 |
- local_base = deunicodise(local_uri.basename()) |
|
| 658 |
- local_path = deunicodise(local_uri.path()) |
|
| 659 |
- filelist = fswalk(local_path, cfg.follow_symlinks) |
|
| 660 |
- single_file = False |
|
| 661 |
- else: |
|
| 662 |
- local_base = "" |
|
| 663 |
- local_path = deunicodise(local_uri.dirname()) |
|
| 664 |
- filelist = [( local_path, [], [deunicodise(local_uri.basename())] )] |
|
| 665 |
- single_file = True |
|
| 666 |
- loc_list = SortedDict(ignore_case = False) |
|
| 667 |
- for root, dirs, files in filelist: |
|
| 668 |
- rel_root = root.replace(local_path, local_base, 1) |
|
| 669 |
- for f in files: |
|
| 670 |
- full_name = os.path.join(root, f) |
|
| 671 |
- if not os.path.isfile(full_name): |
|
| 672 |
- continue |
|
| 673 |
- if os.path.islink(full_name): |
|
| 674 |
- if not cfg.follow_symlinks: |
|
| 675 |
- continue |
|
| 676 |
- relative_file = unicodise(os.path.join(rel_root, f)) |
|
| 677 |
- if os.path.sep != "/": |
|
| 678 |
- # Convert non-unix dir separators to '/' |
|
| 679 |
- relative_file = "/".join(relative_file.split(os.path.sep)) |
|
| 680 |
- if cfg.urlencoding_mode == "normal": |
|
| 681 |
- relative_file = replace_nonprintables(relative_file) |
|
| 682 |
- if relative_file.startswith('./'):
|
|
| 683 |
- relative_file = relative_file[2:] |
|
| 684 |
- sr = os.stat_result(os.lstat(full_name)) |
|
| 685 |
- loc_list[relative_file] = {
|
|
| 686 |
- 'full_name_unicode' : unicodise(full_name), |
|
| 687 |
- 'full_name' : full_name, |
|
| 688 |
- 'size' : sr.st_size, |
|
| 689 |
- 'mtime' : sr.st_mtime, |
|
| 690 |
- ## TODO: Possibly more to save here... |
|
| 691 |
- } |
|
| 692 |
- return loc_list, single_file |
|
| 693 |
- |
|
| 694 |
-def _get_filelist_remote(remote_uri, recursive = True): |
|
| 695 |
- ## If remote_uri ends with '/' then all remote files will have |
|
| 696 |
- ## the remote_uri prefix removed in the relative path. |
|
| 697 |
- ## If, on the other hand, the remote_uri ends with something else |
|
| 698 |
- ## (probably alphanumeric symbol) we'll use the last path part |
|
| 699 |
- ## in the relative path. |
|
| 700 |
- ## |
|
| 701 |
- ## Complicated, eh? See an example: |
|
| 702 |
- ## _get_filelist_remote("s3://bckt/abc/def") may yield:
|
|
| 703 |
- ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
|
|
| 704 |
- ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
|
|
| 705 |
- ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
|
|
| 706 |
- ## Furthermore a prefix-magic can restrict the return list: |
|
| 707 |
- ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
|
|
| 708 |
- ## { 'xyz/blah.txt' : {} }
|
|
| 709 |
- |
|
| 710 |
- info(u"Retrieving list of remote files for %s ..." % remote_uri) |
|
| 711 |
- |
|
| 712 |
- s3 = S3(Config()) |
|
| 713 |
- response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive) |
|
| 714 |
- |
|
| 715 |
- rem_base_original = rem_base = remote_uri.object() |
|
| 716 |
- remote_uri_original = remote_uri |
|
| 717 |
- if rem_base != '' and rem_base[-1] != '/': |
|
| 718 |
- rem_base = rem_base[:rem_base.rfind('/')+1]
|
|
| 719 |
- remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
|
|
| 720 |
- rem_base_len = len(rem_base) |
|
| 721 |
- rem_list = SortedDict(ignore_case = False) |
|
| 722 |
- break_now = False |
|
| 723 |
- for object in response['list']: |
|
| 724 |
- if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep: |
|
| 725 |
- ## We asked for one file and we got that file :-) |
|
| 726 |
- key = os.path.basename(object['Key']) |
|
| 727 |
- object_uri_str = remote_uri_original.uri() |
|
| 728 |
- break_now = True |
|
| 729 |
- rem_list = {} ## Remove whatever has already been put to rem_list
|
|
| 730 |
- else: |
|
| 731 |
- key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !! |
|
| 732 |
- object_uri_str = remote_uri.uri() + key |
|
| 733 |
- rem_list[key] = {
|
|
| 734 |
- 'size' : int(object['Size']), |
|
| 735 |
- 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-( |
|
| 736 |
- 'md5' : object['ETag'][1:-1], |
|
| 737 |
- 'object_key' : object['Key'], |
|
| 738 |
- 'object_uri_str' : object_uri_str, |
|
| 739 |
- 'base_uri' : remote_uri, |
|
| 740 |
- } |
|
| 741 |
- if break_now: |
|
| 742 |
- break |
|
| 743 |
- return rem_list |
|
| 744 |
- |
|
| 745 |
-def _filelist_filter_exclude_include(src_list): |
|
| 746 |
- info(u"Applying --exclude/--include") |
|
| 747 |
- cfg = Config() |
|
| 748 |
- exclude_list = SortedDict(ignore_case = False) |
|
| 749 |
- for file in src_list.keys(): |
|
| 750 |
- debug(u"CHECK: %s" % file) |
|
| 751 |
- excluded = False |
|
| 752 |
- for r in cfg.exclude: |
|
| 753 |
- if r.search(file): |
|
| 754 |
- excluded = True |
|
| 755 |
- debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) |
|
| 756 |
- break |
|
| 757 |
- if excluded: |
|
| 758 |
- ## No need to check for --include if not excluded |
|
| 759 |
- for r in cfg.include: |
|
| 760 |
- if r.search(file): |
|
| 761 |
- excluded = False |
|
| 762 |
- debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) |
|
| 763 |
- break |
|
| 764 |
- if excluded: |
|
| 765 |
- ## Still excluded - ok, action it |
|
| 766 |
- debug(u"EXCLUDE: %s" % file) |
|
| 767 |
- exclude_list[file] = src_list[file] |
|
| 768 |
- del(src_list[file]) |
|
| 769 |
- continue |
|
| 770 |
- else: |
|
| 771 |
- debug(u"PASS: %s" % (file)) |
|
| 772 |
- return src_list, exclude_list |
|
| 773 |
- |
|
| 774 |
-def _compare_filelists(src_list, dst_list, src_remote, dst_remote): |
|
| 775 |
- def __direction_str(is_remote): |
|
| 776 |
- return is_remote and "remote" or "local" |
|
| 777 |
- |
|
| 778 |
- # We don't support local->local sync, use 'rsync' or something like that instead ;-) |
|
| 779 |
- assert(not(src_remote == False and dst_remote == False)) |
|
| 780 |
- |
|
| 781 |
- info(u"Verifying attributes...") |
|
| 782 |
- cfg = Config() |
|
| 783 |
- exists_list = SortedDict(ignore_case = False) |
|
| 784 |
- |
|
| 785 |
- debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
|
|
| 786 |
- debug("src_list.keys: %s" % src_list.keys())
|
|
| 787 |
- debug("dst_list.keys: %s" % dst_list.keys())
|
|
| 788 |
- |
|
| 789 |
- for file in src_list.keys(): |
|
| 790 |
- debug(u"CHECK: %s" % file) |
|
| 791 |
- if dst_list.has_key(file): |
|
| 792 |
- ## Was --skip-existing requested? |
|
| 793 |
- if cfg.skip_existing: |
|
| 794 |
- debug(u"IGNR: %s (used --skip-existing)" % (file)) |
|
| 795 |
- exists_list[file] = src_list[file] |
|
| 796 |
- del(src_list[file]) |
|
| 797 |
- ## Remove from destination-list, all that is left there will be deleted |
|
| 798 |
- del(dst_list[file]) |
|
| 799 |
- continue |
|
| 800 |
- |
|
| 801 |
- attribs_match = True |
|
| 802 |
- ## Check size first |
|
| 803 |
- if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']: |
|
| 804 |
- debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size'])) |
|
| 805 |
- attribs_match = False |
|
| 806 |
- |
|
| 807 |
- if attribs_match and 'md5' in cfg.sync_checks: |
|
| 808 |
- ## ... same size, check MD5 |
|
| 809 |
- try: |
|
| 810 |
- if src_remote == False and dst_remote == True: |
|
| 811 |
- src_md5 = Utils.hash_file_md5(src_list[file]['full_name']) |
|
| 812 |
- dst_md5 = dst_list[file]['md5'] |
|
| 813 |
- elif src_remote == True and dst_remote == False: |
|
| 814 |
- src_md5 = src_list[file]['md5'] |
|
| 815 |
- dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name']) |
|
| 816 |
- elif src_remote == True and dst_remote == True: |
|
| 817 |
- src_md5 = src_list[file]['md5'] |
|
| 818 |
- dst_md5 = dst_list[file]['md5'] |
|
| 819 |
- except (IOError,OSError), e: |
|
| 820 |
- # MD5 sum verification failed - ignore that file altogether |
|
| 821 |
- debug(u"IGNR: %s (disappeared)" % (file)) |
|
| 822 |
- warning(u"%s: file disappeared, ignoring." % (file)) |
|
| 823 |
- del(src_list[file]) |
|
| 824 |
- del(dst_list[file]) |
|
| 825 |
- continue |
|
| 826 |
- |
|
| 827 |
- if src_md5 != dst_md5: |
|
| 828 |
- ## Checksums are different. |
|
| 829 |
- attribs_match = False |
|
| 830 |
- debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5)) |
|
| 831 |
- |
|
| 832 |
- if attribs_match: |
|
| 833 |
- ## Remove from source-list, all that is left there will be transferred |
|
| 834 |
- debug(u"IGNR: %s (transfer not needed)" % file) |
|
| 835 |
- exists_list[file] = src_list[file] |
|
| 836 |
- del(src_list[file]) |
|
| 837 |
- |
|
| 838 |
- ## Remove from destination-list, all that is left there will be deleted |
|
| 839 |
- del(dst_list[file]) |
|
| 840 |
- |
|
| 841 |
- return src_list, dst_list, exists_list |
|
| 842 |
- |
|
| 843 | 654 |
def cmd_sync_remote2remote(args): |
| 844 | 655 |
s3 = S3(Config()) |
| 845 | 656 |
|
| ... | ... |
@@ -854,9 +536,9 @@ def cmd_sync_remote2remote(args): |
| 854 | 854 |
|
| 855 | 855 |
info(u"Found %d source files, %d destination files" % (src_count, dst_count)) |
| 856 | 856 |
|
| 857 |
- src_list, exclude_list = _filelist_filter_exclude_include(src_list) |
|
| 857 |
+ src_list, exclude_list = filter_exclude_include(src_list) |
|
| 858 | 858 |
|
| 859 |
- src_list, dst_list, existing_list = _compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True) |
|
| 859 |
+ src_list, dst_list, existing_list = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True) |
|
| 860 | 860 |
|
| 861 | 861 |
src_count = len(src_list) |
| 862 | 862 |
dst_count = len(dst_list) |
| ... | ... |
@@ -933,9 +615,9 @@ def cmd_sync_remote2local(args): |
| 933 | 933 |
|
| 934 | 934 |
info(u"Found %d remote files, %d local files" % (remote_count, local_count)) |
| 935 | 935 |
|
| 936 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
| 936 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
| 937 | 937 |
|
| 938 |
- remote_list, local_list, existing_list = _compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False) |
|
| 938 |
+ remote_list, local_list, existing_list = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False) |
|
| 939 | 939 |
|
| 940 | 940 |
local_count = len(local_list) |
| 941 | 941 |
remote_count = len(remote_list) |
| ... | ... |
@@ -1116,7 +798,7 @@ def cmd_sync_local2remote(args): |
| 1116 | 1116 |
|
| 1117 | 1117 |
info(u"Found %d local files, %d remote files" % (local_count, remote_count)) |
| 1118 | 1118 |
|
| 1119 |
- local_list, exclude_list = _filelist_filter_exclude_include(local_list) |
|
| 1119 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
| 1120 | 1120 |
|
| 1121 | 1121 |
if single_file_local and len(local_list) == 1 and len(remote_list) == 1: |
| 1122 | 1122 |
## Make remote_key same as local_key for comparison if we're dealing with only one file |
| ... | ... |
@@ -1124,7 +806,7 @@ def cmd_sync_local2remote(args): |
| 1124 | 1124 |
# Flush remote_list, by the way |
| 1125 | 1125 |
remote_list = { local_list.keys()[0] : remote_list_entry }
|
| 1126 | 1126 |
|
| 1127 |
- local_list, remote_list, existing_list = _compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True) |
|
| 1127 |
+ local_list, remote_list, existing_list = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True) |
|
| 1128 | 1128 |
|
| 1129 | 1129 |
local_count = len(local_list) |
| 1130 | 1130 |
remote_count = len(remote_list) |
| ... | ... |
@@ -1275,7 +957,7 @@ def cmd_setacl(args): |
| 1275 | 1275 |
args.append(arg) |
| 1276 | 1276 |
|
| 1277 | 1277 |
remote_list = fetch_remote_list(args) |
| 1278 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
| 1278 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
| 1279 | 1279 |
|
| 1280 | 1280 |
remote_count = len(remote_list) |
| 1281 | 1281 |
|
| ... | ... |
@@ -2002,6 +1684,7 @@ if __name__ == '__main__': |
| 2002 | 2002 |
from S3.Utils import * |
| 2003 | 2003 |
from S3.Progress import Progress |
| 2004 | 2004 |
from S3.CloudFront import Cmd as CfCmd |
| 2005 |
+ from S3.FileLists import * |
|
| 2005 | 2006 |
|
| 2006 | 2007 |
main() |
| 2007 | 2008 |
sys.exit(0) |