git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@467 830e0280-6d2a-0410-9c65-932aecc39d9d
Michal Ludvig authored on 2011/01/13 19:09:124 | 9 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,339 @@ |
0 |
+## Create and compare lists of files/objects |
|
1 |
+## Author: Michal Ludvig <michal@logix.cz> |
|
2 |
+## http://www.logix.cz/michal |
|
3 |
+## License: GPL Version 2 |
|
4 |
+ |
|
5 |
+from S3 import S3 |
|
6 |
+from Config import Config |
|
7 |
+from S3Uri import S3Uri |
|
8 |
+from SortedDict import SortedDict |
|
9 |
+from Utils import * |
|
10 |
+ |
|
11 |
+from logging import debug, info, warning, error |
|
12 |
+ |
|
13 |
+import os |
|
14 |
+import glob |
|
15 |
+ |
|
16 |
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"] |
|
17 |
+ |
|
18 |
def _fswalk_follow_symlinks(path):
    '''
    Walk the filesystem rooted at 'path', descending into symbolically
    linked directories one level deep (python2.4 and later).

    If a recursive directory link is detected, emit a warning and skip.
    '''
    assert os.path.isdir(path) # only designed for directory argument
    roots_to_walk = set([path])
    seen_targets = set()
    # First pass: scan the tree for directory symlinks and schedule each
    # unique link target for a walk of its own.
    for parent, subdirs, _files in os.walk(path):
        for name in subdirs:
            candidate = os.path.join(parent, name)
            resolved = os.path.realpath(candidate)
            if not os.path.islink(candidate):
                continue
            if resolved in seen_targets:
                warning("Skipping recursively symlinked directory %s" % name)
            else:
                roots_to_walk.add(candidate)
                seen_targets.add(resolved)
    # Second pass: walk every scheduled root, yielding the usual
    # (dirpath, dirnames, filenames) tuples.
    for root in roots_to_walk:
        for entry in os.walk(root):
            yield entry
|
40 |
+ |
|
41 |
def _fswalk(path, follow_symlinks):
    '''
    Directory tree generator.

    path (str) is the root of the directory tree to walk

    follow_symlinks (bool) indicates whether to descend into
    symbolically linked directories
    '''
    # Plain os.walk() does not follow symlinked directories; the helper
    # adds one level of symlink traversal when requested.
    if not follow_symlinks:
        return os.walk(path)
    return _fswalk_follow_symlinks(path)
|
52 |
+ |
|
53 |
def filter_exclude_include(src_list):
    '''
    Split src_list according to the --exclude/--include rules from Config.

    An entry matching any exclude pattern (and not rescued by an include
    pattern) is moved from src_list into a separate exclude list.
    Returns the tuple (src_list, exclude_list).
    '''
    info(u"Applying --exclude/--include")
    cfg = Config()
    exclude_list = SortedDict(ignore_case = False)
    for key in src_list.keys():
        debug(u"CHECK: %s" % key)
        is_excluded = False
        for pattern in cfg.exclude:
            if pattern.search(key):
                is_excluded = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[pattern]))
                break
        if is_excluded:
            ## An exclude hit may still be overridden by --include
            for pattern in cfg.include:
                if pattern.search(key):
                    is_excluded = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[pattern]))
                    break
        if not is_excluded:
            debug(u"PASS: %s" % (key))
        else:
            ## Definitely excluded - move it out of src_list
            debug(u"EXCLUDE: %s" % key)
            exclude_list[key] = src_list[key]
            del(src_list[key])
    return src_list, exclude_list
|
81 |
+ |
|
82 |
def fetch_local_list(args, recursive = None):
    """
    Build a SortedDict of local files to operate on.

    args may be a single local URI (string) or a list/tuple of them.
    recursive defaults to Config().recursive when None.

    Returns (local_list, single_file) where local_list maps a relative
    file name to a dict of its attributes and single_file is True only
    when the user specified exactly one URI that is itself a FILE (see
    the comment near the end of this function).

    Raises ParameterError for non-'file' URIs, or for a directory
    argument when recursion is not enabled.
    """
    def _get_filelist_local(local_uri):
        ## Compile the file list for a single local URI.
        ## Returns (loc_list, single_file) for this URI only.
        info(u"Compiling list of local files...")
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            ## _fswalk honours cfg.follow_symlinks for directory links
            filelist = _fswalk(local_path, cfg.follow_symlinks)
            single_file = False
        else:
            ## A plain file - fake a one-entry os.walk()-style listing
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        loc_list = SortedDict(ignore_case = False)
        for root, dirs, files in filelist:
            ## Rebase the walked root onto the URI's basename so the
            ## resulting keys are relative to the user's argument.
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    # skip non-regular entries (sockets, fifos, vanished files, ...)
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        # symlinked files are ignored unless explicitly requested
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                ## lstat: record the link itself, not its target
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    ## TODO: Possibly more to save here...
                }
        return loc_list, single_file

    cfg = Config()
    local_uris = []
    local_list = SortedDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    ## Validate every argument up-front, before any filesystem walking
    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(uri)
        local_list.update(list_for_uri)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    return local_list, single_file
|
155 |
+ |
|
156 |
def fetch_remote_list(args, require_attribs = False, recursive = None):
    """
    Build a SortedDict of remote (S3) objects to operate on.

    args may be a single S3 URI (string) or a list/tuple of them.
    require_attribs - when True and a plain (non-recursive, non-wildcard)
    URI is given, issue an object_info (HEAD) request so that 'size',
    'md5' and 'timestamp' are filled in for that item.
    recursive defaults to Config().recursive when None.

    Returns remote_list mapping a relative key to a dict of object
    attributes. Raises ParameterError for non-S3 URIs, or for a URI
    without a filename when not recursive.
    """
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }

        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            ## Prefix doesn't end with '/' - relative keys start after
            ## the last '/' of the prefix (see the examples above)
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = SortedDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = {}   ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]      ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
            }
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = SortedDict(ignore_case = False)

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                ## BUGFIX: cut the prefix at whichever wildcard comes
                ## FIRST. The previous test ('first_questionmark <
                ## first_wildcard') missed the case of a URI containing
                ## '?' but no '*' at all: str.find() returns -1 for the
                ## missing '*', the comparison failed, and the prefix
                ## ended up as uri_str[:-1] (everything but the last
                ## character) instead of everything before the '?'.
                if first_questionmark > -1 and (first_wildcard == -1 or first_questionmark < first_wildcard):
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    ## HEAD the object to learn its size/md5/timestamp
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
                    })
                remote_list[key] = remote_item
    return remote_list
|
268 |
+ |
|
269 |
def compare_filelists(src_list, dst_list, src_remote, dst_remote):
    """
    Compare a source and a destination file list for a sync operation.

    src_remote / dst_remote (bool) say which side is S3; local->local
    is not supported (asserted below). Both lists are modified in place:
    entries considered identical (or skipped via --skip-existing) are
    removed from both lists and collected separately.

    Returns (src_list, dst_list, exists_list):
    - src_list: what remains to be transferred
    - dst_list: what remains on the destination only (candidates for deletion)
    - exists_list: entries present on both sides that need no transfer
    """
    def __direction_str(is_remote):
        # human-readable side label for the debug message below
        return is_remote and "remote" or "local"

    # We don't support local->local sync, use 'rsync' or something like that instead ;-)
    assert(not(src_remote == False and dst_remote == False))

    info(u"Verifying attributes...")
    cfg = Config()
    exists_list = SortedDict(ignore_case = False)

    debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
    debug("src_list.keys: %s" % src_list.keys())
    debug("dst_list.keys: %s" % dst_list.keys())

    ## keys() returns a list copy (python2), so deleting entries from
    ## src_list inside the loop is safe.
    for file in src_list.keys():
        debug(u"CHECK: %s" % file)
        if dst_list.has_key(file):
            ## Was --skip-existing requested?
            if cfg.skip_existing:
                debug(u"IGNR: %s (used --skip-existing)" % (file))
                exists_list[file] = src_list[file]
                del(src_list[file])
                ## Remove from destination-list, all that is left there will be deleted
                del(dst_list[file])
                continue

            attribs_match = True
            ## Check size first
            if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
                debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
                attribs_match = False

            if attribs_match and 'md5' in cfg.sync_checks:
                ## ... same size, check MD5
                ## Local sides must hash the file on disk; remote sides
                ## already carry the 'md5' attribute from the listing.
                try:
                    if src_remote == False and dst_remote == True:
                        src_md5 = hash_file_md5(src_list[file]['full_name'])
                        dst_md5 = dst_list[file]['md5']
                    elif src_remote == True and dst_remote == False:
                        src_md5 = src_list[file]['md5']
                        dst_md5 = hash_file_md5(dst_list[file]['full_name'])
                    elif src_remote == True and dst_remote == True:
                        src_md5 = src_list[file]['md5']
                        dst_md5 = dst_list[file]['md5']
                except (IOError,OSError), e:
                    # MD5 sum verification failed - ignore that file altogether
                    debug(u"IGNR: %s (disappeared)" % (file))
                    warning(u"%s: file disappeared, ignoring." % (file))
                    del(src_list[file])
                    del(dst_list[file])
                    continue

                if src_md5 != dst_md5:
                    ## Checksums are different.
                    attribs_match = False
                    debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))

            if attribs_match:
                ## Remove from source-list, all that is left there will be transferred
                debug(u"IGNR: %s (transfer not needed)" % file)
                exists_list[file] = src_list[file]
                del(src_list[file])

                ## Remove from destination-list, all that is left there will be deleted
                del(dst_list[file])

    return src_list, dst_list, exists_list
|
337 |
+ |
|
338 |
+ |
... | ... |
@@ -37,41 +37,6 @@ def check_args_type(args, type, verbose_type): |
37 | 37 |
if S3Uri(arg).type != type: |
38 | 38 |
raise ParameterError("Expecting %s instead of '%s'" % (verbose_type, arg)) |
39 | 39 |
|
40 |
-def _fswalk_follow_symlinks(path): |
|
41 |
- ''' |
|
42 |
- Walk filesystem, following symbolic links (but without recursion), on python2.4 and later |
|
43 |
- |
|
44 |
- If a recursive directory link is detected, emit a warning and skip. |
|
45 |
- ''' |
|
46 |
- assert os.path.isdir(path) # only designed for directory argument |
|
47 |
- walkdirs = set([path]) |
|
48 |
- targets = set() |
|
49 |
- for dirpath, dirnames, filenames in os.walk(path): |
|
50 |
- for dirname in dirnames: |
|
51 |
- current = os.path.join(dirpath, dirname) |
|
52 |
- target = os.path.realpath(current) |
|
53 |
- if os.path.islink(current): |
|
54 |
- if target in targets: |
|
55 |
- warning("Skipping recursively symlinked directory %s" % dirname) |
|
56 |
- else: |
|
57 |
- walkdirs.add(current) |
|
58 |
- targets.add(target) |
|
59 |
- for walkdir in walkdirs: |
|
60 |
- for value in os.walk(walkdir): |
|
61 |
- yield value |
|
62 |
- |
|
63 |
-def fswalk(path, follow_symlinks): |
|
64 |
- ''' |
|
65 |
- Directory tree generator |
|
66 |
- |
|
67 |
- path (str) is the root of the directory tree to walk |
|
68 |
- |
|
69 |
- follow_symlinks (bool) indicates whether to descend into symbolically linked directories |
|
70 |
- ''' |
|
71 |
- if follow_symlinks: |
|
72 |
- return _fswalk_follow_symlinks(path) |
|
73 |
- return os.walk(path) |
|
74 |
- |
|
75 | 40 |
def cmd_du(args): |
76 | 41 |
s3 = S3(Config()) |
77 | 42 |
if len(args) > 0: |
... | ... |
@@ -222,100 +187,6 @@ def cmd_bucket_delete(args): |
222 | 222 |
_bucket_delete_one(uri) |
223 | 223 |
output(u"Bucket '%s' removed" % uri.uri()) |
224 | 224 |
|
225 |
-def fetch_local_list(args, recursive = None): |
|
226 |
- local_uris = [] |
|
227 |
- local_list = SortedDict(ignore_case = False) |
|
228 |
- single_file = False |
|
229 |
- |
|
230 |
- if type(args) not in (list, tuple): |
|
231 |
- args = [args] |
|
232 |
- |
|
233 |
- if recursive == None: |
|
234 |
- recursive = cfg.recursive |
|
235 |
- |
|
236 |
- for arg in args: |
|
237 |
- uri = S3Uri(arg) |
|
238 |
- if not uri.type == 'file': |
|
239 |
- raise ParameterError("Expecting filename or directory instead of: %s" % arg) |
|
240 |
- if uri.isdir() and not recursive: |
|
241 |
- raise ParameterError("Use --recursive to upload a directory: %s" % arg) |
|
242 |
- local_uris.append(uri) |
|
243 |
- |
|
244 |
- for uri in local_uris: |
|
245 |
- list_for_uri, single_file = _get_filelist_local(uri) |
|
246 |
- local_list.update(list_for_uri) |
|
247 |
- |
|
248 |
- ## Single file is True if and only if the user |
|
249 |
- ## specified one local URI and that URI represents |
|
250 |
- ## a FILE. Ie it is False if the URI was of a DIR |
|
251 |
- ## and that dir contained only one FILE. That's not |
|
252 |
- ## a case of single_file==True. |
|
253 |
- if len(local_list) > 1: |
|
254 |
- single_file = False |
|
255 |
- |
|
256 |
- return local_list, single_file |
|
257 |
- |
|
258 |
-def fetch_remote_list(args, require_attribs = False, recursive = None): |
|
259 |
- remote_uris = [] |
|
260 |
- remote_list = SortedDict(ignore_case = False) |
|
261 |
- |
|
262 |
- if type(args) not in (list, tuple): |
|
263 |
- args = [args] |
|
264 |
- |
|
265 |
- if recursive == None: |
|
266 |
- recursive = cfg.recursive |
|
267 |
- |
|
268 |
- for arg in args: |
|
269 |
- uri = S3Uri(arg) |
|
270 |
- if not uri.type == 's3': |
|
271 |
- raise ParameterError("Expecting S3 URI instead of '%s'" % arg) |
|
272 |
- remote_uris.append(uri) |
|
273 |
- |
|
274 |
- if recursive: |
|
275 |
- for uri in remote_uris: |
|
276 |
- objectlist = _get_filelist_remote(uri) |
|
277 |
- for key in objectlist: |
|
278 |
- remote_list[key] = objectlist[key] |
|
279 |
- else: |
|
280 |
- for uri in remote_uris: |
|
281 |
- uri_str = str(uri) |
|
282 |
- ## Wildcards used in remote URI? |
|
283 |
- ## If yes we'll need a bucket listing... |
|
284 |
- if uri_str.find('*') > -1 or uri_str.find('?') > -1: |
|
285 |
- first_wildcard = uri_str.find('*') |
|
286 |
- first_questionmark = uri_str.find('?') |
|
287 |
- if first_questionmark > -1 and first_questionmark < first_wildcard: |
|
288 |
- first_wildcard = first_questionmark |
|
289 |
- prefix = uri_str[:first_wildcard] |
|
290 |
- rest = uri_str[first_wildcard+1:] |
|
291 |
- ## Only request recursive listing if the 'rest' of the URI, |
|
292 |
- ## i.e. the part after first wildcard, contains '/' |
|
293 |
- need_recursion = rest.find('/') > -1 |
|
294 |
- objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion) |
|
295 |
- for key in objectlist: |
|
296 |
- ## Check whether the 'key' matches the requested wildcards |
|
297 |
- if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str): |
|
298 |
- remote_list[key] = objectlist[key] |
|
299 |
- else: |
|
300 |
- ## No wildcards - simply append the given URI to the list |
|
301 |
- key = os.path.basename(uri.object()) |
|
302 |
- if not key: |
|
303 |
- raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri()) |
|
304 |
- remote_item = { |
|
305 |
- 'base_uri': uri, |
|
306 |
- 'object_uri_str': unicode(uri), |
|
307 |
- 'object_key': uri.object() |
|
308 |
- } |
|
309 |
- if require_attribs: |
|
310 |
- response = S3(cfg).object_info(uri) |
|
311 |
- remote_item.update({ |
|
312 |
- 'size': int(response['headers']['content-length']), |
|
313 |
- 'md5': response['headers']['etag'].strip('"\''), |
|
314 |
- 'timestamp' : Utils.dateRFC822toUnix(response['headers']['date']) |
|
315 |
- }) |
|
316 |
- remote_list[key] = remote_item |
|
317 |
- return remote_list |
|
318 |
- |
|
319 | 225 |
def cmd_object_put(args): |
320 | 226 |
cfg = Config() |
321 | 227 |
s3 = S3(cfg) |
... | ... |
@@ -334,7 +205,7 @@ def cmd_object_put(args): |
334 | 334 |
|
335 | 335 |
local_list, single_file_local = fetch_local_list(args) |
336 | 336 |
|
337 |
- local_list, exclude_list = _filelist_filter_exclude_include(local_list) |
|
337 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
338 | 338 |
|
339 | 339 |
local_count = len(local_list) |
340 | 340 |
|
... | ... |
@@ -437,7 +308,7 @@ def cmd_object_get(args): |
437 | 437 |
raise ParameterError("Nothing to download. Expecting S3 URI.") |
438 | 438 |
|
439 | 439 |
remote_list = fetch_remote_list(args, require_attribs = False) |
440 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
440 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
441 | 441 |
|
442 | 442 |
remote_count = len(remote_list) |
443 | 443 |
|
... | ... |
@@ -538,7 +409,7 @@ def subcmd_object_del_uri(uri_str, recursive = None): |
538 | 538 |
recursive = cfg.recursive |
539 | 539 |
|
540 | 540 |
remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive) |
541 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
541 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
542 | 542 |
|
543 | 543 |
remote_count = len(remote_list) |
544 | 544 |
|
... | ... |
@@ -567,7 +438,7 @@ def subcmd_cp_mv(args, process_fce, action_str, message): |
567 | 567 |
destination_base = dst_base_uri.uri() |
568 | 568 |
|
569 | 569 |
remote_list = fetch_remote_list(args, require_attribs = False) |
570 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
570 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
571 | 571 |
|
572 | 572 |
remote_count = len(remote_list) |
573 | 573 |
|
... | ... |
@@ -651,195 +522,6 @@ def cmd_info(args): |
651 | 651 |
else: |
652 | 652 |
raise |
653 | 653 |
|
654 |
-def _get_filelist_local(local_uri): |
|
655 |
- info(u"Compiling list of local files...") |
|
656 |
- if local_uri.isdir(): |
|
657 |
- local_base = deunicodise(local_uri.basename()) |
|
658 |
- local_path = deunicodise(local_uri.path()) |
|
659 |
- filelist = fswalk(local_path, cfg.follow_symlinks) |
|
660 |
- single_file = False |
|
661 |
- else: |
|
662 |
- local_base = "" |
|
663 |
- local_path = deunicodise(local_uri.dirname()) |
|
664 |
- filelist = [( local_path, [], [deunicodise(local_uri.basename())] )] |
|
665 |
- single_file = True |
|
666 |
- loc_list = SortedDict(ignore_case = False) |
|
667 |
- for root, dirs, files in filelist: |
|
668 |
- rel_root = root.replace(local_path, local_base, 1) |
|
669 |
- for f in files: |
|
670 |
- full_name = os.path.join(root, f) |
|
671 |
- if not os.path.isfile(full_name): |
|
672 |
- continue |
|
673 |
- if os.path.islink(full_name): |
|
674 |
- if not cfg.follow_symlinks: |
|
675 |
- continue |
|
676 |
- relative_file = unicodise(os.path.join(rel_root, f)) |
|
677 |
- if os.path.sep != "/": |
|
678 |
- # Convert non-unix dir separators to '/' |
|
679 |
- relative_file = "/".join(relative_file.split(os.path.sep)) |
|
680 |
- if cfg.urlencoding_mode == "normal": |
|
681 |
- relative_file = replace_nonprintables(relative_file) |
|
682 |
- if relative_file.startswith('./'): |
|
683 |
- relative_file = relative_file[2:] |
|
684 |
- sr = os.stat_result(os.lstat(full_name)) |
|
685 |
- loc_list[relative_file] = { |
|
686 |
- 'full_name_unicode' : unicodise(full_name), |
|
687 |
- 'full_name' : full_name, |
|
688 |
- 'size' : sr.st_size, |
|
689 |
- 'mtime' : sr.st_mtime, |
|
690 |
- ## TODO: Possibly more to save here... |
|
691 |
- } |
|
692 |
- return loc_list, single_file |
|
693 |
- |
|
694 |
-def _get_filelist_remote(remote_uri, recursive = True): |
|
695 |
- ## If remote_uri ends with '/' then all remote files will have |
|
696 |
- ## the remote_uri prefix removed in the relative path. |
|
697 |
- ## If, on the other hand, the remote_uri ends with something else |
|
698 |
- ## (probably alphanumeric symbol) we'll use the last path part |
|
699 |
- ## in the relative path. |
|
700 |
- ## |
|
701 |
- ## Complicated, eh? See an example: |
|
702 |
- ## _get_filelist_remote("s3://bckt/abc/def") may yield: |
|
703 |
- ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} } |
|
704 |
- ## _get_filelist_remote("s3://bckt/abc/def/") will yield: |
|
705 |
- ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} } |
|
706 |
- ## Furthermore a prefix-magic can restrict the return list: |
|
707 |
- ## _get_filelist_remote("s3://bckt/abc/def/x") yields: |
|
708 |
- ## { 'xyz/blah.txt' : {} } |
|
709 |
- |
|
710 |
- info(u"Retrieving list of remote files for %s ..." % remote_uri) |
|
711 |
- |
|
712 |
- s3 = S3(Config()) |
|
713 |
- response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive) |
|
714 |
- |
|
715 |
- rem_base_original = rem_base = remote_uri.object() |
|
716 |
- remote_uri_original = remote_uri |
|
717 |
- if rem_base != '' and rem_base[-1] != '/': |
|
718 |
- rem_base = rem_base[:rem_base.rfind('/')+1] |
|
719 |
- remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base)) |
|
720 |
- rem_base_len = len(rem_base) |
|
721 |
- rem_list = SortedDict(ignore_case = False) |
|
722 |
- break_now = False |
|
723 |
- for object in response['list']: |
|
724 |
- if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep: |
|
725 |
- ## We asked for one file and we got that file :-) |
|
726 |
- key = os.path.basename(object['Key']) |
|
727 |
- object_uri_str = remote_uri_original.uri() |
|
728 |
- break_now = True |
|
729 |
- rem_list = {} ## Remove whatever has already been put to rem_list |
|
730 |
- else: |
|
731 |
- key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !! |
|
732 |
- object_uri_str = remote_uri.uri() + key |
|
733 |
- rem_list[key] = { |
|
734 |
- 'size' : int(object['Size']), |
|
735 |
- 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-( |
|
736 |
- 'md5' : object['ETag'][1:-1], |
|
737 |
- 'object_key' : object['Key'], |
|
738 |
- 'object_uri_str' : object_uri_str, |
|
739 |
- 'base_uri' : remote_uri, |
|
740 |
- } |
|
741 |
- if break_now: |
|
742 |
- break |
|
743 |
- return rem_list |
|
744 |
- |
|
745 |
-def _filelist_filter_exclude_include(src_list): |
|
746 |
- info(u"Applying --exclude/--include") |
|
747 |
- cfg = Config() |
|
748 |
- exclude_list = SortedDict(ignore_case = False) |
|
749 |
- for file in src_list.keys(): |
|
750 |
- debug(u"CHECK: %s" % file) |
|
751 |
- excluded = False |
|
752 |
- for r in cfg.exclude: |
|
753 |
- if r.search(file): |
|
754 |
- excluded = True |
|
755 |
- debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) |
|
756 |
- break |
|
757 |
- if excluded: |
|
758 |
- ## No need to check for --include if not excluded |
|
759 |
- for r in cfg.include: |
|
760 |
- if r.search(file): |
|
761 |
- excluded = False |
|
762 |
- debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) |
|
763 |
- break |
|
764 |
- if excluded: |
|
765 |
- ## Still excluded - ok, action it |
|
766 |
- debug(u"EXCLUDE: %s" % file) |
|
767 |
- exclude_list[file] = src_list[file] |
|
768 |
- del(src_list[file]) |
|
769 |
- continue |
|
770 |
- else: |
|
771 |
- debug(u"PASS: %s" % (file)) |
|
772 |
- return src_list, exclude_list |
|
773 |
- |
|
774 |
-def _compare_filelists(src_list, dst_list, src_remote, dst_remote): |
|
775 |
- def __direction_str(is_remote): |
|
776 |
- return is_remote and "remote" or "local" |
|
777 |
- |
|
778 |
- # We don't support local->local sync, use 'rsync' or something like that instead ;-) |
|
779 |
- assert(not(src_remote == False and dst_remote == False)) |
|
780 |
- |
|
781 |
- info(u"Verifying attributes...") |
|
782 |
- cfg = Config() |
|
783 |
- exists_list = SortedDict(ignore_case = False) |
|
784 |
- |
|
785 |
- debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote))) |
|
786 |
- debug("src_list.keys: %s" % src_list.keys()) |
|
787 |
- debug("dst_list.keys: %s" % dst_list.keys()) |
|
788 |
- |
|
789 |
- for file in src_list.keys(): |
|
790 |
- debug(u"CHECK: %s" % file) |
|
791 |
- if dst_list.has_key(file): |
|
792 |
- ## Was --skip-existing requested? |
|
793 |
- if cfg.skip_existing: |
|
794 |
- debug(u"IGNR: %s (used --skip-existing)" % (file)) |
|
795 |
- exists_list[file] = src_list[file] |
|
796 |
- del(src_list[file]) |
|
797 |
- ## Remove from destination-list, all that is left there will be deleted |
|
798 |
- del(dst_list[file]) |
|
799 |
- continue |
|
800 |
- |
|
801 |
- attribs_match = True |
|
802 |
- ## Check size first |
|
803 |
- if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']: |
|
804 |
- debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size'])) |
|
805 |
- attribs_match = False |
|
806 |
- |
|
807 |
- if attribs_match and 'md5' in cfg.sync_checks: |
|
808 |
- ## ... same size, check MD5 |
|
809 |
- try: |
|
810 |
- if src_remote == False and dst_remote == True: |
|
811 |
- src_md5 = Utils.hash_file_md5(src_list[file]['full_name']) |
|
812 |
- dst_md5 = dst_list[file]['md5'] |
|
813 |
- elif src_remote == True and dst_remote == False: |
|
814 |
- src_md5 = src_list[file]['md5'] |
|
815 |
- dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name']) |
|
816 |
- elif src_remote == True and dst_remote == True: |
|
817 |
- src_md5 = src_list[file]['md5'] |
|
818 |
- dst_md5 = dst_list[file]['md5'] |
|
819 |
- except (IOError,OSError), e: |
|
820 |
- # MD5 sum verification failed - ignore that file altogether |
|
821 |
- debug(u"IGNR: %s (disappeared)" % (file)) |
|
822 |
- warning(u"%s: file disappeared, ignoring." % (file)) |
|
823 |
- del(src_list[file]) |
|
824 |
- del(dst_list[file]) |
|
825 |
- continue |
|
826 |
- |
|
827 |
- if src_md5 != dst_md5: |
|
828 |
- ## Checksums are different. |
|
829 |
- attribs_match = False |
|
830 |
- debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5)) |
|
831 |
- |
|
832 |
- if attribs_match: |
|
833 |
- ## Remove from source-list, all that is left there will be transferred |
|
834 |
- debug(u"IGNR: %s (transfer not needed)" % file) |
|
835 |
- exists_list[file] = src_list[file] |
|
836 |
- del(src_list[file]) |
|
837 |
- |
|
838 |
- ## Remove from destination-list, all that is left there will be deleted |
|
839 |
- del(dst_list[file]) |
|
840 |
- |
|
841 |
- return src_list, dst_list, exists_list |
|
842 |
- |
|
843 | 654 |
def cmd_sync_remote2remote(args): |
844 | 655 |
s3 = S3(Config()) |
845 | 656 |
|
... | ... |
@@ -854,9 +536,9 @@ def cmd_sync_remote2remote(args): |
854 | 854 |
|
855 | 855 |
info(u"Found %d source files, %d destination files" % (src_count, dst_count)) |
856 | 856 |
|
857 |
- src_list, exclude_list = _filelist_filter_exclude_include(src_list) |
|
857 |
+ src_list, exclude_list = filter_exclude_include(src_list) |
|
858 | 858 |
|
859 |
- src_list, dst_list, existing_list = _compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True) |
|
859 |
+ src_list, dst_list, existing_list = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True) |
|
860 | 860 |
|
861 | 861 |
src_count = len(src_list) |
862 | 862 |
dst_count = len(dst_list) |
... | ... |
@@ -933,9 +615,9 @@ def cmd_sync_remote2local(args): |
933 | 933 |
|
934 | 934 |
info(u"Found %d remote files, %d local files" % (remote_count, local_count)) |
935 | 935 |
|
936 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
936 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
937 | 937 |
|
938 |
- remote_list, local_list, existing_list = _compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False) |
|
938 |
+ remote_list, local_list, existing_list = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False) |
|
939 | 939 |
|
940 | 940 |
local_count = len(local_list) |
941 | 941 |
remote_count = len(remote_list) |
... | ... |
@@ -1116,7 +798,7 @@ def cmd_sync_local2remote(args): |
1116 | 1116 |
|
1117 | 1117 |
info(u"Found %d local files, %d remote files" % (local_count, remote_count)) |
1118 | 1118 |
|
1119 |
- local_list, exclude_list = _filelist_filter_exclude_include(local_list) |
|
1119 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
1120 | 1120 |
|
1121 | 1121 |
if single_file_local and len(local_list) == 1 and len(remote_list) == 1: |
1122 | 1122 |
## Make remote_key same as local_key for comparison if we're dealing with only one file |
... | ... |
@@ -1124,7 +806,7 @@ def cmd_sync_local2remote(args): |
1124 | 1124 |
# Flush remote_list, by the way |
1125 | 1125 |
remote_list = { local_list.keys()[0] : remote_list_entry } |
1126 | 1126 |
|
1127 |
- local_list, remote_list, existing_list = _compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True) |
|
1127 |
+ local_list, remote_list, existing_list = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True) |
|
1128 | 1128 |
|
1129 | 1129 |
local_count = len(local_list) |
1130 | 1130 |
remote_count = len(remote_list) |
... | ... |
@@ -1275,7 +957,7 @@ def cmd_setacl(args): |
1275 | 1275 |
args.append(arg) |
1276 | 1276 |
|
1277 | 1277 |
remote_list = fetch_remote_list(args) |
1278 |
- remote_list, exclude_list = _filelist_filter_exclude_include(remote_list) |
|
1278 |
+ remote_list, exclude_list = filter_exclude_include(remote_list) |
|
1279 | 1279 |
|
1280 | 1280 |
remote_count = len(remote_list) |
1281 | 1281 |
|
... | ... |
@@ -2002,6 +1684,7 @@ if __name__ == '__main__': |
2002 | 2002 |
from S3.Utils import * |
2003 | 2003 |
from S3.Progress import Progress |
2004 | 2004 |
from S3.CloudFront import Cmd as CfCmd |
2005 |
+ from S3.FileLists import * |
|
2005 | 2006 |
|
2006 | 2007 |
main() |
2007 | 2008 |
sys.exit(0) |