
* s3cmd, S3/FileLists.py: Move file/object listing functions to S3/FileLists.py

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@467 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2011/01/13 19:09:12
Showing 3 changed files

ChangeLog
@@ -1,3 +1,8 @@
+2011-01-13  Michal Ludvig  <mludvig@logix.net.nz>
+
+	* s3cmd, S3/FileLists.py: Move file/object listing functions
+	  to S3/FileLists.py
+
 2011-01-09  Michal Ludvig  <mludvig@logix.net.nz>
 
 	* Released version 1.0.0

S3/FileLists.py
new file mode 100644
@@ -0,0 +1,339 @@
+## Create and compare lists of files/objects
+## Author: Michal Ludvig <michal@logix.cz>
+##         http://www.logix.cz/michal
+## License: GPL Version 2
+
+from S3 import S3
+from Config import Config
+from S3Uri import S3Uri
+from SortedDict import SortedDict
+from Utils import *
+
+from logging import debug, info, warning, error
+
+import os
+import glob
+
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]
+
+def _fswalk_follow_symlinks(path):
+        '''
+        Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
+
+        If a recursive directory link is detected, emit a warning and skip.
+        '''
+        assert os.path.isdir(path) # only designed for directory argument
+        walkdirs = set([path])
+        targets = set()
+        for dirpath, dirnames, filenames in os.walk(path):
+                for dirname in dirnames:
+                        current = os.path.join(dirpath, dirname)
+                        target = os.path.realpath(current)
+                        if os.path.islink(current):
+                                if target in targets:
+                                        warning("Skipping recursively symlinked directory %s" % dirname)
+                                else:
+                                        walkdirs.add(current)
+                        targets.add(target)
+        for walkdir in walkdirs:
+                for value in os.walk(walkdir):
+                        yield value
+
+def _fswalk(path, follow_symlinks):
+        '''
+        Directory tree generator
+
+        path (str) is the root of the directory tree to walk
+
+        follow_symlinks (bool) indicates whether to descend into symbolically linked directories
+        '''
+        if follow_symlinks:
+                return _fswalk_follow_symlinks(path)
+        return os.walk(path)
+
+def filter_exclude_include(src_list):
+	info(u"Applying --exclude/--include")
+	cfg = Config()
+	exclude_list = SortedDict(ignore_case = False)
+	for file in src_list.keys():
+		debug(u"CHECK: %s" % file)
+		excluded = False
+		for r in cfg.exclude:
+			if r.search(file):
+				excluded = True
+				debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+				break
+		if excluded:
+			## No need to check for --include if not excluded
+			for r in cfg.include:
+				if r.search(file):
+					excluded = False
+					debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+					break
+		if excluded:
+			## Still excluded - ok, action it
+			debug(u"EXCLUDE: %s" % file)
+			exclude_list[file] = src_list[file]
+			del(src_list[file])
+			continue
+		else:
+			debug(u"PASS: %s" % (file))
+	return src_list, exclude_list
+
+def fetch_local_list(args, recursive = None):
+	def _get_filelist_local(local_uri):
+		info(u"Compiling list of local files...")
+		if local_uri.isdir():
+			local_base = deunicodise(local_uri.basename())
+			local_path = deunicodise(local_uri.path())
+			filelist = _fswalk(local_path, cfg.follow_symlinks)
+			single_file = False
+		else:
+			local_base = ""
+			local_path = deunicodise(local_uri.dirname())
+			filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
+			single_file = True
+		loc_list = SortedDict(ignore_case = False)
+		for root, dirs, files in filelist:
+			rel_root = root.replace(local_path, local_base, 1)
+			for f in files:
+				full_name = os.path.join(root, f)
+				if not os.path.isfile(full_name):
+					continue
+				if os.path.islink(full_name):
+                                    if not cfg.follow_symlinks:
+                                            continue
+				relative_file = unicodise(os.path.join(rel_root, f))
+				if os.path.sep != "/":
+					# Convert non-unix dir separators to '/'
+					relative_file = "/".join(relative_file.split(os.path.sep))
+				if cfg.urlencoding_mode == "normal":
+					relative_file = replace_nonprintables(relative_file)
+				if relative_file.startswith('./'):
+					relative_file = relative_file[2:]
+				sr = os.stat_result(os.lstat(full_name))
+				loc_list[relative_file] = {
+					'full_name_unicode' : unicodise(full_name),
+					'full_name' : full_name,
+					'size' : sr.st_size, 
+					'mtime' : sr.st_mtime,
+					## TODO: Possibly more to save here...
+				}
+		return loc_list, single_file
+
+	cfg = Config()
+	local_uris = []
+	local_list = SortedDict(ignore_case = False)
+	single_file = False
+
+	if type(args) not in (list, tuple):
+		args = [args]
+
+	if recursive == None:
+		recursive = cfg.recursive
+
+	for arg in args:
+		uri = S3Uri(arg)
+		if not uri.type == 'file':
+			raise ParameterError("Expecting filename or directory instead of: %s" % arg)
+		if uri.isdir() and not recursive:
+			raise ParameterError("Use --recursive to upload a directory: %s" % arg)
+		local_uris.append(uri)
+
+	for uri in local_uris:
+		list_for_uri, single_file = _get_filelist_local(uri)
+		local_list.update(list_for_uri)
+
+	## Single file is True if and only if the user 
+	## specified one local URI and that URI represents
+	## a FILE. Ie it is False if the URI was of a DIR
+	## and that dir contained only one FILE. That's not
+	## a case of single_file==True.
+	if len(local_list) > 1:
+		single_file = False
+
+	return local_list, single_file
+
+def fetch_remote_list(args, require_attribs = False, recursive = None):
+	def _get_filelist_remote(remote_uri, recursive = True):
+		## If remote_uri ends with '/' then all remote files will have 
+		## the remote_uri prefix removed in the relative path.
+		## If, on the other hand, the remote_uri ends with something else
+		## (probably alphanumeric symbol) we'll use the last path part 
+		## in the relative path.
+		##
+		## Complicated, eh? See an example:
+		## _get_filelist_remote("s3://bckt/abc/def") may yield:
+		## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
+		## _get_filelist_remote("s3://bckt/abc/def/") will yield:
+		## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
+		## Furthermore a prefix-magic can restrict the return list:
+		## _get_filelist_remote("s3://bckt/abc/def/x") yields:
+		## { 'xyz/blah.txt' : {} }
+
+		info(u"Retrieving list of remote files for %s ..." % remote_uri)
+
+		s3 = S3(Config())
+		response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
+
+		rem_base_original = rem_base = remote_uri.object()
+		remote_uri_original = remote_uri
+		if rem_base != '' and rem_base[-1] != '/':
+			rem_base = rem_base[:rem_base.rfind('/')+1]
+			remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
+		rem_base_len = len(rem_base)
+		rem_list = SortedDict(ignore_case = False)
+		break_now = False
+		for object in response['list']:
+			if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
+				## We asked for one file and we got that file :-)
+				key = os.path.basename(object['Key'])
+				object_uri_str = remote_uri_original.uri()
+				break_now = True
+				rem_list = {}	## Remove whatever has already been put to rem_list
+			else:
+				key = object['Key'][rem_base_len:]		## Beware - this may be '' if object['Key']==rem_base !!
+				object_uri_str = remote_uri.uri() + key
+			rem_list[key] = { 
+				'size' : int(object['Size']),
+				'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
+				'md5' : object['ETag'][1:-1],
+				'object_key' : object['Key'],
+				'object_uri_str' : object_uri_str,
+				'base_uri' : remote_uri,
+			}
+			if break_now:
+				break
+		return rem_list
+
+	cfg = Config()
+	remote_uris = []
+	remote_list = SortedDict(ignore_case = False)
+
+	if type(args) not in (list, tuple):
+		args = [args]
+
+	if recursive == None:
+		recursive = cfg.recursive
+
+	for arg in args:
+		uri = S3Uri(arg)
+		if not uri.type == 's3':
+			raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
+		remote_uris.append(uri)
+
+	if recursive:
+		for uri in remote_uris:
+			objectlist = _get_filelist_remote(uri)
+			for key in objectlist:
+				remote_list[key] = objectlist[key]
+	else:
+		for uri in remote_uris:
+			uri_str = str(uri)
+			## Wildcards used in remote URI?
+			## If yes we'll need a bucket listing...
+			if uri_str.find('*') > -1 or uri_str.find('?') > -1:
+				first_wildcard = uri_str.find('*')
+				first_questionmark = uri_str.find('?')
+				if first_questionmark > -1 and first_questionmark < first_wildcard:
+					first_wildcard = first_questionmark
+				prefix = uri_str[:first_wildcard]
+				rest = uri_str[first_wildcard+1:]
+				## Only request recursive listing if the 'rest' of the URI,
+				## i.e. the part after first wildcard, contains '/'
+				need_recursion = rest.find('/') > -1
+				objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
+				for key in objectlist:
+					## Check whether the 'key' matches the requested wildcards
+					if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
+						remote_list[key] = objectlist[key]
+			else:
+				## No wildcards - simply append the given URI to the list
+				key = os.path.basename(uri.object())
+				if not key:
+					raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
+				remote_item = {
+					'base_uri': uri,
+					'object_uri_str': unicode(uri),
+					'object_key': uri.object()
+				}
+				if require_attribs:
+					response = S3(cfg).object_info(uri)
+					remote_item.update({
+					'size': int(response['headers']['content-length']),
+					'md5': response['headers']['etag'].strip('"\''),
+					'timestamp' : dateRFC822toUnix(response['headers']['date'])
+					})
+				remote_list[key] = remote_item
+	return remote_list
+
+def compare_filelists(src_list, dst_list, src_remote, dst_remote):
+	def __direction_str(is_remote):
+		return is_remote and "remote" or "local"
+
+	# We don't support local->local sync, use 'rsync' or something like that instead ;-)
+	assert(not(src_remote == False and dst_remote == False))
+
+	info(u"Verifying attributes...")
+	cfg = Config()
+	exists_list = SortedDict(ignore_case = False)
+
+	debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
+	debug("src_list.keys: %s" % src_list.keys())
+	debug("dst_list.keys: %s" % dst_list.keys())
+
+	for file in src_list.keys():
+		debug(u"CHECK: %s" % file)
+		if dst_list.has_key(file):
+			## Was --skip-existing requested?
+			if cfg.skip_existing:
+				debug(u"IGNR: %s (used --skip-existing)" % (file))
+				exists_list[file] = src_list[file]
+				del(src_list[file])
+				## Remove from destination-list, all that is left there will be deleted
+				del(dst_list[file])
+				continue
+
+			attribs_match = True
+			## Check size first
+			if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+				debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+				attribs_match = False
+
+			if attribs_match and 'md5' in cfg.sync_checks:
+				## ... same size, check MD5
+				try:
+					if src_remote == False and dst_remote == True:
+						src_md5 = hash_file_md5(src_list[file]['full_name'])
+						dst_md5 = dst_list[file]['md5']
+					elif src_remote == True and dst_remote == False:
+						src_md5 = src_list[file]['md5']
+						dst_md5 = hash_file_md5(dst_list[file]['full_name'])
+					elif src_remote == True and dst_remote == True:
+						src_md5 = src_list[file]['md5']
+						dst_md5 = dst_list[file]['md5']
+				except (IOError,OSError), e:
+					# MD5 sum verification failed - ignore that file altogether
+					debug(u"IGNR: %s (disappeared)" % (file))
+					warning(u"%s: file disappeared, ignoring." % (file))
+					del(src_list[file])
+					del(dst_list[file])
+					continue
+
+				if src_md5 != dst_md5:
+					## Checksums are different.
+					attribs_match = False
+					debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+			if attribs_match:
+				## Remove from source-list, all that is left there will be transferred
+				debug(u"IGNR: %s (transfer not needed)" % file)
+				exists_list[file] = src_list[file]
+				del(src_list[file])
+
+			## Remove from destination-list, all that is left there will be deleted
+			del(dst_list[file])
+
+	return src_list, dst_list, exists_list
+
+

s3cmd
@@ -37,41 +37,6 @@ def check_args_type(args, type, verbose_type):
 		if S3Uri(arg).type != type:
 			raise ParameterError("Expecting %s instead of '%s'" % (verbose_type, arg))
 
-def _fswalk_follow_symlinks(path):
-        '''
-        Walk filesystem, following symbolic links (but without recursion), on python2.4 and later
-
-        If a recursive directory link is detected, emit a warning and skip.
-        '''
-        assert os.path.isdir(path) # only designed for directory argument
-        walkdirs = set([path])
-        targets = set()
-        for dirpath, dirnames, filenames in os.walk(path):
-                for dirname in dirnames:
-                        current = os.path.join(dirpath, dirname)
-                        target = os.path.realpath(current)
-                        if os.path.islink(current):
-                                if target in targets:
-                                        warning("Skipping recursively symlinked directory %s" % dirname)
-                                else:
-                                        walkdirs.add(current)
-                        targets.add(target)
-        for walkdir in walkdirs:
-                for value in os.walk(walkdir):
-                        yield value
-
-def fswalk(path, follow_symlinks):
-        '''
-        Directory tree generator
-
-        path (str) is the root of the directory tree to walk
-
-        follow_symlinks (bool) indicates whether to descend into symbolically linked directories
-        '''
-        if follow_symlinks:
-                return _fswalk_follow_symlinks(path)
-        return os.walk(path)
-
 def cmd_du(args):
 	s3 = S3(Config())
 	if len(args) > 0:
@@ -222,100 +187,6 @@ def cmd_bucket_delete(args):
 		_bucket_delete_one(uri)
 		output(u"Bucket '%s' removed" % uri.uri())
 
-def fetch_local_list(args, recursive = None):
-	local_uris = []
-	local_list = SortedDict(ignore_case = False)
-	single_file = False
-
-	if type(args) not in (list, tuple):
-		args = [args]
-
-	if recursive == None:
-		recursive = cfg.recursive
-
-	for arg in args:
-		uri = S3Uri(arg)
-		if not uri.type == 'file':
-			raise ParameterError("Expecting filename or directory instead of: %s" % arg)
-		if uri.isdir() and not recursive:
-			raise ParameterError("Use --recursive to upload a directory: %s" % arg)
-		local_uris.append(uri)
-
-	for uri in local_uris:
-		list_for_uri, single_file = _get_filelist_local(uri)
-		local_list.update(list_for_uri)
-
-	## Single file is True if and only if the user 
-	## specified one local URI and that URI represents
-	## a FILE. Ie it is False if the URI was of a DIR
-	## and that dir contained only one FILE. That's not
-	## a case of single_file==True.
-	if len(local_list) > 1:
-		single_file = False
-
-	return local_list, single_file
-
-def fetch_remote_list(args, require_attribs = False, recursive = None):
-	remote_uris = []
-	remote_list = SortedDict(ignore_case = False)
-
-	if type(args) not in (list, tuple):
-		args = [args]
-
-	if recursive == None:
-		recursive = cfg.recursive
-
-	for arg in args:
-		uri = S3Uri(arg)
-		if not uri.type == 's3':
-			raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
-		remote_uris.append(uri)
-
-	if recursive:
-		for uri in remote_uris:
-			objectlist = _get_filelist_remote(uri)
-			for key in objectlist:
-				remote_list[key] = objectlist[key]
-	else:
-		for uri in remote_uris:
-			uri_str = str(uri)
-			## Wildcards used in remote URI?
-			## If yes we'll need a bucket listing...
-			if uri_str.find('*') > -1 or uri_str.find('?') > -1:
-				first_wildcard = uri_str.find('*')
-				first_questionmark = uri_str.find('?')
-				if first_questionmark > -1 and first_questionmark < first_wildcard:
-					first_wildcard = first_questionmark
-				prefix = uri_str[:first_wildcard]
-				rest = uri_str[first_wildcard+1:]
-				## Only request recursive listing if the 'rest' of the URI,
-				## i.e. the part after first wildcard, contains '/'
-				need_recursion = rest.find('/') > -1
-				objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
-				for key in objectlist:
-					## Check whether the 'key' matches the requested wildcards
-					if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
-						remote_list[key] = objectlist[key]
-			else:
-				## No wildcards - simply append the given URI to the list
-				key = os.path.basename(uri.object())
-				if not key:
-					raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
-				remote_item = {
-					'base_uri': uri,
-					'object_uri_str': unicode(uri),
-					'object_key': uri.object()
-				}
-				if require_attribs:
-					response = S3(cfg).object_info(uri)
-					remote_item.update({
-					'size': int(response['headers']['content-length']),
-					'md5': response['headers']['etag'].strip('"\''),
-					'timestamp' : Utils.dateRFC822toUnix(response['headers']['date'])
-					})
-				remote_list[key] = remote_item
-	return remote_list
-
 def cmd_object_put(args):
 	cfg = Config()
 	s3 = S3(cfg)
@@ -334,7 +205,7 @@ def cmd_object_put(args):
 
 	local_list, single_file_local = fetch_local_list(args)
 
-	local_list, exclude_list = _filelist_filter_exclude_include(local_list)
+	local_list, exclude_list = filter_exclude_include(local_list)
 
 	local_count = len(local_list)
 
@@ -437,7 +308,7 @@ def cmd_object_get(args):
 		raise ParameterError("Nothing to download. Expecting S3 URI.")
 
 	remote_list = fetch_remote_list(args, require_attribs = False)
-	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
+	remote_list, exclude_list = filter_exclude_include(remote_list)
 
 	remote_count = len(remote_list)
 
@@ -538,7 +409,7 @@ def subcmd_object_del_uri(uri_str, recursive = None):
 		recursive = cfg.recursive
 
 	remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive)
-	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
+	remote_list, exclude_list = filter_exclude_include(remote_list)
 
 	remote_count = len(remote_list)
 
@@ -567,7 +438,7 @@ def subcmd_cp_mv(args, process_fce, action_str, message):
 	destination_base = dst_base_uri.uri()
 
 	remote_list = fetch_remote_list(args, require_attribs = False)
-	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
+	remote_list, exclude_list = filter_exclude_include(remote_list)
 
 	remote_count = len(remote_list)
 
@@ -651,195 +522,6 @@ def cmd_info(args):
 			else:
 				raise
 
-def _get_filelist_local(local_uri):
-	info(u"Compiling list of local files...")
-	if local_uri.isdir():
-		local_base = deunicodise(local_uri.basename())
-		local_path = deunicodise(local_uri.path())
-		filelist = fswalk(local_path, cfg.follow_symlinks)
-		single_file = False
-	else:
-		local_base = ""
-		local_path = deunicodise(local_uri.dirname())
-		filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
-		single_file = True
-	loc_list = SortedDict(ignore_case = False)
-	for root, dirs, files in filelist:
-		rel_root = root.replace(local_path, local_base, 1)
-		for f in files:
-			full_name = os.path.join(root, f)
-			if not os.path.isfile(full_name):
-				continue
-			if os.path.islink(full_name):
-                                if not cfg.follow_symlinks:
-                                        continue
-			relative_file = unicodise(os.path.join(rel_root, f))
-			if os.path.sep != "/":
-				# Convert non-unix dir separators to '/'
-				relative_file = "/".join(relative_file.split(os.path.sep))
-			if cfg.urlencoding_mode == "normal":
-				relative_file = replace_nonprintables(relative_file)
-			if relative_file.startswith('./'):
-				relative_file = relative_file[2:]
-			sr = os.stat_result(os.lstat(full_name))
-			loc_list[relative_file] = {
-				'full_name_unicode' : unicodise(full_name),
-				'full_name' : full_name,
-				'size' : sr.st_size, 
-				'mtime' : sr.st_mtime,
-				## TODO: Possibly more to save here...
-			}
-	return loc_list, single_file
-
-def _get_filelist_remote(remote_uri, recursive = True):
-	## If remote_uri ends with '/' then all remote files will have 
-	## the remote_uri prefix removed in the relative path.
-	## If, on the other hand, the remote_uri ends with something else
-	## (probably alphanumeric symbol) we'll use the last path part 
-	## in the relative path.
-	##
-	## Complicated, eh? See an example:
-	## _get_filelist_remote("s3://bckt/abc/def") may yield:
-	## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
-	## _get_filelist_remote("s3://bckt/abc/def/") will yield:
-	## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
-	## Furthermore a prefix-magic can restrict the return list:
-	## _get_filelist_remote("s3://bckt/abc/def/x") yields:
-	## { 'xyz/blah.txt' : {} }
-
-	info(u"Retrieving list of remote files for %s ..." % remote_uri)
-
-	s3 = S3(Config())
-	response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)
-
-	rem_base_original = rem_base = remote_uri.object()
-	remote_uri_original = remote_uri
-	if rem_base != '' and rem_base[-1] != '/':
-		rem_base = rem_base[:rem_base.rfind('/')+1]
-		remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
-	rem_base_len = len(rem_base)
-	rem_list = SortedDict(ignore_case = False)
-	break_now = False
-	for object in response['list']:
-		if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
-			## We asked for one file and we got that file :-)
-			key = os.path.basename(object['Key'])
-			object_uri_str = remote_uri_original.uri()
-			break_now = True
-			rem_list = {}	## Remove whatever has already been put to rem_list
-		else:
-			key = object['Key'][rem_base_len:]		## Beware - this may be '' if object['Key']==rem_base !!
-			object_uri_str = remote_uri.uri() + key
-		rem_list[key] = { 
-			'size' : int(object['Size']),
-			'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
-			'md5' : object['ETag'][1:-1],
-			'object_key' : object['Key'],
-			'object_uri_str' : object_uri_str,
-			'base_uri' : remote_uri,
-		}
-		if break_now:
-			break
-	return rem_list
-
-def _filelist_filter_exclude_include(src_list):
-	info(u"Applying --exclude/--include")
-	cfg = Config()
-	exclude_list = SortedDict(ignore_case = False)
-	for file in src_list.keys():
-		debug(u"CHECK: %s" % file)
-		excluded = False
-		for r in cfg.exclude:
-			if r.search(file):
-				excluded = True
-				debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
-				break
-		if excluded:
-			## No need to check for --include if not excluded
-			for r in cfg.include:
-				if r.search(file):
-					excluded = False
-					debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
-					break
-		if excluded:
-			## Still excluded - ok, action it
-			debug(u"EXCLUDE: %s" % file)
-			exclude_list[file] = src_list[file]
-			del(src_list[file])
-			continue
-		else:
-			debug(u"PASS: %s" % (file))
-	return src_list, exclude_list
-
-def _compare_filelists(src_list, dst_list, src_remote, dst_remote):
-	def __direction_str(is_remote):
-		return is_remote and "remote" or "local"
-
-	# We don't support local->local sync, use 'rsync' or something like that instead ;-)
-	assert(not(src_remote == False and dst_remote == False))
-
-	info(u"Verifying attributes...")
-	cfg = Config()
-	exists_list = SortedDict(ignore_case = False)
-
-	debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
-	debug("src_list.keys: %s" % src_list.keys())
-	debug("dst_list.keys: %s" % dst_list.keys())
-
-	for file in src_list.keys():
-		debug(u"CHECK: %s" % file)
-		if dst_list.has_key(file):
-			## Was --skip-existing requested?
-			if cfg.skip_existing:
-				debug(u"IGNR: %s (used --skip-existing)" % (file))
-				exists_list[file] = src_list[file]
-				del(src_list[file])
-				## Remove from destination-list, all that is left there will be deleted
-				del(dst_list[file])
-				continue
-
-			attribs_match = True
-			## Check size first
-			if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
-				debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
-				attribs_match = False
-
-			if attribs_match and 'md5' in cfg.sync_checks:
-				## ... same size, check MD5
-				try:
-					if src_remote == False and dst_remote == True:
-						src_md5 = Utils.hash_file_md5(src_list[file]['full_name'])
-						dst_md5 = dst_list[file]['md5']
-					elif src_remote == True and dst_remote == False:
-						src_md5 = src_list[file]['md5']
-						dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name'])
-					elif src_remote == True and dst_remote == True:
-						src_md5 = src_list[file]['md5']
-						dst_md5 = dst_list[file]['md5']
-				except (IOError,OSError), e:
-					# MD5 sum verification failed - ignore that file altogether
-					debug(u"IGNR: %s (disappeared)" % (file))
-					warning(u"%s: file disappeared, ignoring." % (file))
-					del(src_list[file])
-					del(dst_list[file])
-					continue
-
-				if src_md5 != dst_md5:
-					## Checksums are different.
-					attribs_match = False
-					debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
-
-			if attribs_match:
-				## Remove from source-list, all that is left there will be transferred
-				debug(u"IGNR: %s (transfer not needed)" % file)
-				exists_list[file] = src_list[file]
-				del(src_list[file])
-
-			## Remove from destination-list, all that is left there will be deleted
-			del(dst_list[file])
-
-	return src_list, dst_list, exists_list
-
 def cmd_sync_remote2remote(args):
 	s3 = S3(Config())
 
@@ -854,9 +536,9 @@ def cmd_sync_remote2remote(args):
 
 	info(u"Found %d source files, %d destination files" % (src_count, dst_count))
 
-	src_list, exclude_list = _filelist_filter_exclude_include(src_list)
+	src_list, exclude_list = filter_exclude_include(src_list)
 
-	src_list, dst_list, existing_list = _compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True)
+	src_list, dst_list, existing_list = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True)
 
 	src_count = len(src_list)
 	dst_count = len(dst_list)
@@ -933,9 +615,9 @@ def cmd_sync_remote2local(args):
 
 	info(u"Found %d remote files, %d local files" % (remote_count, local_count))
 
-	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
+	remote_list, exclude_list = filter_exclude_include(remote_list)
 
-	remote_list, local_list, existing_list = _compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False)
+	remote_list, local_list, existing_list = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False)
 
 	local_count = len(local_list)
 	remote_count = len(remote_list)
@@ -1116,7 +798,7 @@ def cmd_sync_local2remote(args):
 
 	info(u"Found %d local files, %d remote files" % (local_count, remote_count))
 
-	local_list, exclude_list = _filelist_filter_exclude_include(local_list)
+	local_list, exclude_list = filter_exclude_include(local_list)
 
 	if single_file_local and len(local_list) == 1 and len(remote_list) == 1:
 		## Make remote_key same as local_key for comparison if we're dealing with only one file
@@ -1124,7 +806,7 @@ def cmd_sync_local2remote(args):
 		# Flush remote_list, by the way
 		remote_list = { local_list.keys()[0] : remote_list_entry }
 
-	local_list, remote_list, existing_list = _compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True)
+	local_list, remote_list, existing_list = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True)
 
 	local_count = len(local_list)
 	remote_count = len(remote_list)
@@ -1275,7 +957,7 @@ def cmd_setacl(args):
 				args.append(arg)
 
 	remote_list = fetch_remote_list(args)
-	remote_list, exclude_list = _filelist_filter_exclude_include(remote_list)
+	remote_list, exclude_list = filter_exclude_include(remote_list)
 
 	remote_count = len(remote_list)
 
@@ -2002,6 +1684,7 @@ if __name__ == '__main__':
 		from S3.Utils import *
 		from S3.Progress import Progress
 		from S3.CloudFront import Cmd as CfCmd
+		from S3.FileLists import *
 
 		main()
 		sys.exit(0)
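
For reference, a minimal usage sketch (not part of this commit) of how the helpers relocated into S3/FileLists.py fit together, mirroring the cmd_sync_local2remote() flow patched above. The bucket name, the local path and the assumption that the Config() singleton has already been populated with credentials/options are illustrative placeholders, not values taken from this change.

from S3.Config import Config
from S3.FileLists import fetch_local_list, fetch_remote_list, \
	filter_exclude_include, compare_filelists

cfg = Config()	# assumed to have been loaded with credentials/options beforehand

## Compile both listings; fetch_local_list() also reports whether the
## argument named a single file rather than a directory.
local_list, single_file = fetch_local_list("/tmp/photos/", recursive = True)
remote_list = fetch_remote_list("s3://example-bucket/photos/", require_attribs = True, recursive = True)

## Apply the --exclude/--include patterns to the upload candidates.
local_list, exclude_list = filter_exclude_include(local_list)

## After the comparison, local_list holds files still to transfer, remote_list
## holds objects present only remotely, existing_list holds files whose
## size/md5 already match on both sides.
local_list, remote_list, existing_list = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True)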