Browse code

* s3cmd, S3/Config.py, s3cmd.1: Added --exclude-from and --debug-syncmatch switches for sync.

git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@190 830e0280-6d2a-0410-9c65-932aecc39d9d

Michal Ludvig authored on 2008/06/11 10:43:40
Showing 4 changed files
... ...
@@ -2,6 +2,8 @@
2 2
 
3 3
 	* s3cmd: Remove python 2.5 specific code (try/except/finally 
4 4
 	  block) and make s3cmd compatible with python 2.4 again.
5
+	* s3cmd, S3/Config.py, s3cmd.1: Added --exclude-from and --debug-syncmatch
6
+	  switches for sync.
5 7
 
6 8
 2008-06-10  Michal Ludvig  <michal@logix.cz>
7 9
 
... ...
@@ -48,7 +48,11 @@ class Config(object):
48 48
 	bucket_location = "US"
49 49
 	default_mime_type = "binary/octet-stream"
50 50
 	guess_mime_type = False
51
+	debug_syncmatch = False
52
+	# List of compiled REGEXPs
51 53
 	exclude = []
54
+	# Dict mapping compiled REGEXPs back to their textual form
55
+	debug_exclude = {}
52 56
 
53 57
 	## Creating a singleton
54 58
 	def __new__(self, configfile = None):
... ...
@@ -359,29 +359,37 @@ def _get_filelist_remote(remote_uri):
359 359
 	
360 360
 def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
361 361
 	output("Verifying checksums...")
362
+	cfg = Config()
362 363
 	exists_list = {}
363 364
 	exclude_list = {}
365
+	if cfg.debug_syncmatch:
366
+		logging.root.setLevel(logging.DEBUG)
364 367
 	for file in src_list.keys():
365
-		debug("Checking %s ..." % file)
368
+		if not cfg.debug_syncmatch:
369
+			debug("CHECK: %s" % (os.sep + file))
366 370
 		excluded = False
367
-		for r in Config().exclude:
371
+		for r in cfg.exclude:
368 372
 			## all paths start with '/' from the base dir
369 373
 			if r.search(os.sep + file):
370 374
 				## Can't directly 'continue' to the outer loop
371 375
 				## therefore this awkward excluded switch :-(
372 376
 				excluded = True
377
+				if cfg.debug_syncmatch:
378
+					debug("EXCL: %s" % (os.sep + file))
379
+					debug("RULE: '%s'" % (cfg.debug_exclude[r]))
380
+				else:
381
+					info("%s: excluded" % file)
373 382
 				break
374 383
 		if excluded:
375
-			info("%s: excluded" % file)
376 384
 			exclude_list = src_list[file]
377 385
 			del(src_list[file])
378 386
 			continue
379
-
387
+		else:
388
+			debug("PASS: %s" % (os.sep + file))
380 389
 		if dst_list.has_key(file):
381
-			debug("%s exists in remote list" % file)
382 390
 			## Check size first
383 391
 			if dst_list[file]['size'] == src_list[file]['size']:
384
-				debug("%s same size: %s" % (file, dst_list[file]['size']))
392
+				#debug("%s same size: %s" % (file, dst_list[file]['size']))
385 393
 				## ... same size, check MD5
386 394
 				if src_is_local_and_dst_is_remote:
387 395
 					src_md5 = Utils.hash_file_md5(src_list[file]['full_name'])
... ...
@@ -390,20 +398,24 @@ def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote):
390 390
 					src_md5 = src_list[file]['md5']
391 391
 					dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name'])
392 392
 				if src_md5 == dst_md5:
393
-					debug("%s md5 matches: %s" % (file, dst_md5))
393
+					#debug("%s md5 matches: %s" % (file, dst_md5))
394 394
 					## Checksums are the same.
395 395
 					## Remove from source-list, all that is left there will be transferred
396
-					debug("%s removed from source list - transfer not needed" % file)
396
+					debug("IGNR: %s (transfer not needed: MD5 OK, Size OK)" % file)
397 397
 					exists_list[file] = src_list[file]
398 398
 					del(src_list[file])
399 399
 				else:
400
-					debug("! %s md5 mismatch: src=%s dst=%s" % (file, src_md5, dst_md5))
400
+					debug("XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
401 401
 			else:
402
-				debug("! %s size mismatch: src=%s dst=%s" % (file, src_list[file]['size'], dst_list[file]['size']))
402
+				debug("XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
403 403
                         
404 404
 			## Remove from destination-list, all that is left there will be deleted
405
-			debug("%s removed from destination list" % file)
405
+			#debug("%s removed from destination list" % file)
406 406
 			del(dst_list[file])
407
+	if cfg.debug_syncmatch:
408
+		warning("Exiting because of --debug-syncmatch")
409
+		sys.exit(0)
410
+
407 411
 	return src_list, dst_list, exists_list, exclude_list
408 412
 
409 413
 def cmd_sync_remote2local(src, dst):
... ...
@@ -414,13 +426,6 @@ def cmd_sync_remote2local(src, dst):
414 414
 			attrs[key] = val
415 415
 		return attrs
416 416
 		
417
-	def _try_close_dst_stream(dst_stream):
418
-		## Close the file if still open. Don't care if not.
419
-		try:
420
-			dst_stream.close()
421
-		except:
422
-			pass
423
-
424 417
 	s3 = S3(Config())
425 418
 
426 419
 	src_uri = S3Uri(src)
... ...
@@ -491,7 +496,8 @@ def cmd_sync_remote2local(src, dst):
491 491
 						os.utime(dst_file, (atime, mtime))
492 492
 					## FIXME: uid/gid / uname/gname handling comes here! TODO
493 493
 			except OSError, e:
494
-				_try_close_dst_stream(dst_stream)
494
+				try: dst_stream.close()
495
+				except: pass
495 496
 				if e.errno == errno.EEXIST:
496 497
 					warning("%s exists - not overwriting" % (dst_file))
497 498
 					continue
... ...
@@ -499,14 +505,21 @@ def cmd_sync_remote2local(src, dst):
499 499
 					warning("%s not writable: %s" % (dst_file, e.strerror))
500 500
 					continue
501 501
 				raise
502
+			except KeyboardInterrupt:
503
+				try: dst_stream.close()
504
+				except: pass
505
+				warning("Exiting after keyboard interrupt")
506
+				return
502 507
 			except Exception, e:
503
-				_try_close_dst_stream(dst_stream)
508
+				try: dst_stream.close()
509
+				except: pass
504 510
 				error("%s: %s" % (file, e))
505 511
 				continue
506 512
 			# We have to keep repeating this call because 
507 513
 			# Python 2.4 doesn't support try/except/finally
508 514
 			# construction :-(
509
-			_try_close_dst_stream(dst_stream)
515
+			try: dst_stream.close()
516
+			except: pass
510 517
 		except S3DownloadError, e:
511 518
 			error("%s: download failed too many times. Skipping that file." % file)
512 519
 			continue
... ...
@@ -865,7 +878,8 @@ if __name__ == '__main__':
865 865
 	optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). Default for [sync] command.")
866 866
 	optparser.add_option(      "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes")
867 867
 	optparser.add_option(      "--exclude", dest="exclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP will be excluded from sync")
868
-	#optparser.add_option(      "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE")
868
+	optparser.add_option(      "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude REGEXPs from FILE")
869
+	optparser.add_option(      "--debug-syncmatch", dest="debug_syncmatch", action="store_true", help="Output detailed information about remote vs. local filelist matching and then exit")
869 870
 
870 871
 	optparser.add_option(      "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. Either EU or US (default)")
871 872
 
... ...
@@ -924,8 +938,27 @@ if __name__ == '__main__':
924 924
 			## Some Config() options are not settable from command line
925 925
 			pass
926 926
 
927
-	for ex in options.exclude:
928
-		cfg.exclude.append(re.compile(ex))
927
+	if options.exclude is None:
928
+		options.exclude = []
929
+
930
+	if options.exclude_from:
931
+		for exf in options.exclude_from:
932
+			debug("processing --exclude-from %s" % exf)
933
+			exfi = open(exf, "rt")
934
+			for ex in exfi:
935
+				ex = ex.strip()
936
+				if re.match("^#", ex) or re.match("^\s*$", ex):
937
+					continue
938
+				debug("adding rule: %s" % ex)
939
+				options.exclude.append(ex)
940
+
941
+	if options.exclude:
942
+		for ex in options.exclude:
943
+			debug("processing rule: %s" % ex)
944
+			exc = re.compile(ex)
945
+			cfg.exclude.append(exc)
946
+			if options.debug_syncmatch:
947
+				cfg.debug_exclude[exc] = ex
929 948
 
930 949
 	if cfg.encrypt and cfg.gpg_passphrase == "":
931 950
 		error("Encryption requested but no passphrase set in config file.")
... ...
@@ -92,8 +92,8 @@ Exclude files matching REGEXP from \fIsync\fR. See SYNC COMMAND section for more
92 92
 \fB\-\-exclude\-from FILE\fR
93 93
 Same as \-\-exclude but reads REGEXPs from the given FILE instead of expecting them on the command line.
94 94
 .TP
95
-\fB\-\-debug\-exclude\fR
96
-Display detailed information about matching file names against exclude\-rules.
95
+\fB\-\-debug\-syncmatch\fR
96
+Display detailed information about matching file names against exclude\-rules, as well as about how the remote and local file lists are matched against each other. S3cmd exits after performing the match; no actual transfer takes place.
97 97
 .\".TP
98 98
 .\"\fB\-n\fR, \fB\-\-dry\-run\fR
99 99
 .\"Only show what would be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though.
... ...
@@ -178,8 +178,8 @@ slash regardless whether you specified s3://test-bucket/backup or
178 178
 s3://test-bucket/backup/ (note the trailing slash) on the command line.
179 179
 
180 180
 Both \fB\-\-exclude\fR and \fB\-\-exclude\-from\fR options expect regular expressions, not 
181
-shell-style wildcards! Run s3cmd with \fB\-\-debug\-exclude\fR to get a detailed list of 
182
-matching file names against exclude rules.
181
+shell-style wildcards! Run s3cmd with \fB\-\-debug\-syncmatch\fR to get detailed information
182
+about matching file names against exclude rules.
183 183
 
184 184
 For example to exclude all files with ".bin" extension use:
185 185
 .PP