* s3cmd: Initial support for 'sync' operation. For
now only the local->s3 direction. In this version it doesn't
work well with non-ASCII filenames and doesn't support
encryption.
git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@145 830e0280-6d2a-0410-9c65-932aecc39d9d
... | ... |
@@ -1,3 +1,10 @@ |
1 |
+2007-09-02 Michal Ludvig <michal@logix.cz> |
|
2 |
+ |
|
3 |
+ * s3cmd: Initial support for 'sync' operation. For |
|
4 |
+ now only local->s3 direction. In this version doesn't |
|
5 |
+ work well with non-ASCII filenames and doesn't support |
|
6 |
+ encryption. |
|
7 |
+ |
|
1 | 8 |
2007-08-24 Michal Ludvig <michal@logix.cz> |
2 | 9 |
|
3 | 10 |
* s3cmd, S3/Util.py: More ElementTree imports cleanup |
... | ... |
@@ -10,6 +10,7 @@ import re |
10 | 10 |
class Config(object): |
11 | 11 |
_instance = None |
12 | 12 |
_parsed_files = [] |
13 |
+ _doc = {} |
|
13 | 14 |
access_key = "" |
14 | 15 |
secret_key = "" |
15 | 16 |
host = "s3.amazonaws.com" |
... | ... |
@@ -22,6 +23,8 @@ class Config(object): |
22 | 22 |
proxy_host = "" |
23 | 23 |
proxy_port = 3128 |
24 | 24 |
encrypt = False |
25 |
+ delete_removed = False |
|
26 |
+ _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted" |
|
25 | 27 |
gpg_passphrase = "" |
26 | 28 |
gpg_command = "" |
27 | 29 |
gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s" |
... | ... |
@@ -108,8 +108,10 @@ class S3(object): |
108 | 108 |
## TODO: use prefix if supplied |
109 | 109 |
request = self.create_request("BUCKET_LIST", bucket = bucket, prefix = prefix) |
110 | 110 |
response = self.send_request(request) |
111 |
- debug(response) |
|
111 |
+ #debug(response) |
|
112 | 112 |
response["list"] = getListFromXml(response["data"], "Contents") |
113 |
+ if getTextFromXml(response['data'], ".//IsTruncated").lower() != "false": |
|
114 |
+ raise Exception("Listing truncated. Please notify s3cmd developers.") |
|
113 | 115 |
return response |
114 | 116 |
|
115 | 117 |
def bucket_create(self, bucket): |
... | ... |
@@ -15,20 +15,30 @@ try: |
15 | 15 |
except ImportError: |
16 | 16 |
import elementtree.ElementTree as ET |
17 | 17 |
|
18 |
def stripTagXmlns(xmlns, tag):
	"""
	Remove the ElementTree namespace prefix 'xmlns' from 'tag'
	and return the bare tag name.

	Example:
		stripTagXmlns("{myXmlNS}", "{myXmlNS}tag") -> "tag"

	An empty 'xmlns' returns 'tag' unchanged.
	"""
	if not xmlns:
		return tag
	# xmlns is used as a regexp pattern; "{...}" is not a valid
	# quantifier so re treats the braces literally.
	return re.sub(xmlns, "", tag)
|
29 |
+ |
|
30 |
def fixupXPath(xmlns, xpath, max = 0):
	"""
	Prefix each '//' occurrence in 'xpath' with 'xmlns' so the
	expression matches namespace-qualified ElementTree nodes.

	An empty 'xmlns' leaves 'xpath' untouched; 'max' limits the
	number of substitutions (0 = replace all).
	"""
	if not xmlns:
		return xpath
	return re.sub("//", "//" + xmlns, xpath, count=max)
|
35 |
+ |
|
18 | 36 |
def parseNodes(nodes, xmlns = ""): |
19 | 37 |
retval = [] |
20 | 38 |
for node in nodes: |
21 | 39 |
retval_item = {} |
22 |
- if xmlns != "": |
|
23 |
- ## Take regexp compilation out of the loop |
|
24 |
- r = re.compile(xmlns) |
|
25 |
- fixup = lambda string : r.sub("", string) |
|
26 |
- else: |
|
27 |
- ## Do-nothing function |
|
28 |
- fixup = lambda string : string |
|
29 |
- |
|
30 | 40 |
for child in node.getchildren(): |
31 |
- name = fixup(child.tag) |
|
41 |
+ name = stripTagXmlns(xmlns, child.tag) |
|
32 | 42 |
retval_item[name] = node.findtext(".//%s" % child.tag) |
33 | 43 |
|
34 | 44 |
retval.append(retval_item) |
... | ... |
@@ -45,6 +55,11 @@ def getListFromXml(xml, node): |
45 | 45 |
nodes = tree.findall('.//%s%s' % (xmlns, node)) |
46 | 46 |
return parseNodes(nodes, xmlns) |
47 | 47 |
|
48 |
def getTextFromXml(xml, xpath):
	"""
	Parse the XML string 'xml' and return the text content of the
	first node matching 'xpath' (namespace-fixed via fixupXPath).
	"""
	root = ET.fromstring(xml)
	ns = getNameSpace(root)
	return root.findtext(fixupXPath(ns, xpath))
|
52 |
+ |
|
48 | 53 |
def dateS3toPython(date): |
49 | 54 |
date = re.compile("\.\d\d\dZ").sub(".000Z", date) |
50 | 55 |
return time.strptime(date, "%Y-%m-%dT%H:%M:%S.000Z") |
... | ... |
@@ -256,6 +256,97 @@ def cmd_object_del(args): |
256 | 256 |
response = s3.object_delete_uri(uri) |
257 | 257 |
output("Object %s deleted" % uri) |
258 | 258 |
|
259 |
def cmd_sync(args):
	"""
	Synchronise a local directory tree to S3 (local->s3 only).

	args[0] must be a local path, args[1] an s3:// URI; raises
	ParameterError otherwise. Files identical in size and MD5 are
	skipped; with cfg.delete_removed remote objects missing locally
	are deleted.

	Bug fix: the parameter was spelled 'agrs', so the body silently
	read the module-level 'args' instead of its own argument.
	"""
	src = args.pop(0)
	if S3Uri(src).type != "file":
		raise ParameterError("Source must be a local path instead of: %s" % src)
	dst = args.pop(0)
	dst_uri = S3Uri(dst)
	if dst_uri.type != "s3":
		raise ParameterError("Destination must be a S3 URI instead of: %s" % dst)
	if (len(args)):
		raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param'])

	s3 = S3(Config())

	output("Compiling list of local files...")
	## join(src, "") appends the path separator so the slice below
	## yields names relative to the source directory.
	loc_base = os.path.join(src, "")
	loc_base_len = len(loc_base)
	loc_list = {}
	for root, dirs, files in os.walk(src):
		## TODO: implement explicit exclude
		for f in files:
			full_name = os.path.join(root, f)
			if not os.path.isfile(full_name):
				continue
			file = full_name[loc_base_len:]
			sr = os.stat_result(os.lstat(full_name))
			loc_list[file] = {
				'full_name' : full_name,
				'size' : sr.st_size,
				'mtime' : sr.st_mtime,
				## TODO: Possibly more to save here...
			}

	output("Retrieving list of remote files...")
	response = s3.bucket_list(dst_uri.bucket(), prefix = dst_uri.object())

	rem_base = dst_uri.object()
	rem_base_len = len(rem_base)
	rem_list = {}
	for object in response['list']:
		key = object['Key'][rem_base_len:]
		rem_list[key] = {
			'size' : int(object['Size']),
			# 'mtime' : dateS3toUnix(object['LastModified']), ## That's upload time, not our lastmod time :-(
			'md5' : object['ETag'][1:-1],
			'object_key' : object['Key'],
		}

	output("Verifying checksums...")
	## .keys() copies the key list (Python 2), so deleting entries
	## while iterating is safe here.
	for file in loc_list.keys():
		debug("Checking %s ..." % file)
		if rem_list.has_key(file):
			debug("%s exists in remote list" % file)
			## Check size first
			if rem_list[file]['size'] == loc_list[file]['size']:
				debug("%s same size: %s" % (file, rem_list[file]['size']))
				## ... same size, check MD5
				loc_md5 = Utils.hash_file_md5(loc_list[file]['full_name'])
				if loc_md5 == rem_list[file]['md5']:
					debug("%s md5 matches: %s" % (file, rem_list[file]['md5']))
					## Checksums are the same.
					## Remove from local-list, all that is left there will be uploaded
					debug("%s removed from local list - upload not needed" % file)
					del(loc_list[file])
				else:
					debug("! %s md5 mismatch: local=%s remote=%s" % (file, loc_md5, rem_list[file]['md5']))
			else:
				debug("! %s size mismatch: local=%s remote=%s" % (file, loc_list[file]['size'], rem_list[file]['size']))

			## Remove from remote-list, all that is left there will be deleted
			debug("%s removed from remote list" % file)
			del(rem_list[file])

	output("Summary: %d local files to upload, %d remote files to delete" % (len(loc_list), len(rem_list)))
	if cfg.delete_removed:
		for file in rem_list:
			uri = S3Uri("s3://" + dst_uri.bucket()+"/"+rem_list[file]['object_key'])
			response = s3.object_delete_uri(uri)
			output("%s deleted" % uri)

	total_size = 0
	dst_base = dst_uri.uri()
	if not dst_base[-1] == "/": dst_base += "/"
	file_list = loc_list.keys()
	file_list.sort()
	for file in file_list:
		src = loc_list[file]['full_name']
		uri = S3Uri(dst_base + file)
		response = s3.object_put_uri(src, uri)
		output("%s stored as %s (%d bytes)" % (src, uri, response["size"]))
		total_size += response["size"]
	output("Done. Uploaded %d bytes." % total_size)
|
259 | 350 |
def resolve_list(lst, args): |
260 | 351 |
retval = [] |
261 | 352 |
for item in lst: |
... | ... |
@@ -425,6 +516,8 @@ commands_list = [ |
425 | 425 |
{"cmd":"put", "label":"Put file into bucket", "param":"FILE [FILE...] s3://BUCKET[/PREFIX]", "func":cmd_object_put, "argc":2}, |
426 | 426 |
{"cmd":"get", "label":"Get file from bucket", "param":"s3://BUCKET/OBJECT LOCAL_FILE", "func":cmd_object_get, "argc":1}, |
427 | 427 |
{"cmd":"del", "label":"Delete file from bucket", "param":"s3://BUCKET/OBJECT", "func":cmd_object_del, "argc":1}, |
428 |
+ #{"cmd":"mkdir", "label":"Make a virtual S3 directory", "param":"s3://BUCKET/path/to/dir", "func":cmd_mkdir, "argc":1}, |
|
429 |
+ {"cmd":"sync", "label":"Synchronize a directory tree to S3 and back", "param":"LOCAL_DIR s3://BUCKET[/PREFIX]", "func":cmd_sync, "argc":2}, |
|
428 | 430 |
{"cmd":"du", "label":"Disk usage by buckets", "param":"[s3://BUCKET[/PREFIX]]", "func":cmd_du, "argc":0}, |
429 | 431 |
] |
430 | 432 |
|
... | ... |
@@ -475,6 +568,7 @@ if __name__ == '__main__': |
475 | 475 |
optparser.add_option("-e", "--encrypt", dest="encrypt", action="store_true", help="Encrypt files before uploading to S3.") |
476 | 476 |
optparser.add_option("-f", "--force", dest="force", action="store_true", help="Force overwrite and other dangerous operations.") |
477 | 477 |
optparser.add_option("-P", "--acl-public", dest="acl_public", action="store_true", help="Store objects with ACL allowing read by anyone.") |
478 |
+ optparser.add_option( "--delete-removed", dest="delete_removed", action="store_true", help="Delete remote objects with no corresponding local file [sync]") |
|
478 | 479 |
|
479 | 480 |
optparser.add_option("-m", "--mime-type", dest="default_mime_type", type="mimetype", metavar="MIME/TYPE", help="Default MIME-type to be set for objects stored.") |
480 | 481 |
optparser.add_option("-M", "--guess-mime-type", dest="guess_mime_type", action="store_true", help="Guess MIME-type of files by their extension. Falls back to default MIME-Type as specified by --mime-type option") |