git-svn-id: https://s3tools.svn.sourceforge.net/svnroot/s3tools/s3cmd/trunk@180 830e0280-6d2a-0410-9c65-932aecc39d9d
Michal Ludvig authored on 2008/06/03 22:40:11... | ... |
@@ -298,52 +298,15 @@ def cmd_info(args): |
298 | 298 |
else: |
299 | 299 |
raise |
300 | 300 |
|
301 |
-def cmd_sync(args): |
|
302 |
- def _build_attr_header(src): |
|
303 |
- attrs = {} |
|
304 |
- st = os.stat_result(os.stat(src)) |
|
305 |
- for attr in cfg.preserve_attrs_list: |
|
306 |
- if attr == 'uname': |
|
307 |
- try: |
|
308 |
- val = pwd.getpwuid(st.st_uid).pw_name |
|
309 |
- except KeyError: |
|
310 |
- attr = "uid" |
|
311 |
- val = st.st_uid |
|
312 |
- warning("%s: Owner username not known. Storing UID=%d instead." % (src, val)) |
|
313 |
- elif attr == 'gname': |
|
314 |
- try: |
|
315 |
- val = grp.getgrgid(st.st_gid).gr_name |
|
316 |
- except KeyError: |
|
317 |
- attr = "gid" |
|
318 |
- val = st.st_gid |
|
319 |
- warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val)) |
|
320 |
- else: |
|
321 |
- val = getattr(st, 'st_' + attr) |
|
322 |
- attrs[attr] = val |
|
323 |
- result = "" |
|
324 |
- for k in attrs: result += "%s:%s/" % (k, attrs[k]) |
|
325 |
- return { 'x-amz-meta-s3cmd-attrs' : result[:-1] } |
|
326 |
- src = args.pop(0) |
|
327 |
- if S3Uri(src).type != "file": |
|
328 |
- raise ParameterError("Source must be a local path instead of: %s" % src) |
|
329 |
- dst = args.pop(0) |
|
330 |
- if not dst.endswith('/'): |
|
331 |
- dst += "/" |
|
332 |
- dst_uri = S3Uri(dst) |
|
333 |
- if dst_uri.type != "s3": |
|
334 |
- raise ParameterError("Destination must be a S3 URI instead of: %s" % dst) |
|
335 |
- if (len(args)): |
|
336 |
- raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param']) |
|
337 |
- |
|
338 |
- s3 = S3(Config()) |
|
339 |
- |
|
301 |
+def _get_filelist_local(local_uri): |
|
340 | 302 |
output("Compiling list of local files...") |
341 |
- if os.path.isdir(src): |
|
342 |
- loc_base = os.path.join(src, "") |
|
343 |
- filelist = os.walk(src) |
|
303 |
+ local_path = local_uri.path() |
|
304 |
+ if os.path.isdir(local_path): |
|
305 |
+ loc_base = os.path.join(local_path, "") |
|
306 |
+ filelist = os.walk(local_path) |
|
344 | 307 |
else: |
345 | 308 |
loc_base = "./" |
346 |
- filelist = [( '.', [], [src] )] |
|
309 |
+ filelist = [( '.', [], [local_path] )] |
|
347 | 310 |
loc_base_len = len(loc_base) |
348 | 311 |
loc_list = {} |
349 | 312 |
for root, dirs, files in filelist: |
... | ... |
@@ -364,15 +327,17 @@ def cmd_sync(args): |
364 | 364 |
'mtime' : sr.st_mtime, |
365 | 365 |
## TODO: Possibly more to save here... |
366 | 366 |
} |
367 |
- loc_count = len(loc_list) |
|
368 |
- |
|
367 |
+ return loc_list |
|
368 |
+ |
|
369 |
+def _get_filelist_remote(remote_uri): |
|
369 | 370 |
output("Retrieving list of remote files...") |
370 |
- response = s3.bucket_list(dst_uri.bucket(), prefix = dst_uri.object()) |
|
371 | 371 |
|
372 |
- rem_base = dst_uri.object() |
|
372 |
+ s3 = S3(Config()) |
|
373 |
+ response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object()) |
|
374 |
+ |
|
375 |
+ rem_base = remote_uri.object() |
|
373 | 376 |
rem_base_len = len(rem_base) |
374 | 377 |
rem_list = {} |
375 |
- rem_count = len(response['list']) |
|
376 | 378 |
for object in response['list']: |
377 | 379 |
key = object['Key'][rem_base_len:].encode('utf-8') |
378 | 380 |
rem_list[key] = { |
... | ... |
@@ -381,32 +346,85 @@ def cmd_sync(args): |
381 | 381 |
'md5' : object['ETag'][1:-1], |
382 | 382 |
'object_key' : object['Key'].encode('utf-8'), |
383 | 383 |
} |
384 |
- output("Found %d local files, %d remote files" % (loc_count, rem_count)) |
|
385 |
- |
|
384 |
+ return rem_list |
|
385 |
+ |
|
386 |
+def _compare_filelists(src_list, dst_list, src_is_local_and_dst_is_remote): |
|
386 | 387 |
output("Verifying checksums...") |
387 |
- for file in loc_list.keys(): |
|
388 |
+ exists_list = {} |
|
389 |
+ for file in src_list.keys(): |
|
388 | 390 |
debug("Checking %s ..." % file) |
389 |
- if rem_list.has_key(file): |
|
391 |
+ if dst_list.has_key(file): |
|
390 | 392 |
debug("%s exists in remote list" % file) |
391 | 393 |
## Check size first |
392 |
- if rem_list[file]['size'] == loc_list[file]['size']: |
|
393 |
- debug("%s same size: %s" % (file, rem_list[file]['size'])) |
|
394 |
+ if dst_list[file]['size'] == src_list[file]['size']: |
|
395 |
+ debug("%s same size: %s" % (file, dst_list[file]['size'])) |
|
394 | 396 |
## ... same size, check MD5 |
395 |
- loc_md5 = Utils.hash_file_md5(loc_list[file]['full_name']) |
|
396 |
- if loc_md5 == rem_list[file]['md5']: |
|
397 |
- debug("%s md5 matches: %s" % (file, rem_list[file]['md5'])) |
|
397 |
+ if src_is_local_and_dst_is_remote: |
|
398 |
+ src_md5 = Utils.hash_file_md5(src_list[file]['full_name']) |
|
399 |
+ dst_md5 = dst_list[file]['md5'] |
|
400 |
+ else: |
|
401 |
+ src_md5 = src_list[file]['md5'] |
|
402 |
+ dst_md5 = Utils.hash_file_md5(dst_list[file]['full_name']) |
|
403 |
+ if src_md5 == dst_md5: |
|
404 |
+ debug("%s md5 matches: %s" % (file, dst_md5)) |
|
398 | 405 |
## Checksums are the same. |
399 |
- ## Remove from local-list, all that is left there will be uploaded |
|
400 |
- debug("%s removed from local list - upload not needed" % file) |
|
401 |
- del(loc_list[file]) |
|
406 |
+ ## Remove from source-list, all that is left there will be transferred |
|
407 |
+ debug("%s removed from source list - transfer not needed" % file) |
|
408 |
+ exists_list[file] = src_list[file] |
|
409 |
+ del(src_list[file]) |
|
402 | 410 |
else: |
403 |
- debug("! %s md5 mismatch: local=%s remote=%s" % (file, loc_md5, rem_list[file]['md5'])) |
|
411 |
+ debug("! %s md5 mismatch: src=%s dst=%s" % (file, src_md5, dst_md5)) |
|
404 | 412 |
else: |
405 |
- debug("! %s size mismatch: local=%s remote=%s" % (file, loc_list[file]['size'], rem_list[file]['size'])) |
|
413 |
+ debug("! %s size mismatch: src=%s dst=%s" % (file, src_list[file]['size'], dst_list[file]['size'])) |
|
406 | 414 |
|
407 |
- ## Remove from remote-list, all that is left there will be deleted |
|
408 |
- debug("%s removed from remote list" % file) |
|
409 |
- del(rem_list[file]) |
|
415 |
+ ## Remove from destination-list, all that is left there will be deleted |
|
416 |
+ debug("%s removed from destination list" % file) |
|
417 |
+ del(dst_list[file]) |
|
418 |
+ return src_list, dst_list, exists_list |
|
419 |
+ |
|
420 |
+def cmd_sync_remote2local(src, dst): |
|
421 |
+ raise NotImplementedError("Remote->Local sync is not yet implemented.") |
|
422 |
+ |
|
423 |
+def cmd_sync_local2remote(src, dst): |
|
424 |
+ def _build_attr_header(src): |
|
425 |
+ attrs = {} |
|
426 |
+ st = os.stat_result(os.stat(src)) |
|
427 |
+ for attr in cfg.preserve_attrs_list: |
|
428 |
+ if attr == 'uname': |
|
429 |
+ try: |
|
430 |
+ val = pwd.getpwuid(st.st_uid).pw_name |
|
431 |
+ except KeyError: |
|
432 |
+ attr = "uid" |
|
433 |
+ val = st.st_uid |
|
434 |
+ warning("%s: Owner username not known. Storing UID=%d instead." % (src, val)) |
|
435 |
+ elif attr == 'gname': |
|
436 |
+ try: |
|
437 |
+ val = grp.getgrgid(st.st_gid).gr_name |
|
438 |
+ except KeyError: |
|
439 |
+ attr = "gid" |
|
440 |
+ val = st.st_gid |
|
441 |
+ warning("%s: Owner groupname not known. Storing GID=%d instead." % (src, val)) |
|
442 |
+ else: |
|
443 |
+ val = getattr(st, 'st_' + attr) |
|
444 |
+ attrs[attr] = val |
|
445 |
+ result = "" |
|
446 |
+ for k in attrs: result += "%s:%s/" % (k, attrs[k]) |
|
447 |
+ return { 'x-amz-meta-s3cmd-attrs' : result[:-1] } |
|
448 |
+ |
|
449 |
+ s3 = S3(Config()) |
|
450 |
+ |
|
451 |
+ src_uri = S3Uri(src) |
|
452 |
+ dst_uri = S3Uri(dst) |
|
453 |
+ |
|
454 |
+ loc_list = _get_filelist_local(src_uri) |
|
455 |
+ loc_count = len(loc_list) |
|
456 |
+ |
|
457 |
+ rem_list = _get_filelist_remote(dst_uri) |
|
458 |
+ rem_count = len(rem_list) |
|
459 |
+ |
|
460 |
+ output("Found %d local files, %d remote files" % (loc_count, rem_count)) |
|
461 |
+ |
|
462 |
+ _compare_filelists(loc_list, rem_list, True) |
|
410 | 463 |
|
411 | 464 |
output("Summary: %d local files to upload, %d remote files to delete" % (len(loc_list), len(rem_list))) |
412 | 465 |
for file in rem_list: |
... | ... |
@@ -449,6 +467,20 @@ def cmd_sync(args): |
449 | 449 |
output("Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s" % |
450 | 450 |
(total_size, total_elapsed, speed_fmt[0], speed_fmt[1])) |
451 | 451 |
|
452 |
+def cmd_sync(args): |
|
453 |
+ src = args.pop(0) |
|
454 |
+ dst = args.pop(0) |
|
455 |
+ if (len(args)): |
|
456 |
+ raise ParameterError("Too many parameters! Expected: %s" % commands['sync']['param']) |
|
457 |
+ |
|
458 |
+ if not dst.endswith('/'): |
|
459 |
+ dst += "/" |
|
460 |
+ |
|
461 |
+ if S3Uri(src).type == "file" and S3Uri(dst).type == "s3": |
|
462 |
+ return cmd_sync_local2remote(src, dst) |
|
463 |
+ if S3Uri(src).type == "s3" and S3Uri(dst).type == "file": |
|
464 |
+ return cmd_sync_remote2local(src, dst) |
|
465 |
+ |
|
452 | 466 |
def resolve_list(lst, args): |
453 | 467 |
retval = [] |
454 | 468 |
for item in lst: |