Split md5/hardlink stuff out of SortedDict into FileDict
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,53 @@ |
| 0 |
+## Amazon S3 manager |
|
| 1 |
+## Author: Michal Ludvig <michal@logix.cz> |
|
| 2 |
+## http://www.logix.cz/michal |
|
| 3 |
+## License: GPL Version 2 |
|
| 4 |
+ |
|
| 5 |
+from SortedDict import SortedDict |
|
| 6 |
+import Utils |
|
| 7 |
+ |
|
| 8 |
class FileDict(SortedDict):
    """SortedDict of file entries with md5 and hardlink bookkeeping.

    Keys are relative file names and each value is a per-file attribute
    dict.  The methods below read the 'md5', 'dev', 'inode' and
    'full_name' keys of those per-file dicts.

    Two indexes are kept next to the mapping itself:
      hardlinks -- { dev: { inode : {'md5':, 'relative_files':}}}
      by_md5    -- {md5: set(relative_files)}
    """

    def __init__(self, mapping = None, ignore_case = True, **kwargs):
        """Initialise the mapping and start with empty md5/hardlink indexes.

        mapping, ignore_case and **kwargs are forwarded to SortedDict;
        a mapping of None means "start empty".
        """
        # None stands in for the former shared mutable `{}` default.
        if mapping is None:
            mapping = {}
        SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs)
        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
        self.by_md5 = dict() # {md5: set(relative_files)}

    def record_md5(self, relative_file, md5):
        """Register relative_file in the by_md5 index under md5."""
        self.by_md5.setdefault(md5, set()).add(relative_file)

    def find_md5_one(self, md5):
        """Return one relative file recorded under md5, or None if there is none."""
        try:
            known_files = self.by_md5[md5]
        except KeyError:
            return None
        # Sets are unordered, so "first" just means an arbitrary member;
        # iterating avoids copying the whole set into a list.
        for relative_file in known_files:
            return relative_file
        return None

    def get_md5(self, relative_file):
        """Return the md5 of relative_file, computing and caching it if needed.

        Lookup order: the md5 already stored on the entry, then the md5
        recorded for the entry's hardlink (dev, inode), and finally
        Utils.hash_file_md5() on the entry's 'full_name'.

        Raises KeyError if relative_file is not in this dict.  Per the
        original docstring an unreadable file raises IOError -- presumably
        from Utils.hash_file_md5; verify against that helper.
        """
        entry = self[relative_file]
        if 'md5' in entry:
            return entry['md5']
        md5 = self.get_hardlink_md5(relative_file)
        if md5 is None:
            md5 = Utils.hash_file_md5(entry['full_name'])
        self.record_md5(relative_file, md5)
        entry['md5'] = md5
        return md5

    def record_hardlink(self, relative_file, dev, inode, md5):
        """Register relative_file as a name of the file identified by (dev, inode).

        md5 is stored only when the (dev, inode) pair is seen for the
        first time; later calls just add relative_file to the existing
        record and leave its md5 untouched.
        """
        by_inode = self.hardlinks.setdefault(dev, dict())
        if inode not in by_inode:
            by_inode[inode] = dict(md5=md5, relative_files=set())
        by_inode[inode]['relative_files'].add(relative_file)

    def get_hardlink_md5(self, relative_file):
        """Return the md5 recorded for relative_file's (dev, inode), or None.

        None is returned when the entry carries no 'dev'/'inode' keys or
        when no hardlink has been recorded for that pair.  Raises KeyError
        if relative_file itself is not in this dict.
        """
        entry = self[relative_file]
        try:
            return self.hardlinks[entry['dev']][entry['inode']]['md5']
        except KeyError:
            # Missing stat info or an unrecorded inode both simply mean
            # "no cached md5"; the caller falls back to hashing the file.
            return None
| ... | ... |
@@ -6,7 +6,7 @@ |
| 6 | 6 |
from S3 import S3 |
| 7 | 7 |
from Config import Config |
| 8 | 8 |
from S3Uri import S3Uri |
| 9 |
-from SortedDict import SortedDict |
|
| 9 |
+from FileDict import FileDict |
|
| 10 | 10 |
from Utils import * |
| 11 | 11 |
from Exceptions import ParameterError |
| 12 | 12 |
from HashCache import HashCache |
| ... | ... |
@@ -58,7 +58,7 @@ def _fswalk_no_symlinks(path): |
| 58 | 58 |
def filter_exclude_include(src_list): |
| 59 | 59 |
info(u"Applying --exclude/--include") |
| 60 | 60 |
cfg = Config() |
| 61 |
- exclude_list = SortedDict(ignore_case = False) |
|
| 61 |
+ exclude_list = FileDict(ignore_case = False) |
|
| 62 | 62 |
for file in src_list.keys(): |
| 63 | 63 |
debug(u"CHECK: %s" % file) |
| 64 | 64 |
excluded = False |
| ... | ... |
@@ -224,7 +224,7 @@ def fetch_local_list(args, recursive = None): |
| 224 | 224 |
info(u"No cache file found, creating it.") |
| 225 | 225 |
|
| 226 | 226 |
local_uris = [] |
| 227 |
- local_list = SortedDict(ignore_case = False) |
|
| 227 |
+ local_list = FileDict(ignore_case = False) |
|
| 228 | 228 |
single_file = False |
| 229 | 229 |
|
| 230 | 230 |
if type(args) not in (list, tuple): |
| ... | ... |
@@ -284,7 +284,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None): |
| 284 | 284 |
rem_base = rem_base[:rem_base.rfind('/')+1]
|
| 285 | 285 |
remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
|
| 286 | 286 |
rem_base_len = len(rem_base) |
| 287 |
- rem_list = SortedDict(ignore_case = False) |
|
| 287 |
+ rem_list = FileDict(ignore_case = False) |
|
| 288 | 288 |
break_now = False |
| 289 | 289 |
for object in response['list']: |
| 290 | 290 |
if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep: |
| ... | ... |
@@ -292,7 +292,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None): |
| 292 | 292 |
key = os.path.basename(object['Key']) |
| 293 | 293 |
object_uri_str = remote_uri_original.uri() |
| 294 | 294 |
break_now = True |
| 295 |
- rem_list = SortedDict(ignore_case = False) ## Remove whatever has already been put to rem_list |
|
| 295 |
+ rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list |
|
| 296 | 296 |
else: |
| 297 | 297 |
key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !! |
| 298 | 298 |
object_uri_str = remote_uri.uri() + key |
| ... | ... |
@@ -314,7 +314,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None): |
| 314 | 314 |
|
| 315 | 315 |
cfg = Config() |
| 316 | 316 |
remote_uris = [] |
| 317 |
- remote_list = SortedDict(ignore_case = False) |
|
| 317 |
+ remote_list = FileDict(ignore_case = False) |
|
| 318 | 318 |
|
| 319 | 319 |
if type(args) not in (list, tuple): |
| 320 | 320 |
args = [args] |
| ... | ... |
@@ -436,7 +436,7 @@ def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates |
| 436 | 436 |
## Items left on src_list will be transferred |
| 437 | 437 |
## Items left on update_list will be transferred after src_list |
| 438 | 438 |
## Items left on copy_pairs will be copied from dst1 to dst2 |
| 439 |
- update_list = SortedDict(ignore_case = False) |
|
| 439 |
+ update_list = FileDict(ignore_case = False) |
|
| 440 | 440 |
## Items left on dst_list will be deleted |
| 441 | 441 |
copy_pairs = [] |
| 442 | 442 |
|
| ... | ... |
@@ -27,8 +27,6 @@ class SortedDict(dict): |
| 27 | 27 |
""" |
| 28 | 28 |
dict.__init__(self, mapping, **kwargs) |
| 29 | 29 |
self.ignore_case = ignore_case |
| 30 |
- self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
|
|
| 31 |
- self.by_md5 = dict() # {md5: set(relative_files)}
|
|
| 32 | 30 |
|
| 33 | 31 |
def keys(self): |
| 34 | 32 |
keys = dict.keys(self) |
| ... | ... |
@@ -49,45 +47,6 @@ class SortedDict(dict): |
| 49 | 49 |
return SortedDictIterator(self, self.keys()) |
| 50 | 50 |
|
| 51 | 51 |
|
| 52 |
- def record_md5(self, relative_file, md5): |
|
| 53 |
- if md5 not in self.by_md5: |
|
| 54 |
- self.by_md5[md5] = set() |
|
| 55 |
- self.by_md5[md5].add(relative_file) |
|
| 56 |
- |
|
| 57 |
- def find_md5_one(self, md5): |
|
| 58 |
- try: |
|
| 59 |
- return list(self.by_md5.get(md5, set()))[0] |
|
| 60 |
- except: |
|
| 61 |
- return None |
|
| 62 |
- |
|
| 63 |
- def get_md5(self, relative_file): |
|
| 64 |
- """returns md5 if it can, or raises IOError if file is unreadable""" |
|
| 65 |
- md5 = None |
|
| 66 |
- if 'md5' in self[relative_file]: |
|
| 67 |
- return self[relative_file]['md5'] |
|
| 68 |
- md5 = self.get_hardlink_md5(relative_file) |
|
| 69 |
- if md5 is None: |
|
| 70 |
- md5 = Utils.hash_file_md5(self[relative_file]['full_name']) |
|
| 71 |
- self.record_md5(relative_file, md5) |
|
| 72 |
- self[relative_file]['md5'] = md5 |
|
| 73 |
- return md5 |
|
| 74 |
- |
|
| 75 |
- def record_hardlink(self, relative_file, dev, inode, md5): |
|
| 76 |
- if dev not in self.hardlinks: |
|
| 77 |
- self.hardlinks[dev] = dict() |
|
| 78 |
- if inode not in self.hardlinks[dev]: |
|
| 79 |
- self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set()) |
|
| 80 |
- self.hardlinks[dev][inode]['relative_files'].add(relative_file) |
|
| 81 |
- |
|
| 82 |
- def get_hardlink_md5(self, relative_file): |
|
| 83 |
- md5 = None |
|
| 84 |
- dev = self[relative_file]['dev'] |
|
| 85 |
- inode = self[relative_file]['inode'] |
|
| 86 |
- try: |
|
| 87 |
- md5 = self.hardlinks[dev][inode]['md5'] |
|
| 88 |
- except: |
|
| 89 |
- pass |
|
| 90 |
- return md5 |
|
| 91 | 52 |
|
| 92 | 53 |
if __name__ == "__main__": |
| 93 | 54 |
d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 }
|
| ... | ... |
@@ -911,7 +911,7 @@ def local_copy(copy_pairs, destination_base): |
| 911 | 911 |
# Do NOT hardlink local files by default, that'd be silly |
| 912 | 912 |
# For instance all empty files would become hardlinked together! |
| 913 | 913 |
|
| 914 |
- failed_copy_list = SortedDict() |
|
| 914 |
+ failed_copy_list = FileDict() |
|
| 915 | 915 |
for (src_obj, dst1, relative_file) in copy_pairs: |
| 916 | 916 |
src_file = os.path.join(destination_base, dst1) |
| 917 | 917 |
dst_file = os.path.join(destination_base, relative_file) |
| ... | ... |
@@ -1076,7 +1076,7 @@ def cmd_sync_local2remote(args): |
| 1076 | 1076 |
## Make remote_key same as local_key for comparison if we're dealing with only one file |
| 1077 | 1077 |
remote_list_entry = remote_list[remote_list.keys()[0]] |
| 1078 | 1078 |
# Flush remote_list, by the way |
| 1079 |
- remote_list = SortedDict() |
|
| 1079 |
+ remote_list = FileDict() |
|
| 1080 | 1080 |
remote_list[local_list.keys()[0]] = remote_list_entry |
| 1081 | 1081 |
|
| 1082 | 1082 |
local_list, remote_list, update_list, copy_pairs = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True, delay_updates = cfg.delay_updates) |
| ... | ... |
@@ -2079,6 +2079,7 @@ if __name__ == '__main__': |
| 2079 | 2079 |
from S3.S3 import S3 |
| 2080 | 2080 |
from S3.Config import Config |
| 2081 | 2081 |
from S3.SortedDict import SortedDict |
| 2082 |
+ from S3.FileDict import FileDict |
|
| 2082 | 2083 |
from S3.S3Uri import S3Uri |
| 2083 | 2084 |
from S3 import Utils |
| 2084 | 2085 |
from S3.Utils import * |