Currently we only need to find one file matching a given md5 for remote copies and hardlinks.
So there is no need to keep inner sets of matching files; a single entry is enough.
This prevents random failures in the automated tests under Python 3, where the
iteration order of sets and dicts can change between runs.
... | ... |
@@ -19,22 +19,23 @@ cfg = Config.Config() |
19 | 19 |
class FileDict(SortedDict): |
20 | 20 |
def __init__(self, mapping = None, ignore_case = True, **kwargs): |
21 | 21 |
SortedDict.__init__(self, mapping = mapping or {}, ignore_case = ignore_case, **kwargs) |
22 |
- self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}} |
|
22 |
+ self.hardlinks_md5 = dict() # { dev: { inode : {'md5':, 'relative_files':}}} |
|
23 | 23 |
self.by_md5 = dict() # {md5: set(relative_files)} |
24 | 24 |
|
25 | 25 |
def record_md5(self, relative_file, md5): |
26 |
- if md5 is None: return |
|
27 |
- if md5 == zero_length_md5: return |
|
26 |
+ if not relative_file: |
|
27 |
+ return |
|
28 |
+ if md5 is None: |
|
29 |
+ return |
|
30 |
+ if md5 == zero_length_md5: |
|
31 |
+ return |
|
28 | 32 |
if md5 not in self.by_md5: |
29 |
- self.by_md5[md5] = set() |
|
30 |
- self.by_md5[md5].add(relative_file) |
|
33 |
+ self.by_md5[md5] = relative_file |
|
31 | 34 |
|
32 | 35 |
def find_md5_one(self, md5): |
33 |
- if not md5: return None |
|
34 |
- try: |
|
35 |
- return list(self.by_md5.get(md5, set()))[0] |
|
36 |
- except: |
|
36 |
+ if not md5: |
|
37 | 37 |
return None |
38 |
+ return self.by_md5.get(md5, None) |
|
38 | 39 |
|
39 | 40 |
def get_md5(self, relative_file): |
40 | 41 |
"""returns md5 if it can, or raises IOError if file is unreadable""" |
... | ... |
@@ -50,21 +51,24 @@ class FileDict(SortedDict): |
50 | 50 |
return md5 |
51 | 51 |
|
52 | 52 |
def record_hardlink(self, relative_file, dev, inode, md5, size): |
53 |
- if md5 is None: return |
|
54 |
- if size == 0: return # don't record 0-length files |
|
55 |
- if dev == 0 or inode == 0: return # Windows |
|
56 |
- if dev not in self.hardlinks: |
|
57 |
- self.hardlinks[dev] = dict() |
|
58 |
- if inode not in self.hardlinks[dev]: |
|
59 |
- self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set()) |
|
60 |
- self.hardlinks[dev][inode]['relative_files'].add(relative_file) |
|
53 |
+ if md5 is None: |
|
54 |
+ return |
|
55 |
+ if size == 0: |
|
56 |
+ # don't record 0-length files |
|
57 |
+ return |
|
58 |
+ if dev == 0 or inode == 0: |
|
59 |
+ # Windows |
|
60 |
+ return |
|
61 |
+ if dev not in self.hardlinks_md5: |
|
62 |
+ self.hardlinks_md5[dev] = dict() |
|
63 |
+ if inode not in self.hardlinks_md5[dev]: |
|
64 |
+ self.hardlinks_md5[dev][inode] = md5 |
|
61 | 65 |
|
62 | 66 |
def get_hardlink_md5(self, relative_file): |
63 |
- md5 = None |
|
64 | 67 |
try: |
65 | 68 |
dev = self[relative_file]['dev'] |
66 | 69 |
inode = self[relative_file]['inode'] |
67 |
- md5 = self.hardlinks[dev][inode]['md5'] |
|
70 |
+ md5 = self.hardlinks_md5[dev][inode] |
|
68 | 71 |
except KeyError: |
69 |
- pass |
|
72 |
+ md5 = None |
|
70 | 73 |
return md5 |