Browse code

Small performance and memory improvement; also fixes randomness in auto tests.

Currently we only need to find one matching md5 for remote copies and hardlinks,
so there is no need to keep inner sets of matching files: a single entry is enough.
This also prevents random failures in auto tests with python3, where the iteration
order of sets and dicts can change between runs.

Florent Viard authored on 2017/05/14 21:13:02
Showing 1 changed files
... ...
@@ -19,22 +19,23 @@ cfg = Config.Config()
19 19
 class FileDict(SortedDict):
20 20
     def __init__(self, mapping = None, ignore_case = True, **kwargs):
21 21
         SortedDict.__init__(self, mapping = mapping or {}, ignore_case = ignore_case, **kwargs)
22
-        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
22
+        self.hardlinks_md5 = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
23 23
         self.by_md5 = dict() # {md5: set(relative_files)}
24 24
 
25 25
     def record_md5(self, relative_file, md5):
26
-        if md5 is None: return
27
-        if md5 == zero_length_md5: return
26
+        if not relative_file:
27
+            return
28
+        if md5 is None:
29
+            return
30
+        if md5 == zero_length_md5:
31
+            return
28 32
         if md5 not in self.by_md5:
29
-            self.by_md5[md5] = set()
30
-        self.by_md5[md5].add(relative_file)
33
+            self.by_md5[md5] = relative_file
31 34
 
32 35
     def find_md5_one(self, md5):
33
-        if not md5: return None
34
-        try:
35
-            return list(self.by_md5.get(md5, set()))[0]
36
-        except:
36
+        if not md5:
37 37
             return None
38
+        return self.by_md5.get(md5, None)
38 39
 
39 40
     def get_md5(self, relative_file):
40 41
         """returns md5 if it can, or raises IOError if file is unreadable"""
... ...
@@ -50,21 +51,24 @@ class FileDict(SortedDict):
50 50
         return md5
51 51
 
52 52
     def record_hardlink(self, relative_file, dev, inode, md5, size):
53
-        if md5 is None: return
54
-        if size == 0: return # don't record 0-length files
55
-        if dev == 0 or inode == 0: return # Windows
56
-        if dev not in self.hardlinks:
57
-            self.hardlinks[dev] = dict()
58
-        if inode not in self.hardlinks[dev]:
59
-            self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set())
60
-        self.hardlinks[dev][inode]['relative_files'].add(relative_file)
53
+        if md5 is None:
54
+            return
55
+        if size == 0:
56
+            # don't record 0-length files
57
+            return
58
+        if dev == 0 or inode == 0:
59
+            # Windows
60
+            return
61
+        if dev not in self.hardlinks_md5:
62
+            self.hardlinks_md5[dev] = dict()
63
+        if inode not in self.hardlinks_md5[dev]:
64
+            self.hardlinks_md5[dev][inode] = md5
61 65
 
62 66
     def get_hardlink_md5(self, relative_file):
63
-        md5 = None
64 67
         try:
65 68
             dev = self[relative_file]['dev']
66 69
             inode = self[relative_file]['inode']
67
-            md5 = self.hardlinks[dev][inode]['md5']
70
+            md5 = self.hardlinks_md5[dev][inode]
68 71
         except KeyError:
69
-            pass
72
+            md5 = None
70 73
         return md5