Browse code

Merge branch 'master' of ssh://github.com/s3tools/s3cmd

Michal Ludvig authored on 2013/03/10 22:16:37
Showing 4 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+## Amazon S3 manager
1
+## Author: Michal Ludvig <michal@logix.cz>
2
+##         http://www.logix.cz/michal
3
+## License: GPL Version 2
4
+
5
+from SortedDict import SortedDict
6
+import Utils
7
+
8
class FileDict(SortedDict):
    """SortedDict of file entries with md5 and hardlink lookup indexes.

    Keys are relative file names; values are per-file attribute dicts.
    The methods below read the 'md5', 'full_name', 'dev' and 'inode'
    keys of those attribute dicts.

    Two side indexes are kept next to the mapping itself:
      * by_md5:    {md5: set(relative_files)}
      * hardlinks: {dev: {inode: {'md5': md5, 'relative_files': set()}}}
    """

    def __init__(self, mapping = None, ignore_case = True, **kwargs):
        """Create the dict and its (initially empty) side indexes.

        mapping     -- optional initial contents, handed to SortedDict.
                       None (the default) means "start empty"; a fresh
                       dict is created per call so that no mutable default
                       object is shared between instances.
        ignore_case -- handed to SortedDict unchanged.
        """
        if mapping is None:
            mapping = {}
        SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs)
        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
        self.by_md5 = dict() # {md5: set(relative_files)}

    def record_md5(self, relative_file, md5):
        """Add relative_file to the set of files known to have this md5."""
        self.by_md5.setdefault(md5, set()).add(relative_file)

    def find_md5_one(self, md5):
        """Return one (arbitrary) relative file recorded with this md5.

        Returns None when no file has been recorded for md5.
        """
        # Take the first element the set yields; no need to build a list
        # or to catch an IndexError for the empty case.
        for relative_file in self.by_md5.get(md5, ()):
            return relative_file
        return None

    def get_md5(self, relative_file):
        """Return the md5 of relative_file, computing and caching it if needed.

        Lookup order: the md5 already stored on the entry, then the md5
        recorded for the entry's (dev, inode) hardlink group, and finally
        Utils.hash_file_md5() on the entry's 'full_name'.  A newly obtained
        md5 is stored on the entry and added to the by_md5 index.

        Raises KeyError if relative_file is not in this dict, and IOError
        if the file has to be hashed but is unreadable.
        """
        entry = self[relative_file]
        if 'md5' in entry:
            return entry['md5']
        md5 = self.get_hardlink_md5(relative_file)
        if md5 is None:
            md5 = Utils.hash_file_md5(entry['full_name'])
        self.record_md5(relative_file, md5)
        entry['md5'] = md5
        return md5

    def record_hardlink(self, relative_file, dev, inode, md5):
        """Register relative_file as a name for the file at (dev, inode).

        The md5 is stored only the first time a given (dev, inode) pair is
        seen; later calls for the same pair just add the file name.
        """
        by_inode = self.hardlinks.setdefault(dev, dict())
        if inode not in by_inode:
            by_inode[inode] = dict(md5=md5, relative_files=set())
        by_inode[inode]['relative_files'].add(relative_file)

    def get_hardlink_md5(self, relative_file):
        """Return the md5 recorded for relative_file's (dev, inode), or None.

        None is returned when the entry carries no 'dev'/'inode' attributes
        or when nothing has been recorded for that pair.  Raises KeyError if
        relative_file itself is not in this dict.
        """
        entry = self[relative_file]
        try:
            return self.hardlinks[entry['dev']][entry['inode']]['md5']
        except KeyError:
            # Missing dev/inode on the entry or no record for the pair:
            # the caller falls back to hashing the file.
            return None
... ...
@@ -6,7 +6,7 @@
6 6
 from S3 import S3
7 7
 from Config import Config
8 8
 from S3Uri import S3Uri
9
-from SortedDict import SortedDict
9
+from FileDict import FileDict
10 10
 from Utils import *
11 11
 from Exceptions import ParameterError
12 12
 from HashCache import HashCache
... ...
@@ -58,7 +58,7 @@ def _fswalk_no_symlinks(path):
58 58
 def filter_exclude_include(src_list):
59 59
     info(u"Applying --exclude/--include")
60 60
     cfg = Config()
61
-    exclude_list = SortedDict(ignore_case = False)
61
+    exclude_list = FileDict(ignore_case = False)
62 62
     for file in src_list.keys():
63 63
         debug(u"CHECK: %s" % file)
64 64
         excluded = False
... ...
@@ -224,7 +224,7 @@ def fetch_local_list(args, recursive = None):
224 224
             info(u"No cache file found, creating it.")
225 225
 
226 226
     local_uris = []
227
-    local_list = SortedDict(ignore_case = False)
227
+    local_list = FileDict(ignore_case = False)
228 228
     single_file = False
229 229
 
230 230
     if type(args) not in (list, tuple):
... ...
@@ -284,7 +284,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
284 284
             rem_base = rem_base[:rem_base.rfind('/')+1]
285 285
             remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
286 286
         rem_base_len = len(rem_base)
287
-        rem_list = SortedDict(ignore_case = False)
287
+        rem_list = FileDict(ignore_case = False)
288 288
         break_now = False
289 289
         for object in response['list']:
290 290
             if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
... ...
@@ -292,7 +292,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
292 292
                 key = os.path.basename(object['Key'])
293 293
                 object_uri_str = remote_uri_original.uri()
294 294
                 break_now = True
295
-                rem_list = SortedDict(ignore_case = False)   ## Remove whatever has already been put to rem_list
295
+                rem_list = FileDict(ignore_case = False)   ## Remove whatever has already been put to rem_list
296 296
             else:
297 297
                 key = object['Key'][rem_base_len:]      ## Beware - this may be '' if object['Key']==rem_base !!
298 298
                 object_uri_str = remote_uri.uri() + key
... ...
@@ -314,7 +314,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
314 314
 
315 315
     cfg = Config()
316 316
     remote_uris = []
317
-    remote_list = SortedDict(ignore_case = False)
317
+    remote_list = FileDict(ignore_case = False)
318 318
 
319 319
     if type(args) not in (list, tuple):
320 320
         args = [args]
... ...
@@ -436,7 +436,7 @@ def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates
436 436
     ## Items left on src_list will be transferred
437 437
     ## Items left on update_list will be transferred after src_list
438 438
     ## Items left on copy_pairs will be copied from dst1 to dst2
439
-    update_list = SortedDict(ignore_case = False)
439
+    update_list = FileDict(ignore_case = False)
440 440
     ## Items left on dst_list will be deleted
441 441
     copy_pairs = []
442 442
 
... ...
@@ -27,8 +27,6 @@ class SortedDict(dict):
27 27
         """
28 28
         dict.__init__(self, mapping, **kwargs)
29 29
         self.ignore_case = ignore_case
30
-        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
31
-        self.by_md5 = dict() # {md5: set(relative_files)}
32 30
 
33 31
     def keys(self):
34 32
         keys = dict.keys(self)
... ...
@@ -49,45 +47,6 @@ class SortedDict(dict):
49 49
         return SortedDictIterator(self, self.keys())
50 50
 
51 51
 
52
-    def record_md5(self, relative_file, md5):
53
-        if md5 not in self.by_md5:
54
-            self.by_md5[md5] = set()
55
-        self.by_md5[md5].add(relative_file)
56
-
57
-    def find_md5_one(self, md5):
58
-        try:
59
-            return list(self.by_md5.get(md5, set()))[0]
60
-        except:
61
-            return None
62
-
63
-    def get_md5(self, relative_file):
64
-        """returns md5 if it can, or raises IOError if file is unreadable"""
65
-        md5 = None
66
-        if 'md5' in self[relative_file]:
67
-            return self[relative_file]['md5']
68
-        md5 = self.get_hardlink_md5(relative_file)
69
-        if md5 is None:
70
-            md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
71
-        self.record_md5(relative_file, md5)
72
-        self[relative_file]['md5'] = md5
73
-        return md5
74
-
75
-    def record_hardlink(self, relative_file, dev, inode, md5):
76
-        if dev not in self.hardlinks:
77
-            self.hardlinks[dev] = dict()
78
-        if inode not in self.hardlinks[dev]:
79
-            self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set())
80
-        self.hardlinks[dev][inode]['relative_files'].add(relative_file)
81
-
82
-    def get_hardlink_md5(self, relative_file):
83
-        md5 = None
84
-        dev = self[relative_file]['dev']
85
-        inode = self[relative_file]['inode']
86
-        try:
87
-            md5 = self.hardlinks[dev][inode]['md5']
88
-        except:
89
-            pass
90
-        return md5
91 52
 
92 53
 if __name__ == "__main__":
93 54
     d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 }
... ...
@@ -911,7 +911,7 @@ def local_copy(copy_pairs, destination_base):
911 911
     # Do NOT hardlink local files by default, that'd be silly
912 912
     # For instance all empty files would become hardlinked together!
913 913
 
914
-    failed_copy_list = SortedDict()
914
+    failed_copy_list = FileDict()
915 915
     for (src_obj, dst1, relative_file) in copy_pairs:
916 916
         src_file = os.path.join(destination_base, dst1)
917 917
         dst_file = os.path.join(destination_base, relative_file)
... ...
@@ -1076,7 +1076,7 @@ def cmd_sync_local2remote(args):
1076 1076
             ## Make remote_key same as local_key for comparison if we're dealing with only one file
1077 1077
             remote_list_entry = remote_list[remote_list.keys()[0]]
1078 1078
             # Flush remote_list, by the way
1079
-            remote_list = SortedDict()
1079
+            remote_list = FileDict()
1080 1080
             remote_list[local_list.keys()[0]] =  remote_list_entry
1081 1081
 
1082 1082
         local_list, remote_list, update_list, copy_pairs = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True, delay_updates = cfg.delay_updates)
... ...
@@ -2079,6 +2079,7 @@ if __name__ == '__main__':
2079 2079
         from S3.S3 import S3
2080 2080
         from S3.Config import Config
2081 2081
         from S3.SortedDict import SortedDict
2082
+        from S3.FileDict import FileDict
2082 2083
         from S3.S3Uri import S3Uri
2083 2084
         from S3 import Utils
2084 2085
         from S3.Utils import *