This creates and maintains a cache (aka HashCache) of each inode in
the local tree. The cache is used to avoid doing local disk I/O to
calculate an MD5 value for a file if its inode, mtime, and size
haven't changed. If any of these values have changed, the disk I/O is
performed and the cache entry is refreshed.
This introduces command line option --cache-file <foo>. The file is
created if it does not exist, is read upon start and written upon
close. The contents are only useful for a given directory tree, so
caches should not be reused for different directory tree syncs.
... | ... |
@@ -9,6 +9,7 @@ from S3Uri import S3Uri |
9 | 9 |
from SortedDict import SortedDict |
10 | 10 |
from Utils import * |
11 | 11 |
from Exceptions import ParameterError |
12 |
+from HashCache import HashCache |
|
12 | 13 |
|
13 | 14 |
from logging import debug, info, warning, error |
14 | 15 |
|
... | ... |
@@ -137,7 +138,7 @@ def handle_exclude_include_walk(root, dirs, files): |
137 | 137 |
debug(u"PASS: %s" % (file)) |
138 | 138 |
|
139 | 139 |
def fetch_local_list(args, recursive = None): |
140 |
- def _get_filelist_local(loc_list, local_uri): |
|
140 |
+ def _get_filelist_local(loc_list, local_uri, cache): |
|
141 | 141 |
info(u"Compiling list of local files...") |
142 | 142 |
if local_uri.isdir(): |
143 | 143 |
local_base = deunicodise(local_uri.basename()) |
... | ... |
@@ -180,11 +181,30 @@ def fetch_local_list(args, recursive = None): |
180 | 180 |
## TODO: Possibly more to save here... |
181 | 181 |
} |
182 | 182 |
if 'md5' in cfg.sync_checks: |
183 |
- md5 = loc_list.get_md5(relative_file) |
|
183 |
+ md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size) |
|
184 |
+ if md5 is None: |
|
185 |
+ md5 = loc_list.get_md5(relative_file) # this does the file I/O |
|
186 |
+ cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5) |
|
184 | 187 |
loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5) |
185 | 188 |
return loc_list, single_file |
186 | 189 |
|
190 |
def _maintain_cache(cache, local_list):
    ## Evict cache entries for files that no longer exist in the local
    ## tree, then persist the cache. Does nothing unless --cache-file
    ## was given.
    if not cfg.cache_file:
        return
    cache.mark_all_for_purge()
    for key in local_list.keys():
        entry = local_list[key]
        cache.unmark_for_purge(entry['dev'], entry['inode'], entry['mtime'], entry['size'])
    cache.purge()
    cache.save(cfg.cache_file)
|
187 | 198 |
cfg = Config() |
199 |
+ |
|
200 |
+ cache = HashCache() |
|
201 |
+ if cfg.cache_file: |
|
202 |
+ try: |
|
203 |
+ cache.load(cfg.cache_file) |
|
204 |
+ except IOError: |
|
205 |
+ info(u"No cache file found, creating it.") |
|
206 |
+ |
|
188 | 207 |
local_uris = [] |
189 | 208 |
local_list = SortedDict(ignore_case = False) |
190 | 209 |
single_file = False |
... | ... |
@@ -204,7 +224,7 @@ def fetch_local_list(args, recursive = None): |
204 | 204 |
local_uris.append(uri) |
205 | 205 |
|
206 | 206 |
for uri in local_uris: |
207 |
- list_for_uri, single_file = _get_filelist_local(local_list, uri) |
|
207 |
+ list_for_uri, single_file = _get_filelist_local(local_list, uri, cache) |
|
208 | 208 |
|
209 | 209 |
## Single file is True if and only if the user |
210 | 210 |
## specified one local URI and that URI represents |
... | ... |
@@ -214,6 +234,8 @@ def fetch_local_list(args, recursive = None): |
214 | 214 |
if len(local_list) > 1: |
215 | 215 |
single_file = False |
216 | 216 |
|
217 |
+ _maintain_cache(cache, local_list) |
|
218 |
+ |
|
217 | 219 |
return local_list, single_file |
218 | 220 |
|
219 | 221 |
def fetch_remote_list(args, require_attribs = False, recursive = None): |
220 | 222 |
new file mode 100644 |
... | ... |
## HashCache: a persistent map of (device, inode, mtime, size) -> md5 so
## that unchanged local files do not have to be re-read to compute their
## checksum on every sync.
try:
    # cPickle is the fast C implementation on Python 2; plain pickle
    # is the equivalent on Python 3.
    import cPickle as pickle
except ImportError:
    import pickle


class HashCache(object):
    """Cache of MD5 values keyed by (dev, inode, mtime) and validated by size.

    Layout: self.inodes[dev][inode][mtime] = {'md5': ..., 'size': ...}
    """

    def __init__(self):
        # Nested dicts, built lazily by add().
        self.inodes = dict()

    def add(self, dev, inode, mtime, size, md5):
        """Record the md5 of the file identified by (dev, inode, mtime, size)."""
        self.inodes.setdefault(dev, dict()).setdefault(inode, dict())[mtime] = dict(md5=md5, size=size)

    def md5(self, dev, inode, mtime, size):
        """Return the cached md5 for the file, or None on a cache miss.

        A miss is either an unknown (dev, inode, mtime) key or a size
        mismatch (the file was rewritten with the same mtime).
        """
        try:
            d = self.inodes[dev][inode][mtime]
        except KeyError:
            # Narrow catch: a bare except here would also swallow
            # KeyboardInterrupt and real bugs.
            return None
        if d['size'] != size:
            return None
        return d['md5']

    def mark_all_for_purge(self):
        """Flag every entry for deletion; unmark_for_purge() rescues live ones."""
        for inodes in self.inodes.values():
            for mtimes in inodes.values():
                for entry in mtimes.values():
                    entry['purge'] = True

    def unmark_for_purge(self, dev, inode, mtime, size):
        """Clear the purge flag for a file that still exists with this size.

        Unknown keys are ignored so a freshly-seen file (not yet cached)
        does not raise KeyError.
        """
        try:
            d = self.inodes[dev][inode][mtime]
        except KeyError:
            return
        if d['size'] == size and 'purge' in d:
            del d['purge']

    def purge(self):
        """Delete all entries still flagged for purge.

        Only the flagged (dev, inode, mtime) entries are removed; empty
        inode/device dicts are then pruned. (Deleting the whole inode
        dict from inside the mtime loop, as the original did, dropped
        unflagged mtimes and raised KeyError on the next iteration.)
        """
        for dev in list(self.inodes.keys()):
            inodes = self.inodes[dev]
            for inode in list(inodes.keys()):
                mtimes = inodes[inode]
                for mtime in list(mtimes.keys()):
                    if 'purge' in mtimes[mtime]:
                        del mtimes[mtime]
                if not mtimes:
                    del inodes[inode]
            if not inodes:
                del self.inodes[dev]

    def save(self, f):
        """Pickle the cache to file name *f* (payload is versioned)."""
        d = dict(inodes=self.inodes, version=1)
        fp = open(f, 'wb')  # binary mode: pickle output is not text
        try:
            pickle.dump(d, fp)
        finally:
            fp.close()

    def load(self, f):
        """Load a cache previously written by save().

        Raises IOError if the file does not exist (caller treats that as
        "create a new cache"); silently ignores unknown payload versions.
        """
        fp = open(f, 'rb')
        try:
            d = pickle.load(fp)
        finally:
            fp.close()
        if d.get('version') == 1 and 'inodes' in d:
            self.inodes = d['inodes']
@@ -1671,6 +1671,7 @@ def main(): |
1671 | 1671 |
optparser.add_option("-d", "--debug", dest="verbosity", action="store_const", const=logging.DEBUG, help="Enable debug output.") |
1672 | 1672 |
optparser.add_option( "--version", dest="show_version", action="store_true", help="Show s3cmd version (%s) and exit." % (PkgInfo.version)) |
1673 | 1673 |
optparser.add_option("-F", "--follow-symlinks", dest="follow_symlinks", action="store_true", default=False, help="Follow symbolic links as if they are regular files") |
1674 |
+ optparser.add_option( "--cache-file", dest="cache_file", action="store", default="", metavar="FILE", help="Cache FILE containing local source MD5 values") |
|
1674 | 1675 |
|
1675 | 1676 |
optparser.set_usage(optparser.usage + " COMMAND [parameters]") |
1676 | 1677 |
optparser.set_description('S3cmd is a tool for managing objects in '+ |