and move handling includes into fetch_local_list() instead of after it.
This lets us build the file list, filter out the excludes first, and only
then run stat() and do the file I/O needed to calculate the md5sums.
This greatly speeds up any use of excludes, because we no longer do
disk I/O for files that are going to be excluded anyway.
... | ... |
@@ -159,6 +159,41 @@ def _get_filelist_from_file(cfg, local_path): |
159 | 159 |
return result |
160 | 160 |
|
161 | 161 |
def fetch_local_list(args, is_src = False, recursive = None): |
162 |
+ |
|
163 |
+ def _fetch_local_list_info(loc_list): |
|
164 |
+ for relative_file in sorted(loc_list.keys()): |
|
165 |
+ if relative_file == '-': continue |
|
166 |
+ |
|
167 |
+ full_name = loc_list[relative_file]['full_name'] |
|
168 |
+ try: |
|
169 |
+ sr = os.stat_result(os.stat(full_name)) |
|
170 |
+ except OSError, e: |
|
171 |
+ if e.errno == errno.ENOENT: |
|
172 |
+ # file was removed async to us getting the list |
|
173 |
+ continue |
|
174 |
+ else: |
|
175 |
+ raise |
|
176 |
+ loc_list[relative_file].update({ |
|
177 |
+ 'size' : sr.st_size, |
|
178 |
+ 'mtime' : sr.st_mtime, |
|
179 |
+ 'dev' : sr.st_dev, |
|
180 |
+ 'inode' : sr.st_ino, |
|
181 |
+ 'uid' : sr.st_uid, |
|
182 |
+ 'gid' : sr.st_gid, |
|
183 |
+ 'sr': sr # save it all, may need it in preserve_attrs_list |
|
184 |
+ ## TODO: Possibly more to save here... |
|
185 |
+ }) |
|
186 |
+ if 'md5' in cfg.sync_checks: |
|
187 |
+ md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size) |
|
188 |
+ if md5 is None: |
|
189 |
+ try: |
|
190 |
+ md5 = loc_list.get_md5(relative_file) # this does the file I/O |
|
191 |
+ except IOError: |
|
192 |
+ continue |
|
193 |
+ cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5) |
|
194 |
+ loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5, sr.st_size) |
|
195 |
+ |
|
196 |
+ |
|
162 | 197 |
def _get_filelist_local(loc_list, local_uri, cache): |
163 | 198 |
info(u"Compiling list of local files...") |
164 | 199 |
|
... | ... |
@@ -214,35 +249,11 @@ def fetch_local_list(args, is_src = False, recursive = None): |
214 | 214 |
relative_file = replace_nonprintables(relative_file) |
215 | 215 |
if relative_file.startswith('./'): |
216 | 216 |
relative_file = relative_file[2:] |
217 |
- try: |
|
218 |
- sr = os.stat_result(os.stat(full_name)) |
|
219 |
- except OSError, e: |
|
220 |
- if e.errno == errno.ENOENT: |
|
221 |
- # file was removed async to us getting the list |
|
222 |
- continue |
|
223 |
- else: |
|
224 |
- raise |
|
225 | 217 |
loc_list[relative_file] = { |
226 | 218 |
'full_name_unicode' : unicodise(full_name), |
227 | 219 |
'full_name' : full_name, |
228 |
- 'size' : sr.st_size, |
|
229 |
- 'mtime' : sr.st_mtime, |
|
230 |
- 'dev' : sr.st_dev, |
|
231 |
- 'inode' : sr.st_ino, |
|
232 |
- 'uid' : sr.st_uid, |
|
233 |
- 'gid' : sr.st_gid, |
|
234 |
- 'sr': sr # save it all, may need it in preserve_attrs_list |
|
235 |
- ## TODO: Possibly more to save here... |
|
236 | 220 |
} |
237 |
- if 'md5' in cfg.sync_checks: |
|
238 |
- md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size) |
|
239 |
- if md5 is None: |
|
240 |
- try: |
|
241 |
- md5 = loc_list.get_md5(relative_file) # this does the file I/O |
|
242 |
- except IOError: |
|
243 |
- continue |
|
244 |
- cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5) |
|
245 |
- loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5, sr.st_size) |
|
221 |
+ |
|
246 | 222 |
return loc_list, single_file |
247 | 223 |
|
248 | 224 |
def _maintain_cache(cache, local_list): |
... | ... |
@@ -295,9 +306,10 @@ def fetch_local_list(args, is_src = False, recursive = None): |
295 | 295 |
if len(local_list) > 1: |
296 | 296 |
single_file = False |
297 | 297 |
|
298 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
299 |
+ _fetch_local_list_info(local_list) |
|
298 | 300 |
_maintain_cache(cache, local_list) |
299 |
- |
|
300 |
- return local_list, single_file |
|
301 |
+ return local_list, single_file, exclude_list |
|
301 | 302 |
|
302 | 303 |
def fetch_remote_list(args, require_attribs = False, recursive = None, batch_mode = False, uri_params = {}): |
303 | 304 |
def _get_remote_attribs(uri, remote_item): |
... | ... |
@@ -284,9 +284,7 @@ def cmd_object_put(args): |
284 | 284 |
if len(args) == 0: |
285 | 285 |
raise ParameterError("Nothing to upload. Expecting a local file or directory.") |
286 | 286 |
|
287 |
- local_list, single_file_local = fetch_local_list(args, is_src = True) |
|
288 |
- |
|
289 |
- local_list, exclude_list = filter_exclude_include(local_list) |
|
287 |
+ local_list, single_file_local, exclude_list = fetch_local_list(args, is_src = True) |
|
290 | 288 |
|
291 | 289 |
local_count = len(local_list) |
292 | 290 |
|
... | ... |
@@ -875,7 +873,7 @@ def cmd_sync_remote2local(args): |
875 | 875 |
s3 = S3(Config()) |
876 | 876 |
|
877 | 877 |
destination_base = args[-1] |
878 |
- local_list, single_file_local = fetch_local_list(destination_base, is_src = False, recursive = True) |
|
878 |
+ local_list, single_file_local, dst_exclude_list = fetch_local_list(destination_base, is_src = False, recursive = True) |
|
879 | 879 |
remote_list = fetch_remote_list(args[:-1], recursive = True, require_attribs = True) |
880 | 880 |
|
881 | 881 |
local_count = len(local_list) |
... | ... |
@@ -885,7 +883,6 @@ def cmd_sync_remote2local(args): |
885 | 885 |
info(u"Found %d remote files, %d local files" % (remote_count, local_count)) |
886 | 886 |
|
887 | 887 |
remote_list, src_exclude_list = filter_exclude_include(remote_list) |
888 |
- local_list, dst_exclude_list = filter_exclude_include(local_list) |
|
889 | 888 |
|
890 | 889 |
remote_list, local_list, update_list, copy_pairs = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False, delay_updates = cfg.delay_updates) |
891 | 890 |
|
... | ... |
@@ -1363,8 +1360,7 @@ def cmd_sync_local2remote(args): |
1363 | 1363 |
error(u"or disable encryption with --no-encrypt parameter.") |
1364 | 1364 |
sys.exit(1) |
1365 | 1365 |
|
1366 |
- local_list, single_file_local = fetch_local_list(args[:-1], is_src = True, recursive = True) |
|
1367 |
- local_list, exclude_list = filter_exclude_include(local_list) |
|
1366 |
+ local_list, single_file_local, exclude_list = fetch_local_list(args[:-1], is_src = True, recursive = True) |
|
1368 | 1367 |
|
1369 | 1368 |
destinations = [args[-1]] |
1370 | 1369 |
if cfg.additional_destinations: |