## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
## Copyright: TGRMN Software and contributors
|
3434a556 |
import logging |
7800900e |
from SortedDict import SortedDict
import Utils |
3434a556 |
import Config |
7800900e |
|
6d7de25e |
# MD5 hex digest of zero bytes of input. FileDict.record_md5 skips this value
# so that empty files are never indexed as duplicates of one another.
zero_length_md5 = "d41d8cd98f00b204e9800998ecf8427e"
3434a556 |
# Module-level configuration object; FileDict.get_md5 consults
# cfg.sync_checks to decide whether it may hash a file's contents.
cfg = Config.Config()
6d7de25e |
|
7800900e |
class FileDict(SortedDict):
    """SortedDict of file entries with md5 and hardlink lookup indexes.

    Entries are looked up by relative file name.  The methods below read the
    'md5', 'dev', 'inode' and 'full_name' keys of an entry.  Two side indexes
    are maintained next to the mapping itself:

      hardlinks -- { dev: { inode: {'md5': ..., 'relative_files': set()} } }
      by_md5    -- { md5: set(relative_files) }
    """

    def __init__(self, mapping = None, ignore_case = True, **kwargs):
        """Initialise the underlying SortedDict and the (empty) indexes.

        `mapping`, `ignore_case` and any extra keyword arguments are handed
        to SortedDict.__init__.  Omitting `mapping` (or passing None) starts
        from a fresh empty dict instead of a default object shared between
        calls.
        """
        if mapping is None:
            mapping = {}
        SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs)
        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
        self.by_md5 = dict() # {md5: set(relative_files)}

    def record_md5(self, relative_file, md5):
        """Index `relative_file` under `md5` in self.by_md5.

        Nothing is recorded when `md5` is None or equals zero_length_md5.
        """
        if md5 is None:
            return
        if md5 == zero_length_md5:
            return
        self.by_md5.setdefault(md5, set()).add(relative_file)

    def find_md5_one(self, md5):
        """Return one arbitrary relative file recorded under `md5`, or None.

        None is returned when `md5` is None, unknown, or has no files.
        """
        if md5 is None:
            return None
        try:
            matches = self.by_md5[md5]
        except (KeyError, TypeError):
            # Unknown md5, or a value that cannot be used as a dict key.
            return None
        return next(iter(matches), None)

    def get_md5(self, relative_file):
        """returns md5 if it can, or raises IOError if file is unreadable

        Lookup order:
          1. the 'md5' value already stored on the entry;
          2. the md5 recorded for the entry's (dev, inode) hardlink;
          3. Utils.hash_file_md5() on the entry's 'full_name', only when
             'md5' is listed in cfg.sync_checks.

        The result (possibly None) is stored back on the entry, so later
        calls return it from step 1.
        """
        entry = self[relative_file]
        if 'md5' in entry:
            return entry['md5']
        md5 = self.get_hardlink_md5(relative_file)
        if md5 is None and 'md5' in cfg.sync_checks:
            logging.debug(u"doing file I/O to read md5 of %s", relative_file)
            md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
        # record_md5 ignores None, so this is safe when no md5 was obtained.
        self.record_md5(relative_file, md5)
        self[relative_file]['md5'] = md5
        return md5

    def record_hardlink(self, relative_file, dev, inode, md5, size):
        """Register `relative_file` under its (dev, inode) pair.

        The md5 stored for a pair is the one given the first time that pair
        is recorded; later calls only add the file name to the pair's
        'relative_files' set.
        """
        if md5 is None:
            return
        if size == 0:
            return # don't record 0-length files
        if dev == 0 or inode == 0:
            return # Windows
        by_inode = self.hardlinks.setdefault(dev, dict())
        if inode not in by_inode:
            by_inode[inode] = dict(md5=md5, relative_files=set())
        by_inode[inode]['relative_files'].add(relative_file)

    def get_hardlink_md5(self, relative_file):
        """Return the md5 recorded for the entry's (dev, inode), or None.

        The entry's 'dev' and 'inode' values are read directly; only the
        lookup in self.hardlinks is treated as best-effort.
        """
        entry = self[relative_file]
        dev = entry['dev']
        inode = entry['inode']
        try:
            return self.hardlinks[dev][inode]['md5']
        except (KeyError, TypeError):
            # No hardlink recorded for this pair (or the pair is unusable as
            # a key): there is simply no md5 to offer.
            return None