GitList

S3/Utils.py

ec50b5a7	## Amazon S3 manager ## Author: Michal Ludvig <michal@logix.cz> ## http://www.logix.cz/michal ## License: GPL Version 2
afd51b6c	## Copyright: TGRMN Software and contributors
ec50b5a7
0f03a064	import datetime
8ec1807f	import os
bcb44420	import sys
df9fa4b5	import time import re
8ec1807f	import string import random
227fabf8	import rfc822
0b8ea559	import hmac import base64
ac9940ec	import errno
ff6e561b	import urllib
60f5efd9	from calendar import timegm
ed27a45e	from logging import debug, info, warning, error
8214d4f0	from ExitCodes import EX_OSFILE
49a7604e	try: import dateutil.parser except ImportError: sys.stderr.write(u""" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ImportError trying to import dateutil.parser. Please install the python dateutil module: $ sudo apt-get install python-dateutil or $ sudo yum install python-dateutil or $ pip install python-dateutil !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! """) sys.stderr.flush()
8214d4f0	sys.exit(EX_OSFILE)
0f03a064
82d9eafa	import Config
3c07424d	import Exceptions
82d9eafa
bcb44420	# hashlib backported to python 2.4 / 2.5 is not compatible with hmac! if sys.version_info[0] == 2 and sys.version_info[1] < 6: from md5 import md5 import sha as sha1 else: from hashlib import md5, sha1
7bae4e19	try:
d439efb4	import xml.etree.ElementTree as ET
7bae4e19	except ImportError:
d439efb4	import elementtree.ElementTree as ET
3c07424d	from xml.parsers.expat import ExpatError
7bae4e19
cb0bbaef	__all__ = []
def parseNodes(nodes):
    """
    Convert a list of XML elements into a list of dictionaries.

    Every element of 'nodes' yields one dict keyed by the tag names of
    its direct children. A child that itself has children is converted
    recursively (the value is the one-item list returned by the nested
    call). A leaf child maps to the result of
    node.findtext(".//<tag>"), i.e. the text of the first descendant
    of 'node' that carries that tag.

    ## WARNING: Ignores text nodes from mixed xml/text.
    ## For instance <tag1>some text<tag2>other text</tag2></tag1>
    ## will ignore the "some text" node
    """
    retval = []
    for node in nodes:
        retval_item = {}
        ## Iterate the element itself: Element.getchildren() is
        ## deprecated and no longer exists in Python 3.9+.
        for child in node:
            name = child.tag
            if len(child):
                ## Complex-type child. Recurse
                retval_item[name] = parseNodes([child])
            else:
                retval_item[name] = node.findtext(".//%s" % child.tag)
        retval.append(retval_item)
    return retval
cb0bbaef	__all__.append("parseNodes")
df9fa4b5
def stripNameSpace(xml):
    """
    stripNameSpace(xml) -- remove top-level namespace declaration

    Looks for a leading tag (such as the XML declaration) followed by
    an element whose first attribute is xmlns="http://...". When found,
    that attribute is removed from the text.

    Returns a tuple (xml, xmlns): the possibly modified XML text and
    the namespace URI that was removed, or None if nothing matched.
    """
    ## Raw string: \s and \w are regex escapes, not string escapes.
    r = re.compile(r'''^(<?[^>]+?>\s?)(<\w+) xmlns=['"](http://[^'"]+)['"](.*)''', re.MULTILINE)
    found = r.match(xml)
    if found is None:
        return xml, None
    xmlns = found.group(3)
    xml = r.sub(r"\1\2\4", xml)
    return xml, xmlns
cb0bbaef	__all__.append("stripNameSpace")
cb64ca9e
def getTreeFromXml(xml):
    """
    Parse XML text and return its root element.

    The top-level namespace is stripped with stripNameSpace() before
    parsing; if one was found it is stored in the root's
    attrib['xmlns'].

    An ExpatError is logged and turned into Exceptions.ParameterError.
    Any other exception is logged together with the (stripped) XML
    text and re-raised unchanged.
    """
    stripped, namespace = stripNameSpace(xml)
    try:
        root = ET.fromstring(stripped)
        if namespace:
            root.attrib['xmlns'] = namespace
        return root
    except ExpatError:
        error(sys.exc_info()[1])
        raise Exceptions.ParameterError("Bucket contains invalid filenames. Please run: s3cmd fixbucket s3://your-bucket/")
    except Exception:
        error(sys.exc_info()[1])
        error(stripped)
        raise
cb0bbaef	__all__.append("getTreeFromXml")
d439efb4
def getListFromXml(xml, node):
    """
    Parse 'xml' and return parseNodes() of every element matching
    './/<node>' below the root.
    """
    matches = getTreeFromXml(xml).findall('.//%s' % (node))
    return parseNodes(matches)
cb0bbaef	__all__.append("getListFromXml")
def getDictFromTree(tree):
    """
    Convert an element's children into a dictionary keyed by tag name.

    A child with children of its own is converted recursively into a
    nested dict; a leaf child contributes its text ("" when it has
    none). When a tag occurs more than once under the same parent its
    values are collected into a list, in document order.
    """
    ret_dict = {}
    ## Iterate the element itself; getchildren() is gone in Python 3.9+.
    for child in tree:
        if len(child):
            ## Complex-type child. Recurse
            content = getDictFromTree(child)
        else:
            content = child.text
        content = content or ""
        if child.tag in ret_dict:
            ## Repeated tag: promote the existing value to a list once.
            if not isinstance(ret_dict[child.tag], list):
                ret_dict[child.tag] = [ret_dict[child.tag]]
            ret_dict[child.tag].append(content)
        else:
            ret_dict[child.tag] = content
    return ret_dict
cb0bbaef	__all__.append("getDictFromTree")
c3f0b06a
def getTextFromXml(xml, xpath):
    """
    Parse 'xml' and return the text selected by 'xpath'.

    If the root tag itself ends with 'xpath' the root's own text is
    returned, otherwise the result of findtext(xpath) on the root.
    """
    root = getTreeFromXml(xml)
    if not root.tag.endswith(xpath):
        return root.findtext(xpath)
    return root.text
cb0bbaef	__all__.append("getTextFromXml")
def getRootTagName(xml):
    """
    Parse 'xml' and return the tag name of its root element.
    """
    return getTreeFromXml(xml).tag
cb0bbaef	__all__.append("getRootTagName")
0d91ff3f
def xmlTextNode(tag_name, text):
    """
    Create and return a new <tag_name> element whose text is the
    unicode form of 'text'.
    """
    node = ET.Element(tag_name)
    ## u"%s" formatting produces the same text as unicode(text)
    node.text = u"%s" % (text,)
    return node
cb0bbaef	__all__.append("xmlTextNode")
def appendXmlTextNode(tag_name, text, parent):
    """
    Build a <tag_name> text node via xmlTextNode() and append it to
    the 'parent' element.

    Returns the newly created node.
    """
    node = xmlTextNode(tag_name, text)
    parent.append(node)
    return node
cb0bbaef	__all__.append("appendXmlTextNode")
c3f0b06a
def dateS3toPython(date):
    """
    Parse an S3 timestamp string into a datetime object.

    The fractional-seconds part is reset to .000 before the string is
    handed to dateutil's fuzzy parser.
    """
    ## Reset milliseconds to 000. Match the *whole* run of digits after
    ## the dot; matching a single digit would leave the remaining ones
    ## in place (".123Z" -> ".00023Z"). The timezone designator that
    ## follows is deliberately left untouched.
    date = re.sub(r'\.[0-9]+', ".000", date)
    return dateutil.parser.parse(date, fuzzy=True)
cb0bbaef	__all__.append("dateS3toPython")
def dateS3toUnix(date):
    """
    Convert an S3 timestamp string to a Unix timestamp.
    """
    ## NOTE: This is timezone-aware and returns the timestamp
    ## relative to GMT
    parsed = dateS3toPython(date)
    return timegm(parsed.utctimetuple())
cb0bbaef	__all__.append("dateS3toUnix")
df9fa4b5
def dateRFC822toPython(date):
    """
    Parse a date string with dateutil's fuzzy parser and return the
    resulting datetime object.
    """
    parsed = dateutil.parser.parse(date, fuzzy=True)
    return parsed
cb0bbaef	__all__.append("dateRFC822toPython")
def dateRFC822toUnix(date):
    """
    Convert a date string (see dateRFC822toPython) to a Unix
    timestamp computed from its UTC time tuple.
    """
    parsed = dateRFC822toPython(date)
    return timegm(parsed.utctimetuple())
cb0bbaef	__all__.append("dateRFC822toUnix")
227fabf8
def formatSize(size, human_readable = False, floating_point = False):
    """
    Convert 'size' to a number plus a unit coefficient.

    'size' is converted with float() when 'floating_point' is true,
    otherwise with int(). With 'human_readable' the value is divided
    by 1024 for as long as it exceeds 2048, stepping through the
    coefficients k, M, G and T.

    Returns a tuple (size, coeff); coeff is "" when no scaling was
    applied.
    """
    ## Explicit branch instead of "a and b or c", which picks the int
    ## branch whenever float(size) happens to be 0.0
    if floating_point:
        size = float(size)
    else:
        size = int(size)
    coeff = ""
    if human_readable:
        ## Stop at the largest known coefficient instead of running
        ## off the end of the list for very large sizes.
        for unit in ('k', 'M', 'G', 'T'):
            if not size > 2048:
                break
            if floating_point:
                size = size / 1024
            else:
                ## Integer sizes stay integers
                size = size // 1024
            coeff = unit
    return (size, coeff)
cb0bbaef	__all__.append("formatSize")
def formatDateTime(s3timestamp):
    """
    Parse 's3timestamp' with dateutil's fuzzy parser and format it as
    "YYYY-MM-DD HH:MM".
    """
    return dateutil.parser.parse(s3timestamp, fuzzy=True).strftime("%Y-%m-%d %H:%M")
cb0bbaef	__all__.append("formatDateTime")
def convertTupleListToDict(list):
    """
    Build a dictionary from a sequence of tuples, using element 0 of
    each tuple as the key and element 1 as the value. Later entries
    override earlier ones with the same key.
    """
    return dict([(entry[0], entry[1]) for entry in list])
cb0bbaef	__all__.append("convertTupleListToDict")
## Alphabet used by rndstr(): ASCII letters followed by digits
_rnd_chars = string.ascii_letters+string.digits
_rnd_chars_len = len(_rnd_chars)

def rndstr(len):
    """
    Return a string of 'len' characters picked at random from
    _rnd_chars. A 'len' of zero or less gives an empty string.
    """
    last_index = _rnd_chars_len - 1
    picked = []
    remaining = len
    while remaining > 0:
        picked.append(_rnd_chars[random.randint(0, last_index)])
        remaining -= 1
    return "".join(picked)
cb0bbaef	__all__.append("rndstr")
def mktmpsomething(prefix, randchars, createfunc):
    """
    Create a uniquely named filesystem object and return its path.

    The candidate name is 'prefix' followed by 'randchars' random
    characters from rndstr(). 'createfunc' is called with that name;
    if it raises OSError with errno EEXIST a new name is tried, up to
    5 attempts in total. Any other OSError is propagated.

    The process umask is set to 077 while creating and is always
    restored afterwards.

    Raises OSError (EEXIST) when every attempt collided, rather than
    returning a path this function did not create.
    """
    old_umask = os.umask(int("077", 8))
    try:
        for attempt in range(5):
            name = prefix + rndstr(randchars)
            try:
                createfunc(name)
                return name
            except OSError:
                if sys.exc_info()[1].errno != errno.EEXIST:
                    raise
        raise OSError(errno.EEXIST, os.strerror(errno.EEXIST), prefix)
    finally:
        ## Restore the caller's umask on every exit path
        os.umask(old_umask)
cb0bbaef	__all__.append("mktmpsomething")
8ec1807f
def mktmpdir(prefix = os.getenv('TMP','/tmp') + "/tmpdir-", randchars = 10):
    """
    Create a temporary directory with os.mkdir via mktmpsomething()
    and return its path.
    """
    return mktmpsomething(prefix, randchars, createfunc = os.mkdir)
cb0bbaef	__all__.append("mktmpdir")
8ec1807f
def mktmpfile(prefix = os.getenv('TMP','/tmp') + "/tmpfile-", randchars = 20):
    """
    Create an empty temporary file via mktmpsomething() and return
    its path.
    """
    def create_exclusive(filename):
        ## O_EXCL makes the open fail if the name already exists
        descriptor = os.open(filename, os.O_CREAT | os.O_EXCL)
        os.close(descriptor)
    return mktmpsomething(prefix, randchars, create_exclusive)
cb0bbaef	__all__.append("mktmpfile")
def hash_file_md5(filename):
    """
    Return the hexadecimal MD5 digest of the contents of 'filename'.

    The file is read in binary mode, 32kB at a time, and is closed
    even when reading fails.
    """
    h = md5()
    f = open(filename, "rb")
    try:
        while True:
            # Hash 32kB chunks
            data = f.read(32*1024)
            if not data:
                break
            h.update(data)
    finally:
        f.close()
    return h.hexdigest()
5d60db3a	__all__.append("hash_file_md5")
ed27a45e
def mkdir_with_parents(dir_name):
    """
    mkdir_with_parents(dst_dir)

    Create directory 'dir_name' with all parent directories.
    Trailing path separators are ignored, so 'a/b/' and 'a/b' name
    the same target.

    Returns True on success (including when the directory already
    exists), False otherwise. Failures are logged as warnings.
    """
    ## Keep a path made only of separators (the root) as it is
    path = dir_name.rstrip(os.sep) or dir_name
    ## Collect the missing directories, deepest first
    missing = []
    while not os.path.isdir(path):
        missing.append(path)
        parent = os.path.dirname(path)
        if not parent or parent == path:
            ## Reached the top of a relative or absolute path
            break
        path = parent
    ## Create them starting from the shallowest one
    while missing:
        cur_dir = missing.pop()
        try:
            debug("mkdir(%s)" % cur_dir)
            os.mkdir(cur_dir)
        except (OSError, IOError):
            e = sys.exc_info()[1]
            warning("%s: can not make directory: %s" % (cur_dir, e.strerror))
            return False
        except Exception:
            warning("%s: %s" % (cur_dir, sys.exc_info()[1]))
            return False
    return True
cb0bbaef	__all__.append("mkdir_with_parents")
d90a7929
def unicodise(string, encoding = None, errors = "replace"):
    """
    Convert 'string' to Unicode or raise an exception.

    'encoding' defaults to the configured encoding
    (Config.Config().encoding). A value that already is unicode is
    returned unchanged. 'errors' is passed to decode().

    Raises UnicodeDecodeError when decoding fails.
    """
    if not encoding:
        encoding = Config.Config().encoding
    ## type(u"") is the unicode text type
    if isinstance(string, type(u"")):
        return string
    debug("Unicodising %r using %s" % (string, encoding))
    try:
        return string.decode(encoding, errors)
    except UnicodeDecodeError:
        ## UnicodeDecodeError takes five arguments; reuse the details
        ## of the original error and replace only the reason text.
        e = sys.exc_info()[1]
        raise UnicodeDecodeError(e.encoding, e.object, e.start, e.end,
                                 "Conversion to unicode failed: %r" % string)
cb0bbaef	__all__.append("unicodise")
d90a7929
def deunicodise(string, encoding = None, errors = "replace"):
    """
    Convert unicode 'string' to <type str>, by default replacing
    all invalid characters with '?' or raise an exception.

    'encoding' defaults to the configured encoding
    (Config.Config().encoding). A value that is not unicode is
    returned as str(string). 'errors' is passed to encode().

    Raises UnicodeEncodeError when encoding fails.
    """
    if not encoding:
        encoding = Config.Config().encoding
    ## type(u"") is the unicode text type
    if not isinstance(string, type(u"")):
        return str(string)
    debug("DeUnicodising %r using %s" % (string, encoding))
    try:
        return string.encode(encoding, errors)
    except UnicodeEncodeError:
        ## UnicodeEncodeError takes five arguments; reuse the details
        ## of the original error and replace only the reason text.
        e = sys.exc_info()[1]
        raise UnicodeEncodeError(e.encoding, e.object, e.start, e.end,
                                 "Conversion from unicode failed: %r" % string)
cb0bbaef	__all__.append("deunicodise")
def unicodise_safe(string, encoding = None):
    """
    Convert 'string' to Unicode according to 'encoding' (or the
    current encoding) and replace all invalid characters with '?'.

    The value is round-tripped through deunicodise() and unicodise();
    any U+FFFD replacement character left over becomes '?'.
    """
    encoded = deunicodise(string, encoding)
    decoded = unicodise(encoded, encoding)
    return decoded.replace(u'\ufffd', '?')
cb0bbaef	__all__.append("unicodise_safe")
d90a7929
def replace_nonprintables(string):
    """
    replace_nonprintables(string)

    Replaces every character 'ch' with ord(ch) <= 31 by its caret
    notation ('^' followed by chr(ord('@') + ord(ch)), i.e. ^@, ^A,
    ...) and the character with ord 127 by ^?.

    A warning with the number of replacements is logged unless
    nothing was replaced or Config's urlencoding_mode is "fixbucket".
    Returns the resulting string.
    """
    pieces = []
    modified = 0
    for c in string:
        o = ord(c)
        if o == 127:
            pieces.append("^?")
            modified += 1
        elif o <= 31:
            pieces.append("^" + chr(ord('@') + o))
            modified += 1
        else:
            pieces.append(c)
    new_string = "".join(pieces)
    if modified and Config.Config().urlencoding_mode != "fixbucket":
        warning("%d non-printable characters replaced in: %s" % (modified, new_string))
    return new_string
cb0bbaef	__all__.append("replace_nonprintables")
b40dd815
def sign_string(string_to_sign):
    """
    Sign a string with the configured secret key, returning the
    base64 encoded HMAC-SHA1 digest.

    The key is read from Config.Config().secret_key.

    Useful for REST authentication. See
    http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
    """
    digest = hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()
    ## b64encode() gives the same result as the old
    ## encodestring(...).strip() for a 20-byte digest and still exists
    ## in current Python versions.
    signature = base64.b64encode(digest)
    return signature
cb0bbaef	__all__.append("sign_string")
b020ea02
def sign_url(url_to_sign, expiry):
    """
    Sign a URL in s3://bucket/object form with the given expiry time.

    The object will be accessible via the signed URL until the AWS key
    and secret are revoked or the expiry time is reached, even if the
    object is otherwise private.

    See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
    """
    bucket_name = url_to_sign.bucket()
    object_name = url_to_sign.object()
    return sign_url_base(bucket = bucket_name, object = object_name, expiry = expiry)
__all__.append("sign_url")

def sign_url_base(**parms):
    """
    Shared implementation of sign_url methods.

    Takes 'bucket', 'object' and 'expiry' as keyword arguments.
    'expiry' is converted with time_to_epoch(); access key and host
    base come from Config. Returns the signed http:// URL.
    """
    parms['expiry'] = time_to_epoch(parms['expiry'])
    parms['access_key'] = Config.Config().access_key
    parms['host_base'] = Config.Config().host_base
    debug("Expiry interpreted as epoch time %s", parms['expiry'])
    signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms
    debug("Signing plaintext: %r", signtext)
    signature = sign_string(signtext)
    parms['sig'] = urllib.quote_plus(signature)
    debug("Urlencoded signature: %s", parms['sig'])
    url_template = "http://%(bucket)s.%(host_base)s/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s"
    return url_template % parms
def time_to_epoch(t):
    """
    Convert time specified in a variety of forms into UNIX epoch time.

    Accepts an int (returned unchanged), standard time 9-tuples,
    datetime.datetime or anything else with a timetuple() method,
    anything that has a strftime() method, and strings holding either
    an epoch number or a timestamp that time.strptime() can parse
    with its default format.

    Returns the epoch time as an int. Raises
    Exceptions.ParameterError if 't' cannot be converted.
    """
    if isinstance(t, int):
        # Already an int
        return t
    elif isinstance(t, (tuple, time.struct_time)):
        # Assume it's a time 9-tuple
        return int(time.mktime(t))
    elif hasattr(t, 'timetuple'):
        # Looks like a datetime object or compatible
        return int(time.mktime(t.timetuple()))
    elif hasattr(t, 'strftime'):
        # Looks like the object supports standard strftime()
        return int(t.strftime('%s'))
    elif isinstance(t, (str, type(u""))):
        # See if it's a string representation of an epoch
        try:
            return int(t)
        except ValueError:
            # Try to parse it as a timestamp string
            try:
                # strptime() yields a struct_time; convert it to an
                # epoch like every other branch does
                return int(time.mktime(time.strptime(t)))
            except ValueError:
                # Will fall through
                debug("Failed to parse date with strptime: %s", sys.exc_info()[1])
    raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t)
def check_bucket_name(bucket, dns_strict = True):
    """
    Validate a bucket name.

    Always checked: allowed characters and a length of 3 to 255.
    With 'dns_strict' only lowercase letters, digits, dot and hyphen
    are allowed, the length is limited to 63, the sequences '-.' and
    '..' are rejected and the name must start and end with a
    lowercase letter or a digit. Without it uppercase letters and
    underscore are accepted as well.

    Returns True for a valid name, raises Exceptions.ParameterError
    describing the first violation otherwise.
    """
    ## Character set
    if dns_strict:
        bad_char = re.search(r"([^a-z0-9\.-])", bucket)
        if bad_char:
            raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.) and hyphen (-)." % (bucket, bad_char.group(1)))
    else:
        bad_char = re.search(r"([^A-Za-z0-9\._-])", bucket)
        if bad_char:
            raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: us-ascii letters (a-z, A-Z), digits (0-9), dot (.), hyphen (-) and underscore (_)." % (bucket, bad_char.group(1)))

    ## Length limits common to both modes
    name_length = len(bucket)
    if name_length < 3:
        raise Exceptions.ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket)
    if name_length > 255:
        raise Exceptions.ParameterError("Bucket name '%s' is too long (max 255 characters)" % bucket)
    if not dns_strict:
        return True

    ## Additional DNS compatibility rules
    if name_length > 63:
        raise Exceptions.ParameterError("Bucket name '%s' is too long (max 63 characters)" % bucket)
    if "-." in bucket:
        raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '-.' for DNS compatibility" % bucket)
    if ".." in bucket:
        raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '..' for DNS compatibility" % bucket)
    if re.match("[0-9a-z]", bucket) is None:
        raise Exceptions.ParameterError("Bucket name '%s' must start with a letter or a digit" % bucket)
    if re.search("[0-9a-z]$", bucket) is None:
        raise Exceptions.ParameterError("Bucket name '%s' must end with a letter or a digit" % bucket)
    return True
__all__.append("check_bucket_name")

def check_bucket_name_dns_conformity(bucket):
    """
    Return the result of check_bucket_name() in DNS-strict mode, or
    False if it raises Exceptions.ParameterError.
    """
    try:
        conforms = check_bucket_name(bucket, dns_strict = True)
    except Exceptions.ParameterError:
        conforms = False
    return conforms
__all__.append("check_bucket_name_dns_conformity")

def getBucketFromHostname(hostname):
    """
    bucket, success = getBucketFromHostname(hostname)

    Only works for hostnames derived from bucket names
    using Config.host_bucket pattern.

    Returns bucket name and a boolean success flag. When the
    hostname does not match, the hostname itself is returned
    together with False.
    """
    # Create RE pattern from Config.host_bucket
    host_regex = Config.Config().host_bucket % { 'bucket' : '(?P<bucket>.*)' }
    found = re.match(host_regex, hostname)
    if found:
        return found.group(1), True
    return (hostname, False)
__all__.append("getBucketFromHostname")

def getHostnameFromBucket(bucket):
    """
    Return the hostname for 'bucket' by filling it into the
    Config.host_bucket pattern.
    """
    host_template = Config.Config().host_bucket
    return host_template % { 'bucket' : bucket }
b020ea02	__all__.append("getHostnameFromBucket")
d439efb4
def calculateChecksum(buffer, mfile, offset, chunk_size, send_chunk):
    """
    Return the hexadecimal MD5 digest of one chunk of data.

    If 'buffer' is the empty string the data is read from 'mfile':
    up to 'chunk_size' bytes starting at 'offset', read in pieces of
    at most 'send_chunk' bytes. Otherwise 'buffer' itself is hashed
    and 'mfile' is not touched.
    """
    md5_hash = md5()
    size_left = chunk_size
    if buffer == '':
        mfile.seek(offset)
        while size_left > 0:
            data = mfile.read(min(send_chunk, size_left))
            if not data:
                ## End of file before chunk_size bytes were read:
                ## stop instead of looping forever on empty reads.
                break
            md5_hash.update(data)
            size_left -= len(data)
    else:
        md5_hash.update(buffer)
    return md5_hash.hexdigest()
__all__.append("calculateChecksum")
4459b9ad
3141e9a3	# Deal with the fact that pwd and grp modules don't exist for Windows
try:
    import pwd
    def getpwuid_username(uid):
        """returns a username from the password database for the given uid"""
        return pwd.getpwuid(uid).pw_name
except ImportError:
    import getpass
    def getpwuid_username(uid):
        """fallback without the pwd module: ignores 'uid' and returns getpass.getuser()"""
        return getpass.getuser()
__all__.append("getpwuid_username")

try:
    import grp
    def getgrgid_grpname(gid):
        """returns a groupname from the group database for the given gid"""
        return grp.getgrgid(gid).gr_name
except ImportError:
    def getgrgid_grpname(gid):
        """fallback without the grp module: ignores 'gid' and returns the fixed name "nobody" """
        return "nobody"
__all__.append("getgrgid_grpname")
d439efb4	# vim:et:ts=4:sts=4:ai
4459b9ad