S3/S3Uri.py
ec50b5a7
 ## Amazon S3 manager
 ## Author: Michal Ludvig <michal@logix.cz>
 ##         http://www.logix.cz/michal
 ## License: GPL Version 2
afd51b6c
 ## Copyright: TGRMN Software and contributors
ec50b5a7
 
a7ef3595
 import os
af3425b6
 import re
 import sys
 from BidirMap import BidirMap
66528933
 from logging import debug
cb0bbaef
 import S3
9bacffc4
 from Utils import unicodise, check_bucket_name_dns_conformity
b884a133
 import Config
af3425b6
 
 class S3Uri(object):
     """Base class and factory for every URI flavour defined in this module.

     ``S3Uri(string)`` does not build a plain ``S3Uri``.  ``__new__`` offers
     the string to each subclass found in this module and returns an
     instance of the first one whose ``__init__`` accepts it.  A subclass
     rejects a string by raising ``ValueError`` from ``__init__`` and is
     expected to provide a ``uri()`` method, which ``__str__``,
     ``__unicode__``, ``__repr__`` and ``basename()`` rely on.
     """
     ## Scheme tag, overridden by each subclass
     type = None
     ## Cache of candidate subclasses, filled on first use by __new__()
     _subclasses = None

     def __new__(cls, string):
         """Return an instance of the first subclass that accepts `string`.

         Raises ValueError if no subclass recognises the string.
         """
         if not cls._subclasses:
             ## Generate a list of all subclasses of S3Uri
             ## Only classes living in this module's namespace are
             ## considered; the base class itself is excluded.
             ## NOTE(review): the cache is read and written through `cls`,
             ## so instantiating a subclass directly behaves differently
             ## depending on whether the base cache is already filled.
             module_namespace = sys.modules[__name__].__dict__
             cls._subclasses = [
                 candidate for candidate in module_namespace.values()
                 if type(candidate) is type(cls)
                 and issubclass(candidate, cls)
                 and candidate != cls
             ]
         for subclass in cls._subclasses:
             ## Bypass subclass.__new__ (which is this very method) and run
             ## the parser by hand; ValueError means "not my kind of URI".
             instance = object.__new__(subclass)
             try:
                 instance.__init__(string)
             except ValueError:
                 continue
             return instance
         raise ValueError("%s: not a recognized URI" % string)

     def __str__(self):
         """Return the canonical URI string as produced by uri()."""
         return self.uri()

     def __unicode__(self):
         """Return the canonical URI string as produced by uri()."""
         return self.uri()

     def __repr__(self):
         """Return "<ClassName: uri>" for debugging output."""
         return "<%s: %s>" % (self.__class__.__name__, self.__unicode__())

     def public_url(self):
         """Always raise ValueError; subclasses with a public URL override this."""
         raise ValueError("This S3 URI does not have Anonymous URL representation")

     def basename(self):
         """Return the part of the URI after the last "/"."""
         return self.__unicode__().split("/")[-1]
7c0863d5
 
af3425b6
 class S3UriS3(S3Uri):
     """URI of the form ``s3://bucket[/object]``.

     The bucket is stored exactly as matched; the object key is passed
     through ``unicodise()``.
     """
     type = "s3"
     _re = re.compile("^s3://([^/]+)/?(.*)", re.IGNORECASE)

     def __init__(self, string):
         """Parse `string`; raise ValueError if it is not an s3:// URI."""
         match = self._re.match(string)
         if not match:
             raise ValueError("%s: not a S3 URI" % string)
         bucket, key = match.groups()
         self._bucket = bucket
         self._object = unicodise(key)

     def bucket(self):
         """Return the bucket name."""
         return self._bucket

     def object(self):
         """Return the object key (may be an empty string)."""
         return self._object

     def has_bucket(self):
         """Return True if the bucket name is non-empty."""
         return bool(self._bucket)

     def has_object(self):
         """Return True if the object key is non-empty."""
         return bool(self._object)

     def uri(self):
         """Return the URI as ``s3://bucket/object``."""
         return u"/".join([u"s3:/", self._bucket, self._object])

     def is_dns_compatible(self):
         """Return the result of check_bucket_name_dns_conformity() for the bucket."""
         return check_bucket_name_dns_conformity(self._bucket)

     def public_url(self):
         """Return the plain-HTTP URL of the object.

         The bucket goes into the host name when is_dns_compatible() is
         true, otherwise it becomes the first path component.  The host is
         taken from ``Config.Config().host_base``.
         """
         if self.is_dns_compatible():
             return "http://%s.%s/%s" % (self._bucket, Config.Config().host_base, self._object)
         else:
             return "http://%s/%s/%s" % (Config.Config().host_base, self._bucket, self._object)

     def host_name(self):
         """Return the amazonaws.com host name serving this bucket.

         NOTE(review): unlike public_url() this ignores Config and always
         uses s3.amazonaws.com - confirm that is intended.
         """
         if self.is_dns_compatible():
             return "%s.s3.amazonaws.com" % (self._bucket)
         else:
             return "s3.amazonaws.com"

     @staticmethod
     def compose_uri(bucket, object = ""):
         """Return the string ``s3://<bucket>/<object>``."""
         return "s3://%s/%s" % (bucket, object)

     @staticmethod
     def httpurl_to_s3uri(http_url):
         """Convert an s3.amazonaws.com HTTP(S) URL into an S3Uri.

         Both ``http://s3.amazonaws.com/bucket/object`` and
         ``http://bucket.s3.amazonaws.com/object`` are understood; the
         scheme prefix is optional.

         Raises ValueError if the URL has no host part or the host is not
         an s3.amazonaws.com one.
         """
         m = re.match("(https?://)?([^/]+)/?(.*)", http_url, re.IGNORECASE)
         if not m:
             ## "" or a string starting with "/" has no host part at all;
             ## report it like any other unparseable URL instead of
             ## failing with AttributeError on m.groups().
             raise ValueError("Unable to parse URL: %s" % http_url)
         hostname, path = m.groups()[1:]
         hostname = hostname.lower()
         if hostname == "s3.amazonaws.com":
             ## old-style url: http://s3.amazonaws.com/bucket/object
             ## Without a "/" the whole path is the bucket and the key is ""
             bucket, _, key = path.partition("/")
         elif hostname.endswith(".s3.amazonaws.com"):
             ## new-style url: http://bucket.s3.amazonaws.com/object
             bucket = hostname[:-(len(".s3.amazonaws.com"))]
             key = path
         else:
             raise ValueError("Unable to parse URL: %s" % http_url)
         return S3Uri("s3://%(bucket)s/%(object)s" % {
             'bucket' : bucket,
             'object' : key })
b75a87b7
 
af3425b6
 class S3UriS3FS(S3Uri):
     """URI of the form ``s3fs://fsname[/path]``.

     The path is passed through ``unicodise()`` and kept internally as a
     list of "/"-separated components.
     """
     type = "s3fs"
     _re = re.compile("^s3fs://([^/]*)/?(.*)", re.IGNORECASE)

     def __init__(self, string):
         """Parse `string`; raise ValueError if it is not an s3fs:// URI."""
         parsed = self._re.match(string)
         if parsed is None:
             raise ValueError("%s: not a S3fs URI" % string)
         fsname, remote_path = parsed.groups()
         self._fsname = fsname
         self._path = unicodise(remote_path).split("/")

     def fsname(self):
         """Return the filesystem name (the part right after ``s3fs://``)."""
         return self._fsname

     def path(self):
         """Return the path components re-joined with "/"."""
         components = self._path
         return "/".join(components)

     def uri(self):
         """Return the URI as ``s3fs://fsname/path``."""
         pieces = ["s3fs:/", self._fsname, self.path()]
         return "/".join(pieces)
af3425b6
 
 class S3UriFile(S3Uri):
     """Local path, written either bare or with a ``file://`` prefix.

     Any other ``scheme://`` prefix is rejected with ValueError.  The path
     is passed through ``unicodise()`` and kept as a list of
     "/"-separated components.
     """
     type = "file"
     ## Raw string so that \w reaches the regex engine as written instead
     ## of relying on Python leaving an unknown string escape untouched.
     _re = re.compile(r"^(\w+://)?(.*)")

     def __init__(self, string):
         """Parse `string`; raise ValueError for a non-file scheme."""
         ## Both groups may match nothing, so the pattern matches any string
         match = self._re.match(string)
         scheme, local_path = match.groups()
         if scheme not in (None, "file://"):
             raise ValueError("%s: not a file:// URI" % string)
         self._path = unicodise(local_path).split("/")

     def path(self):
         """Return the path components re-joined with "/"."""
         return "/".join(self._path)

     def uri(self):
         """Return the URI as ``file://`` followed by the path."""
         return "/".join(["file:/", self.path()])

     def isdir(self):
         """Return os.path.isdir() of the path."""
         return os.path.isdir(self.path())

     def dirname(self):
         """Return os.path.dirname() of the path."""
         return os.path.dirname(self.path())
a7ef3595
 
b75a87b7
 class S3UriCloudFront(S3Uri):
     """URI of the form ``cf://dist_id[/request_id]``."""
     type = "cf"
     _re = re.compile("^cf://([^/]*)/*(.*)", re.IGNORECASE)

     def __init__(self, string):
         """Parse `string`; raise ValueError if it is not a cf:// URI."""
         parsed = self._re.match(string)
         if parsed is None:
             raise ValueError("%s: not a CloudFront URI" % string)
         dist_id, request_id = parsed.groups()
         self._dist_id = dist_id
         ## An empty remainder or a lone "/" means no request id was given
         if request_id and request_id != "/":
             self._request_id = request_id
         else:
             self._request_id = None

     def dist_id(self):
         """Return the distribution id (the part right after ``cf://``)."""
         return self._dist_id

     def request_id(self):
         """Return the request id, or None if the URI carried none."""
         return self._request_id

     def uri(self):
         """Return ``cf://dist_id``, followed by ``/request_id`` when one is set."""
         base = "cf://" + self.dist_id()
         if not self.request_id():
             return base
         return base + "/" + self.request_id()
b75a87b7
 
af3425b6
 if __name__ == "__main__":
     ## Manual smoke test: parse one sample of every URI flavour and dump
     ## the resulting class, canonical form, type tag and the listed
     ## accessors.  Single-argument print(...) with %-formatting produces
     ## the same text under the Python 2 print statement and the
     ## Python 3 print function.
     samples = [
         ("s3://bucket/object", ("bucket", "object")),
         ("s3://bucket", ("bucket",)),
         ("s3fs://filesystem1/path/to/remote/file.txt", ("path",)),
         ("/path/to/local/file.txt", ("path",)),
         ("cf://1234567890ABCD/", ("dist_id",)),
     ]
     for sample, accessors in samples:
         uri = S3Uri(sample)
         print("type()  = %s" % (type(uri),))
         print("uri     = %s" % (uri,))
         print("uri.type= %s" % (uri.type,))
         for accessor in accessors:
             ## Labels are padded to 8 columns so the "=" signs line up
             print("%-8s= %s" % (accessor, getattr(uri, accessor)()))
         print("")
 
 # vim:et:ts=4:sts=4:ai