Thanks for all the merge work Matt!
Michal Ludvig authored on 2013/01/02 11:49:15
@@ -133,7 +133,7 @@ class DistributionConfig(object):
     ## </Logging>
     ## </DistributionConfig>

-    EMPTY_CONFIG = "<DistributionConfig><Origin/><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
+    EMPTY_CONFIG = "<DistributionConfig><S3Origin><DNSName/></S3Origin><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
     xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
     def __init__(self, xml = None, tree = None):
         if xml is None:
@@ -174,7 +174,8 @@ class DistributionConfig(object):
         tree.attrib['xmlns'] = DistributionConfig.xmlns

         ## Retain the order of the following calls!
-        appendXmlTextNode("Origin", self.info['Origin'], tree)
+        s3org = appendXmlTextNode("S3Origin", '', tree)
+        appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
         appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
         for cname in self.info['CNAME']:
             appendXmlTextNode("CNAME", cname.lower(), tree)
@@ -281,7 +282,7 @@ class InvalidationBatch(object):
         tree = ET.Element("InvalidationBatch")

         for path in self.paths:
-            if path[0] != "/":
+            if len(path) < 1 or path[0] != "/":
                 path = "/" + path
             appendXmlTextNode("Path", path, tree)
         appendXmlTextNode("CallerReference", self.reference, tree)
@@ -322,7 +323,7 @@ class CloudFront(object):
     def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
         dist_config = DistributionConfig()
         dist_config.info['Enabled'] = True
-        dist_config.info['Origin'] = uri.host_name()
+        dist_config.info['S3Origin']['DNSName'] = uri.host_name()
         dist_config.info['CallerReference'] = str(uri)
         dist_config.info['DefaultRootObject'] = default_root_object
         if comment == None:
@@ -423,7 +424,23 @@ class CloudFront(object):
                                        body = request_body, headers = headers)
         return response

-    def InvalidateObjects(self, uri, paths):
+    def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf):
+        # joseprio: if the user doesn't want to invalidate the default index
+        # path, or if the user wants to invalidate the root of the default
+        # index, we need to process those paths
+        if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
+            new_paths = []
+            default_index_suffix = '/' + default_index_file
+            for path in paths:
+                if path.endswith(default_index_suffix) or path == default_index_file:
+                    if invalidate_default_index_on_cf:
+                        new_paths.append(path)
+                    if invalidate_default_index_root_on_cf:
+                        new_paths.append(path[:-len(default_index_file)])
+                else:
+                    new_paths.append(path)
+            paths = new_paths
+
         # uri could be either cf:// or s3:// uri
         cfuri = self.get_dist_name_for_bucket(uri)
         if len(paths) > 999:
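To make the new path handling concrete, here is a standalone sketch of what the loop above does, with hypothetical inputs (index invalidation off, root invalidation on):

    # Mirrors the joseprio loop in InvalidateObjects; values are made up.
    default_index_file = "index.html"
    paths = ["docs/index.html", "index.html", "css/style.css"]
    default_index_suffix = '/' + default_index_file
    new_paths = []
    for path in paths:
        if path.endswith(default_index_suffix) or path == default_index_file:
            new_paths.append(path[:-len(default_index_file)])  # keep only the root form
        else:
            new_paths.append(path)
    print new_paths   # ['docs/', '', 'css/style.css']

The empty string produced by the bare "index.html" case is exactly why InvalidationBatch above gained the `len(path) < 1` guard: it becomes "/" once the leading slash is prepended, i.e. a root invalidation.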
@@ -671,7 +688,7 @@ class Cmd(object):
         d = response['distribution']
         dc = d.info['DistributionConfig']
         output("Distribution created:")
-        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
+        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
         pretty_output("DistId", d.uri())
         pretty_output("DomainName", d.info['DomainName'])
         pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
@@ -713,7 +730,7 @@ class Cmd(object):
         response = cf.GetDistInfo(cfuri)
         d = response['distribution']
         dc = d.info['DistributionConfig']
-        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['Origin']))
+        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
         pretty_output("DistId", d.uri())
         pretty_output("DomainName", d.info['DomainName'])
         pretty_output("Status", d.info['Status'])
@@ -40,6 +40,7 @@ class Config(object):
     proxy_port = 3128
     encrypt = False
     dry_run = False
+    add_encoding_exts = ""
     preserve_attrs = True
     preserve_attrs_list = [
         'uname', # Verbose owner Name (e.g. 'root')
@@ -50,10 +51,14 @@ class Config(object):
         'mtime', # Modification timestamp
         'ctime', # Creation timestamp
         'mode',  # File mode (e.g. rwxr-xr-x = 755)
+        'md5',   # File MD5 (if known)
         #'acl',  # Full ACL (not yet supported)
     ]
     delete_removed = False
+    delete_after = False
+    delete_after_fetch = False
     _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
+    delay_updates = False
     gpg_passphrase = ""
     gpg_command = ""
     gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
@@ -80,9 +85,15 @@ class Config(object):
     follow_symlinks = False
     socket_timeout = 300
     invalidate_on_cf = False
+    # joseprio: new flags for default index invalidation
+    invalidate_default_index_on_cf = False
+    invalidate_default_index_root_on_cf = True
     website_index = "index.html"
     website_error = ""
     website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
+    additional_destinations = []
+    cache_file = ""
+    add_headers = ""

     ## Creating a singleton
     def __new__(self, configfile = None):
@@ -112,6 +123,12 @@ class Config(object):
         cp = ConfigParser(configfile)
         for option in self.option_list():
             self.update_option(option, cp.get(option))
+
+        if cp.get('add_headers'):
+            for option in cp.get('add_headers').split(","):
+                (key, value) = option.split(':')
+                self.extra_headers[key.replace('_', '-').strip()] = value.strip()
+
         self._parsed_files.append(configfile)

     def dump_config(self, stream):
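A quick sketch of what the new add_headers parsing accepts — a comma-separated list of colon-separated pairs, with underscores in the header name mapped to dashes (the config values here are hypothetical):

    # .s3cfg: add_headers = Cache-Control: max-age=300, X_Robots_Tag: noindex
    raw = "Cache-Control: max-age=300, X_Robots_Tag: noindex"
    extra_headers = {}
    for option in raw.split(","):
        (key, value) = option.split(':')
        extra_headers[key.replace('_', '-').strip()] = value.strip()
    print extra_headers
    # {'Cache-Control': 'max-age=300', 'X-Robots-Tag': 'noindex'}

Note that `option.split(':')` raises ValueError if a header value itself contains a colon (e.g. a URL), so this parser only handles simple values.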
@@ -9,13 +9,15 @@ from S3Uri import S3Uri
 from SortedDict import SortedDict
 from Utils import *
 from Exceptions import ParameterError
+from HashCache import HashCache

 from logging import debug, info, warning, error

 import os
 import glob
+import copy

-__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include"]
+__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]

 def _fswalk_follow_symlinks(path):
     '''
@@ -26,13 +28,15 @@ def _fswalk_follow_symlinks(path):
     assert os.path.isdir(path) # only designed for directory argument
     walkdirs = [path]
     for dirpath, dirnames, filenames in os.walk(path):
+        handle_exclude_include_walk(dirpath, dirnames, [])
         for dirname in dirnames:
             current = os.path.join(dirpath, dirname)
             if os.path.islink(current):
                 walkdirs.append(current)
     for walkdir in walkdirs:
-        for value in os.walk(walkdir):
-            yield value
+        for dirpath, dirnames, filenames in os.walk(walkdir):
+            handle_exclude_include_walk(dirpath, dirnames, [])
+            yield (dirpath, dirnames, filenames)

 def _fswalk(path, follow_symlinks):
     '''
@@ -43,8 +47,15 @@ def _fswalk(path, follow_symlinks):
     follow_symlinks (bool) indicates whether to descend into symbolically linked directories
     '''
     if follow_symlinks:
-        return _fswalk_follow_symlinks(path)
-    return os.walk(path)
+        # iterate the symlink-aware walker; yielding the generator object
+        # itself would hand callers a generator instead of
+        # (dirpath, dirnames, filenames) tuples
+        for dirpath, dirnames, filenames in _fswalk_follow_symlinks(path):
+            yield (dirpath, dirnames, filenames)
+        return
+    for dirpath, dirnames, filenames in os.walk(path):
+        handle_exclude_include_walk(dirpath, dirnames, filenames)
+        yield (dirpath, dirnames, filenames)

 def filter_exclude_include(src_list):
     info(u"Applying --exclude/--include")
@@ -71,13 +77,71 @@ def filter_exclude_include(src_list):
             exclude_list[file] = src_list[file]
             del(src_list[file])
             continue
-        else:
-            debug(u"PASS: %s" % (file))
+
     return src_list, exclude_list

+def handle_exclude_include_walk(root, dirs, files):
+    cfg = Config()
+    copydirs = copy.copy(dirs)
+    copyfiles = copy.copy(files)
+
+    # exclude dir matches in the current directory
+    # this prevents us from recursing down trees we know we want to ignore
+    for x in copydirs:
+        d = os.path.join(root, x, '')
+        excluded = False
+        for r in cfg.exclude:
+            if r.search(d):
+                excluded = True
+                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                break
+        if excluded:
+            ## No need to check for --include if not excluded
+            for r in cfg.include:
+                if r.search(d):
+                    excluded = False
+                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                    break
+        if excluded:
+            ## Still excluded - ok, action it
+            dirs.remove(x)
+            continue
+
+    # exclude file matches in the current directory
+    for x in copyfiles:
+        file = os.path.join(root, x)
+        excluded = False
+        for r in cfg.exclude:
+            if r.search(file):
+                excluded = True
+                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
+                break
+        if excluded:
+            ## No need to check for --include if not excluded
+            for r in cfg.include:
+                if r.search(file):
+                    excluded = False
+                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
+                    break
+        if excluded:
+            ## Still excluded - ok, action it
+            debug(u"EXCLUDE: %s" % file)
+            files.remove(x)
+            continue
+
 def fetch_local_list(args, recursive = None):
-    def _get_filelist_local(local_uri):
+    def _get_filelist_local(loc_list, local_uri, cache):
         info(u"Compiling list of local files...")
+
+        if deunicodise(local_uri.basename()) == "-":
+            loc_list = SortedDict(ignore_case = False)
+            loc_list["-"] = {
+                'full_name_unicode' : '-',
+                'full_name' : '-',
+                'size' : -1,
+                'mtime' : -1,
+            }
+            return loc_list, True
         if local_uri.isdir():
             local_base = deunicodise(local_uri.basename())
             local_path = deunicodise(local_uri.path())
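handle_exclude_include_walk works by mutating the `dirs` list that os.walk hands out: os.walk documents that removing entries from that list in place prevents descent into those directories. A minimal illustration of the idiom, independent of s3cmd:

    import os

    for dirpath, dirnames, filenames in os.walk("."):
        if ".git" in dirnames:
            dirnames.remove(".git")   # os.walk will now skip .git entirely
        for f in filenames:
            print os.path.join(dirpath, f)

This is also why the function iterates over copies (`copydirs`, `copyfiles`) while removing from the originals — mutating a list while iterating it would skip entries.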
@@ -88,7 +152,6 @@ def fetch_local_list(args, recursive = None):
             local_path = deunicodise(local_uri.dirname())
             filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
             single_file = True
-        loc_list = SortedDict(ignore_case = False)
         for root, dirs, files in filelist:
             rel_root = root.replace(local_path, local_base, 1)
             for f in files:
@@ -112,11 +175,41 @@ def fetch_local_list(args, recursive = None):
                     'full_name' : full_name,
                     'size' : sr.st_size,
                     'mtime' : sr.st_mtime,
+                    'dev'   : sr.st_dev,
+                    'inode' : sr.st_ino,
+                    'uid' : sr.st_uid,
+                    'gid' : sr.st_gid,
+                    'sr': sr # save it all, may need it in preserve_attrs_list
                     ## TODO: Possibly more to save here...
                 }
+                if 'md5' in cfg.sync_checks:
+                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
+                    if md5 is None:
+                        try:
+                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
+                        except IOError:
+                            continue
+                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
+                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
         return loc_list, single_file

+    def _maintain_cache(cache, local_list):
+        if cfg.cache_file:
+            cache.mark_all_for_purge()
+            for i in local_list.keys():
+                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
+            cache.purge()
+            cache.save(cfg.cache_file)
+
     cfg = Config()
+
+    cache = HashCache()
+    if cfg.cache_file:
+        try:
+            cache.load(cfg.cache_file)
+        except IOError:
+            info(u"No cache file found, creating it.")
+
     local_uris = []
     local_list = SortedDict(ignore_case = False)
     single_file = False
@@ -136,8 +229,7 @@ def fetch_local_list(args, recursive = None):
         local_uris.append(uri)

     for uri in local_uris:
-        list_for_uri, single_file = _get_filelist_local(uri)
-        local_list.update(list_for_uri)
+        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

     ## Single file is True if and only if the user
     ## specified one local URI and that URI represents
@@ -147,6 +239,8 @@ def fetch_local_list(args, recursive = None):
     if len(local_list) > 1:
         single_file = False

+    _maintain_cache(cache, local_list)
+
     return local_list, single_file

 def fetch_remote_list(args, require_attribs = False, recursive = None):
@@ -196,7 +290,11 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                     'object_key' : object['Key'],
                     'object_uri_str' : object_uri_str,
                     'base_uri' : remote_uri,
+                    'dev' : None,
+                    'inode' : None,
                 }
+                md5 = object['ETag'][1:-1]
+                rem_list.record_md5(key, md5)
                 if break_now:
                     break
         return rem_list
@@ -222,6 +320,7 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
         objectlist = _get_filelist_remote(uri)
         for key in objectlist:
             remote_list[key] = objectlist[key]
+            remote_list.record_md5(key, objectlist.get_md5(key))
     else:
         for uri in remote_uris:
             uri_str = str(uri)
@@ -259,82 +358,146 @@ def fetch_remote_list(args, require_attribs = False, recursive = None):
                 'md5': response['headers']['etag'].strip('"\''),
                 'timestamp' : dateRFC822toUnix(response['headers']['date'])
             })
+            # get md5 from header if it's present. We would have set that during upload
+            if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
+                attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
+                if attrs.has_key('md5'):
+                    remote_item.update({'md5': attrs['md5']})
+
             remote_list[key] = remote_item
     return remote_list

-def compare_filelists(src_list, dst_list, src_remote, dst_remote):
+def parse_attrs_header(attrs_header):
+    attrs = {}
+    for attr in attrs_header.split("/"):
+        key, val = attr.split(":")
+        attrs[key] = val
+    return attrs
+
+
+def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
     def __direction_str(is_remote):
         return is_remote and "remote" or "local"

-    # We don't support local->local sync, use 'rsync' or something like that instead ;-)
+    def _compare(src_list, dst_list, src_remote, dst_remote, file):
+        """Return True if src_list[file] matches dst_list[file], else False"""
+        attribs_match = True
+        if not (src_list.has_key(file) and dst_list.has_key(file)):
+            info(u"file does not exist in one side or the other: src_list=%s, dst_list=%s" % (src_list.has_key(file), dst_list.has_key(file)))
+            return False
+
+        ## check size first
+        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
+            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
+            attribs_match = False
+
+        ## check md5
+        compare_md5 = 'md5' in cfg.sync_checks
+        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
+        if compare_md5:
+            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
+                compare_md5 = False
+                info(u"disabled md5 check for %s" % file)
+        if attribs_match and compare_md5:
+            try:
+                src_md5 = src_list.get_md5(file)
+                dst_md5 = dst_list.get_md5(file)
+            except (IOError,OSError), e:
+                # md5 sum verification failed - ignore that file altogether
+                debug(u"IGNR: %s (disappeared)" % (file))
+                warning(u"%s: file disappeared, ignoring." % (file))
+                raise
+
+            if src_md5 != dst_md5:
+                ## checksums are different.
+                attribs_match = False
+                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
+
+        return attribs_match
+
+    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
     assert(not(src_remote == False and dst_remote == False))

     info(u"Verifying attributes...")
     cfg = Config()
-    exists_list = SortedDict(ignore_case = False)
+    ## Items left on src_list will be transferred
+    ## Items left on update_list will be transferred after src_list
+    ## Items left on copy_pairs will be copied from dst1 to dst2
+    update_list = SortedDict(ignore_case = False)
+    ## Items left on dst_list will be deleted
+    copy_pairs = []
+

     debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))
-    debug("src_list.keys: %s" % src_list.keys())
-    debug("dst_list.keys: %s" % dst_list.keys())

-    for file in src_list.keys():
-        debug(u"CHECK: %s" % file)
-        if dst_list.has_key(file):
+    for relative_file in src_list.keys():
+        debug(u"CHECK: %s" % (relative_file))
+
+        if dst_list.has_key(relative_file):
             ## Was --skip-existing requested?
-            if cfg.skip_existing:
-                debug(u"IGNR: %s (used --skip-existing)" % (file))
-                exists_list[file] = src_list[file]
-                del(src_list[file])
-                ## Remove from destination-list, all that is left there will be deleted
-                del(dst_list[file])
+            if cfg.skip_existing:
+                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
+                del(src_list[relative_file])
+                del(dst_list[relative_file])
+                continue
+
+            try:
+                compare_result = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
+            except (IOError,OSError), e:
+                del(src_list[relative_file])
+                del(dst_list[relative_file])
                 continue

-            attribs_match = True
-            ## Check size first
-            if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
-                debug(u"XFER: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
-                attribs_match = False
+            if compare_result:
+                debug(u"IGNR: %s (transfer not needed)" % relative_file)
+                del(src_list[relative_file])
+                del(dst_list[relative_file])

-            ## Check MD5
-            compare_md5 = 'md5' in cfg.sync_checks
-            # Multipart-uploaded files don't have a valid MD5 sum - it ends with "...-NN"
-            if compare_md5:
-                if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
-                    compare_md5 = False
-                    info(u"Disabled MD5 check for %s" % file)
-            if attribs_match and compare_md5:
+            else:
+                # look for matching file in src
                 try:
-                    if src_remote == False and dst_remote == True:
-                        src_md5 = hash_file_md5(src_list[file]['full_name'])
-                        dst_md5 = dst_list[file]['md5']
-                    elif src_remote == True and dst_remote == False:
-                        src_md5 = src_list[file]['md5']
-                        dst_md5 = hash_file_md5(dst_list[file]['full_name'])
-                    elif src_remote == True and dst_remote == True:
-                        src_md5 = src_list[file]['md5']
-                        dst_md5 = dst_list[file]['md5']
-                except (IOError,OSError), e:
-                    # MD5 sum verification failed - ignore that file altogether
-                    debug(u"IGNR: %s (disappeared)" % (file))
-                    warning(u"%s: file disappeared, ignoring." % (file))
-                    del(src_list[file])
-                    del(dst_list[file])
-                    continue
-
-                if src_md5 != dst_md5:
-                    ## Checksums are different.
-                    attribs_match = False
-                    debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))
-
-            if attribs_match:
-                ## Remove from source-list, all that is left there will be transferred
-                debug(u"IGNR: %s (transfer not needed)" % file)
-                exists_list[file] = src_list[file]
-                del(src_list[file])
-
-            ## Remove from destination-list, all that is left there will be deleted
-            del(dst_list[file])
-
-    return src_list, dst_list, exists_list
+                    md5 = src_list.get_md5(relative_file)
+                except IOError:
+                    md5 = None
+                if md5 is not None and dst_list.by_md5.has_key(md5):
+                    # Found one, we want to copy
+                    dst1 = list(dst_list.by_md5[md5])[0]
+                    debug(u"REMOTE COPY src: %s -> %s" % (dst1, relative_file))
+                    copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                    del(src_list[relative_file])
+                    del(dst_list[relative_file])
+                else:
+                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                    dst_list.record_md5(relative_file, md5)
+                    update_list[relative_file] = src_list[relative_file]
+                    del src_list[relative_file]
+                    del dst_list[relative_file]
+
+        else:
+            # dst doesn't have this file
+            # look for matching file elsewhere in dst
+            try:
+                md5 = src_list.get_md5(relative_file)
+            except IOError:
+                md5 = None
+            dst1 = dst_list.find_md5_one(md5)
+            if dst1 is not None:
+                # Found one, we want to copy
+                debug(u"REMOTE COPY dst: %s -> %s" % (dst1, relative_file))
+                copy_pairs.append((src_list[relative_file], dst1, relative_file))
+                del(src_list[relative_file])
+            else:
+                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
+                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
+                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
+                dst_list.record_md5(relative_file, md5)
+
+    for f in dst_list.keys():
+        if not src_list.has_key(f) and not update_list.has_key(f):
+            # leave only those not on src_list + update_list
+            del dst_list[f]
+
+    return src_list, dst_list, update_list, copy_pairs

 # vim:et:ts=4:sts=4:ai
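Two notes on the hunk above. First, `parse_attrs_header` just unpacks the slash/colon encoded `x-amz-meta-s3cmd-attrs` metadata that s3cmd itself writes at upload time; with a made-up header value:

    header = "uid:500/gid:500/mode:33188/mtime:1356996555/md5:d41d8cd98f00b204e9800998ecf8427e"
    attrs = {}
    for attr in header.split("/"):
        key, val = attr.split(":")
        attrs[key] = val
    print attrs['md5']   # d41d8cd98f00b204e9800998ecf8427e

Second, `compare_filelists` now returns four values instead of three — `(src_list, dst_list, update_list, copy_pairs)` — so every caller has to be updated to unpack the new `update_list` (transfers deferred until after src_list) and `copy_pairs` (remote-side copies of content that already exists at the destination).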
new file mode 100644
@@ -0,0 +1,53 @@
+import cPickle as pickle
+
+class HashCache(object):
+    def __init__(self):
+        self.inodes = dict()
+
+    def add(self, dev, inode, mtime, size, md5):
+        if dev not in self.inodes:
+            self.inodes[dev] = dict()
+        if inode not in self.inodes[dev]:
+            self.inodes[dev][inode] = dict()
+        self.inodes[dev][inode][mtime] = dict(md5=md5, size=size)
+
+    def md5(self, dev, inode, mtime, size):
+        try:
+            d = self.inodes[dev][inode][mtime]
+            if d['size'] != size:
+                return None
+        except:
+            return None
+        return d['md5']
+
+    def mark_all_for_purge(self):
+        for d in self.inodes.keys():
+            for i in self.inodes[d].keys():
+                for c in self.inodes[d][i].keys():
+                    self.inodes[d][i][c]['purge'] = True
+
+    def unmark_for_purge(self, dev, inode, mtime, size):
+        d = self.inodes[dev][inode][mtime]
+        if d['size'] == size and 'purge' in d:
+            del self.inodes[dev][inode][mtime]['purge']
+
+    def purge(self):
+        for d in self.inodes.keys():
+            for i in self.inodes[d].keys():
+                for m in self.inodes[d][i].keys():
+                    if 'purge' in self.inodes[d][i][m]:
+                        del self.inodes[d][i]
+                        break
+
+    def save(self, f):
+        d = dict(inodes=self.inodes, version=1)
+        f = open(f, 'w')
+        p = pickle.dump(d, f)
+        f.close()
+
+    def load(self, f):
+        f = open(f, 'r')
+        d = pickle.load(f)
+        f.close()
+        if d.get('version') == 1 and 'inodes' in d:
+            self.inodes = d['inodes']
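A hedged sketch of how the new HashCache is meant to be driven — this mirrors the wiring in fetch_local_list above; the file names and cache path are hypothetical:

    import os, hashlib
    from HashCache import HashCache   # as imported in FileLists above

    cache = HashCache()
    try:
        cache.load("/home/user/.s3cmd.cache")   # cfg.cache_file
    except IOError:
        pass                                     # first run: no cache yet

    sr = os.stat("somefile.dat")                 # hypothetical local file
    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
    if md5 is None:
        # cache miss: pay the hashing I/O once, then remember the result
        md5 = hashlib.md5(open("somefile.dat", "rb").read()).hexdigest()
        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
    cache.save("/home/user/.s3cmd.cache")

Keying on (dev, inode, mtime, size) means a file is only re-hashed when it actually changes, and mark_all_for_purge/unmark_for_purge/purge evict entries for files that no longer exist locally.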
@@ -42,32 +42,56 @@ class MultiPartUpload(object):
         if not self.upload_id:
             raise RuntimeError("Attempting to use a multipart upload that has not been initiated.")

-        size_left = file_size = os.stat(self.file.name)[ST_SIZE]
-        self.chunk_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024
-        nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1)
-        debug("MultiPart: Uploading %s in %d parts" % (self.file.name, nr_parts))
+        self.chunk_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024
+
+        if self.file.name != "<stdin>":
+            size_left = file_size = os.stat(self.file.name)[ST_SIZE]
+            nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1)
+            debug("MultiPart: Uploading %s in %d parts" % (self.file.name, nr_parts))
+        else:
+            debug("MultiPart: Uploading from %s" % (self.file.name))

         seq = 1
-        while size_left > 0:
-            offset = self.chunk_size * (seq - 1)
-            current_chunk_size = min(file_size - offset, self.chunk_size)
-            size_left -= current_chunk_size
-            labels = {
-                'source' : unicodise(self.file.name),
-                'destination' : unicodise(self.uri.uri()),
-                'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
-            }
-            try:
-                self.upload_part(seq, offset, current_chunk_size, labels)
-            except:
-                error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq))
-                self.abort_upload()
-                raise
-            seq += 1
+        if self.file.name != "<stdin>":
+            while size_left > 0:
+                offset = self.chunk_size * (seq - 1)
+                current_chunk_size = min(file_size - offset, self.chunk_size)
+                size_left -= current_chunk_size
+                labels = {
+                    'source' : unicodise(self.file.name),
+                    'destination' : unicodise(self.uri.uri()),
+                    'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
+                }
+                try:
+                    self.upload_part(seq, offset, current_chunk_size, labels)
+                except:
+                    error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq))
+                    self.abort_upload()
+                    raise
+                seq += 1
+        else:
+            while True:
+                buffer = self.file.read(self.chunk_size)
+                offset = self.chunk_size * (seq - 1)
+                current_chunk_size = len(buffer)
+                labels = {
+                    'source' : unicodise(self.file.name),
+                    'destination' : unicodise(self.uri.uri()),
+                    'extra' : "[part %d, %s]" % (seq, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
+                }
+                if len(buffer) == 0: # EOF
+                    break
+                try:
+                    self.upload_part(seq, offset, current_chunk_size, labels, buffer)
+                except:
+                    error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq))
+                    self.abort_upload()
+                    raise
+                seq += 1

         debug("MultiPart: Upload finished: %d parts", seq - 1)

-    def upload_part(self, seq, offset, chunk_size, labels):
+    def upload_part(self, seq, offset, chunk_size, labels, buffer = ''):
         """
         Upload a file chunk
         http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadUploadPart.html
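The new stdin branch cannot know the total size up front, so it simply reads chunk_size bytes per part until EOF; a stripped-down sketch of that read loop:

    import sys

    chunk_size = 15 * 1024 * 1024   # multipart_chunk_size_mb default of 15
    seq = 1
    while True:
        buffer = sys.stdin.read(chunk_size)
        if len(buffer) == 0:        # EOF: no more parts to send
            break
        # each non-empty buffer becomes one upload_part(seq, ..., buffer) call
        seq += 1

Presumably this is what enables piping a stream straight into an upload (e.g. a database dump into `s3cmd put - s3://bucket/key` — invocation shown for illustration only).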
@@ -77,7 +101,7 @@ class MultiPartUpload(object):
         headers = { "content-length": chunk_size }
         query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id)
         request = self.s3.create_request("OBJECT_PUT", uri = self.uri, headers = headers, extra = query_string)
-        response = self.s3.send_file(request, self.file, labels, offset = offset, chunk_size = chunk_size)
+        response = self.s3.send_file(request, self.file, labels, buffer, offset = offset, chunk_size = chunk_size)
         self.parts[seq] = response["headers"]["etag"]
         return response
@@ -5,10 +5,12 @@

 import sys
 import datetime
+import time
 import Utils

 class Progress(object):
     _stdout = sys.stdout
+    _last_display = 0

     def __init__(self, labels, total_size):
         self._stdout = sys.stdout
@@ -48,6 +50,13 @@ class Progress(object):
         self._stdout.write(u"%(source)s -> %(destination)s %(extra)s\n" % self.labels)
         self._stdout.flush()

+    def _display_needed(self):
+        # We only need to update the display every so often.
+        if time.time() - self._last_display > 1:
+            self._last_display = time.time()
+            return True
+        return False
+
     def display(self, new_file = False, done_message = None):
         """
         display(new_file = False[/True], done = False[/True])
@@ -98,6 +107,10 @@ class ProgressANSI(Progress):
             self._stdout.flush()
             return

+        # Only display progress every so often
+        if not (new_file or done_message) and not self._display_needed():
+            return
+
         timedelta = self.time_current - self.time_start
         sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0
         if (sec_elapsed > 0):
@@ -132,6 +145,10 @@ class ProgressCR(Progress):
             self.output_labels()
             return

+        # Only display progress every so often
+        if not (new_file or done_message) and not self._display_needed():
+            return
+
         timedelta = self.time_current - self.time_start
         sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0
         if (sec_elapsed > 0):
@@ -29,18 +29,47 @@ from MultiPart import MultiPartUpload
 from S3Uri import S3Uri

 try:
-    import magic
+    import magic, gzip
     try:
         ## https://github.com/ahupp/python-magic
         magic_ = magic.Magic(mime=True)
-        def mime_magic(file):
+        def mime_magic_file(file):
             return magic_.from_file(file)
-    except (TypeError, AttributeError):
+        def mime_magic_buffer(buffer):
+            return magic_.from_buffer(buffer)
+    except TypeError:
+        ## http://pypi.python.org/pypi/filemagic
+        try:
+            magic_ = magic.Magic(flags=magic.MAGIC_MIME)
+            def mime_magic_file(file):
+                return magic_.id_filename(file)
+            def mime_magic_buffer(buffer):
+                return magic_.id_buffer(buffer)
+        except TypeError:
+            ## file-5.11 built-in python bindings
+            magic_ = magic.open(magic.MAGIC_MIME)
+            magic_.load()
+            def mime_magic_file(file):
+                return magic_.file(file)
+            def mime_magic_buffer(buffer):
+                return magic_.buffer(buffer)
+
+    except AttributeError:
         ## Older python-magic versions
         magic_ = magic.open(magic.MAGIC_MIME)
         magic_.load()
-        def mime_magic(file):
+        def mime_magic_file(file):
             return magic_.file(file)
+        def mime_magic_buffer(buffer):
+            return magic_.buffer(buffer)
+
+    def mime_magic(file):
+        type = mime_magic_file(file)
+        if type != "application/x-gzip; charset=binary":
+            return (type, None)
+        else:
+            return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip')
+
 except ImportError, e:
     if str(e).find("magic") >= 0:
         magic_message = "Module python-magic is not available."
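mime_magic now returns a (content_type, content_encoding) pair, and for gzip files it re-runs detection on the first 8 kB of the decompressed stream. Illustrative calls with hypothetical files — the exact strings depend on the local magic database, including the new rules file added at the bottom of this commit:

    print mime_magic("style.css")      # e.g. ('text/css; charset=us-ascii', None)
    print mime_magic("style.css.gz")   # e.g. ('text/css; charset=us-ascii', 'gzip')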
@@ -53,13 +82,16 @@ except ImportError, e:
         if (not magic_warned):
             warning(magic_message)
             magic_warned = True
-        return mimetypes.guess_type(file)[0]
+        return mimetypes.guess_type(file)

 __all__ = []
 class S3Request(object):
     def __init__(self, s3, method_string, resource, headers, params = {}):
         self.s3 = s3
         self.headers = SortedDict(headers or {}, ignore_case = True)
+        # Add in any extra headers from s3 config object
+        if self.s3.config.extra_headers:
+            self.headers.update(self.s3.config.extra_headers)
         self.resource = resource
         self.method_string = method_string
         self.params = params
@@ -211,9 +243,10 @@ class S3(object):
         truncated = True
         list = []
         prefixes = []
+        conn = self.get_connection(bucket)

         while truncated:
-            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
+            response = self.bucket_list_noparse(conn, bucket, prefix, recursive, uri_params)
             current_list = _get_contents(response["data"])
             current_prefixes = _get_common_prefixes(response["data"])
             truncated = _list_truncated(response["data"])
@@ -227,17 +260,19 @@ class S3(object):
             list += current_list
             prefixes += current_prefixes

+        conn.close()
+
         response['list'] = list
         response['common_prefixes'] = prefixes
         return response

-    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
+    def bucket_list_noparse(self, connection, bucket, prefix = None, recursive = None, uri_params = {}):
         if prefix:
             uri_params['prefix'] = self.urlencode_string(prefix)
         if not self.config.recursive and not recursive:
             uri_params['delimiter'] = "/"
         request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
-        response = self.send_request(request)
+        response = self.send_request(request, conn = connection)
         #debug(response)
         return response
@@ -339,17 +374,36 @@ class S3(object):

         return response

+    def add_encoding(self, filename, content_type):
+        if content_type.find("charset=") != -1:
+            return False
+        exts = self.config.add_encoding_exts.split(',')
+        if exts[0] == '':
+            return False
+        parts = filename.rsplit('.', 2)
+        if len(parts) < 2:
+            return False
+        ext = parts[1]
+        if ext in exts:
+            return True
+        else:
+            return False
+
     def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
         # TODO TODO
         # Make it consistent with stream-oriented object_get()
         if uri.type != "s3":
             raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

-        if not os.path.isfile(filename):
+        if filename != "-" and not os.path.isfile(filename):
             raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
         try:
-            file = open(filename, "rb")
-            size = os.stat(filename)[ST_SIZE]
+            if filename == "-":
+                file = sys.stdin
+                size = 0
+            else:
+                file = open(filename, "rb")
+                size = os.stat(filename)[ST_SIZE]
         except (IOError, OSError), e:
             raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))
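add_encoding answers "should a charset be appended to this Content-Type?" using the new add_encoding_exts option. Expected behaviour with a hypothetical `add_encoding_exts = "css,js"` and an S3 instance `s3`:

    s3.add_encoding("style.css", "text/css")                   # True
    s3.add_encoding("style.css.gz", "application/x-gzip")      # True  ('css' is parts[1])
    s3.add_encoding("app.min.js", "application/javascript")    # False ('min' is parts[1])
    s3.add_encoding("style.css", "text/css; charset=utf-8")    # False (charset already set)

The `rsplit('.', 2)` / `parts[1]` choice targets compressed names like foo.css.gz (the pre-compression extension gets checked), at the cost of checking 'min' rather than 'js' for foo.min.js.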
@@ -359,12 +413,21 @@ class S3(object):

         ## MIME-type handling
         content_type = self.config.mime_type
-        if not content_type and self.config.guess_mime_type:
-            content_type = mime_magic(filename)
+        content_encoding = None # default here, so the checks below don't hit a NameError when MIME guessing is skipped
+        if filename != "-" and not content_type and self.config.guess_mime_type:
+            (content_type, content_encoding) = mime_magic(filename)
         if not content_type:
             content_type = self.config.default_mime_type
-        debug("Content-Type set to '%s'" % content_type)
+        if not content_encoding:
+            content_encoding = self.config.encoding.upper()
+
+        ## add charset to content type
+        if self.add_encoding(filename, content_type) and content_encoding is not None:
+            content_type = content_type + "; charset=" + content_encoding
+
         headers["content-type"] = content_type
+        if content_encoding is not None:
+            headers["content-encoding"] = content_encoding

         ## Other Amazon S3 attributes
         if self.config.acl_public:
@@ -374,8 +436,10 @@ class S3(object):

         ## Multipart decision
         multipart = False
+        if not self.config.enable_multipart and filename == "-":
+            raise ParameterError("Multi-part upload is required to upload from stdin")
         if self.config.enable_multipart:
-            if size > self.config.multipart_chunk_size_mb * 1024 * 1024:
+            if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-":
                 multipart = True
         if multipart:
             # Multipart requests are quite different... drop here
@@ -456,6 +520,17 @@ class S3(object):
         response = self.send_request(request, body)
         return response

+    def set_policy(self, uri, policy):
+        if uri.has_object():
+            request = self.create_request("OBJECT_PUT", uri = uri, extra = "?policy")
+        else:
+            request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?policy")
+
+        body = str(policy)
+        debug(u"set_policy(%s): policy-json: %s" % (uri, body))
+        response = self.send_request(request, body)
+        return response
+
     def get_accesslog(self, uri):
         request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging")
         response = self.send_request(request)
@@ -571,7 +646,7 @@ class S3(object):
         # Wait a few seconds. The more it fails the more we wait.
         return (self._max_retries - retries + 1) * 3

-    def send_request(self, request, body = None, retries = _max_retries):
+    def send_request(self, request, body = None, retries = _max_retries, conn = None):
         method_string, resource, headers = request.get_triplet()
         debug("Processing request, please wait...")
         if not headers.has_key('content-length'):
@@ -580,7 +655,13 @@ class S3(object):
         # "Stringify" all headers
         for header in headers.keys():
             headers[header] = str(headers[header])
-        conn = self.get_connection(resource['bucket'])
+        if conn is None:
+            debug("Establishing connection")
+            conn = self.get_connection(resource['bucket'])
+            close_conn = True
+        else:
+            debug("Using existing connection")
+            close_conn = False
         uri = self.format_uri(resource)
         debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
         conn.request(method_string, uri, body, headers)
@@ -591,7 +672,8 @@ class S3(object):
             response["headers"] = convertTupleListToDict(http_response.getheaders())
             response["data"] = http_response.read()
             debug("Response: " + str(response))
-            conn.close()
+            if close_conn is True:
+                conn.close()
         except Exception, e:
             if retries:
                 warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
@@ -625,7 +707,7 @@ class S3(object):

         return response

-    def send_file(self, request, file, labels, throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
+    def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
         method_string, resource, headers = request.get_triplet()
         size_left = size_total = headers.get("content-length")
         if self.config.progress_meter:
@@ -648,15 +730,19 @@ class S3(object):
                 warning("Waiting %d sec..." % self._fail_wait(retries))
                 time.sleep(self._fail_wait(retries))
                 # Connection error -> same throttle value
-                return self.send_file(request, file, labels, throttle, retries - 1, offset, chunk_size)
+                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
             else:
                 raise S3UploadError("Upload failed for: %s" % resource['uri'])
-        file.seek(offset)
+        if buffer == '':
+            file.seek(offset)
         md5_hash = md5()
         try:
             while (size_left > 0):
-                #debug("SendFile: Reading up to %d bytes from '%s'" % (self.config.send_chunk, file.name))
-                data = file.read(min(self.config.send_chunk, size_left))
+                #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left))
+                if buffer == '':
+                    data = file.read(min(self.config.send_chunk, size_left))
+                else:
+                    data = buffer
                 md5_hash.update(data)
                 conn.send(data)
                 if self.config.progress_meter:
@@ -685,7 +771,7 @@ class S3(object):
                 warning("Waiting %d sec..." % self._fail_wait(retries))
                 time.sleep(self._fail_wait(retries))
                 # Connection error -> same throttle value
-                return self.send_file(request, file, labels, throttle, retries - 1, offset, chunk_size)
+                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
             else:
                 debug("Giving up on '%s' %s" % (file.name, e))
                 raise S3UploadError("Upload failed for: %s" % resource['uri'])
@@ -707,7 +793,7 @@ class S3(object):
             redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
             self.set_hostname(redir_bucket, redir_hostname)
             warning("Redirected to: %s" % (redir_hostname))
-            return self.send_file(request, file, labels, offset = offset, chunk_size = chunk_size)
+            return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

         # S3 from time to time doesn't send ETag back in a response :-(
         # Force re-upload here.
@@ -730,7 +816,7 @@ class S3(object):
                 warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response)))
                 warning("Waiting %d sec..." % self._fail_wait(retries))
                 time.sleep(self._fail_wait(retries))
-                return self.send_file(request, file, labels, throttle, retries - 1, offset, chunk_size)
+                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
             else:
                 warning("Too many failures. Giving up on '%s'" % (file.name))
                 raise S3UploadError
@@ -743,7 +829,7 @@ class S3(object):
             warning("MD5 Sums don't match!")
             if retries:
                 warning("Retrying upload of %s" % (file.name))
-                return self.send_file(request, file, labels, throttle, retries - 1, offset, chunk_size)
+                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
             else:
                 warning("Too many failures. Giving up on '%s'" % (file.name))
                 raise S3UploadError
@@ -826,6 +912,9 @@ class S3(object):
             while (current_position < size_total):
                 this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
                 data = http_response.read(this_chunk)
+                if len(data) == 0:
+                    raise S3Error("EOF from S3!")
+
                 stream.write(data)
                 if start_position == 0:
                     md5_hash.update(data)
@@ -10,6 +10,7 @@ from BidirMap import BidirMap
 from logging import debug
 import S3
 from Utils import unicodise, check_bucket_name_dns_conformity
+import Config

 class S3Uri(object):
     type = None
@@ -80,9 +81,9 @@ class S3UriS3(S3Uri):

     def public_url(self):
         if self.is_dns_compatible():
-            return "http://%s.s3.amazonaws.com/%s" % (self._bucket, self._object)
+            return "http://%s.%s/%s" % (self._bucket, Config.Config().host_base, self._object)
         else:
-            return "http://s3.amazonaws.com/%s/%s" % (self._bucket, self._object)
+            return "http://%s/%s/%s" % (Config.Config().host_base, self._bucket, self._object)

     def host_name(self):
         if self.is_dns_compatible():
@@ -4,6 +4,7 @@
 ## License: GPL Version 2

 from BidirMap import BidirMap
+import Utils

 class SortedDictIterator(object):
     def __init__(self, sorted_dict, keys):
@@ -26,6 +27,8 @@ class SortedDict(dict):
         """
         dict.__init__(self, mapping, **kwargs)
         self.ignore_case = ignore_case
+        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
+        self.by_md5 = dict() # {md5: set(relative_files)}

     def keys(self):
         keys = dict.keys(self)
@@ -45,6 +48,47 @@ class SortedDict(dict):
     def __iter__(self):
         return SortedDictIterator(self, self.keys())

+
+    def record_md5(self, relative_file, md5):
+        if md5 not in self.by_md5:
+            self.by_md5[md5] = set()
+        self.by_md5[md5].add(relative_file)
+
+    def find_md5_one(self, md5):
+        try:
+            return list(self.by_md5.get(md5, set()))[0]
+        except:
+            return None
+
+    def get_md5(self, relative_file):
+        """returns md5 if it can, or raises IOError if file is unreadable"""
+        md5 = None
+        if 'md5' in self[relative_file]:
+            return self[relative_file]['md5']
+        md5 = self.get_hardlink_md5(relative_file)
+        if md5 is None:
+            md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
+        self.record_md5(relative_file, md5)
+        self[relative_file]['md5'] = md5
+        return md5
+
+    def record_hardlink(self, relative_file, dev, inode, md5):
+        if dev not in self.hardlinks:
+            self.hardlinks[dev] = dict()
+        if inode not in self.hardlinks[dev]:
+            self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set())
+        self.hardlinks[dev][inode]['relative_files'].add(relative_file)
+
+    def get_hardlink_md5(self, relative_file):
+        md5 = None
+        dev = self[relative_file]['dev']
+        inode = self[relative_file]['inode']
+        try:
+            md5 = self.hardlinks[dev][inode]['md5']
+        except:
+            pass
+        return md5
+
 if __name__ == "__main__":
     d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 }
     sd = SortedDict(d)
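A small sketch of the new md5 index on SortedDict — this is what lets compare_filelists find remote-copy candidates. The keys and checksum are made up:

    from SortedDict import SortedDict

    d = SortedDict(ignore_case = False)
    d.record_md5("a/one.bin", "9e107d9d372bb6826bd81d3542a419d6")
    d.record_md5("b/two.bin", "9e107d9d372bb6826bd81d3542a419d6")   # duplicate content
    print d.find_md5_one("9e107d9d372bb6826bd81d3542a419d6")        # one of the two keys
    print d.by_md5["9e107d9d372bb6826bd81d3542a419d6"]
    # set(['a/one.bin', 'b/two.bin'])

get_md5 falls back from the stored 'md5' attribute, to a known hardlink twin, to hashing the file itself — and records the result, so the hashing I/O happens at most once per inode.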
@@ -3,6 +3,7 @@
 ## http://www.logix.cz/michal
 ## License: GPL Version 2

+import datetime
 import os
 import sys
 import time
@@ -13,9 +14,11 @@ import rfc822
 import hmac
 import base64
 import errno
+import urllib

 from logging import debug, info, warning, error

+
 import Config
 import Exceptions

@@ -163,7 +166,14 @@ def formatSize(size, human_readable = False, floating_point = False):
 __all__.append("formatSize")

 def formatDateTime(s3timestamp):
-    return time.strftime("%Y-%m-%d %H:%M", dateS3toPython(s3timestamp))
+    try:
+        import pytz
+        timezone = pytz.timezone(os.environ.get('TZ', 'UTC'))
+        utc_dt = datetime.datetime(*dateS3toPython(s3timestamp)[0:6], tzinfo=pytz.timezone('UTC'))
+        dt_object = utc_dt.astimezone(timezone)
+    except ImportError:
+        dt_object = datetime.datetime(*dateS3toPython(s3timestamp)[0:6])
+    return dt_object.strftime("%Y-%m-%d %H:%M")
 __all__.append("formatDateTime")

 def convertTupleListToDict(list):
@@ -319,12 +329,73 @@ def replace_nonprintables(string): |
319 | 319 |
__all__.append("replace_nonprintables") |
320 | 320 |
|
321 | 321 |
def sign_string(string_to_sign): |
322 |
- #debug("string_to_sign: %s" % string_to_sign) |
|
322 |
+ """Sign a string with the secret key, returning base64 encoded results. |
|
323 |
+ By default the configured secret key is used, but may be overridden as |
|
324 |
+ an argument. |
|
325 |
+ |
|
326 |
+ Useful for REST authentication. See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html |
|
327 |
+ """ |
|
323 | 328 |
signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip() |
324 |
- #debug("signature: %s" % signature) |
|
325 | 329 |
return signature |
326 | 330 |
__all__.append("sign_string") |
327 | 331 |
|
332 |
+def sign_url(url_to_sign, expiry): |
|
333 |
+ """Sign a URL in s3://bucket/object form with the given expiry |
|
334 |
+ time. The object will be accessible via the signed URL until the |
|
335 |
+ AWS key and secret are revoked or the expiry time is reached, even |
|
336 |
+ if the object is otherwise private. |
|
337 |
+ |
|
338 |
+ See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html |
|
339 |
+ """ |
|
340 |
+ return sign_url_base( |
|
341 |
+ bucket = url_to_sign.bucket(), |
|
342 |
+ object = url_to_sign.object(), |
|
343 |
+ expiry = expiry |
|
344 |
+ ) |
|
345 |
+__all__.append("sign_url") |
|
346 |
+ |
|
347 |
+def sign_url_base(**parms): |
|
348 |
+ """Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args.""" |
|
349 |
+ parms['expiry']=time_to_epoch(parms['expiry']) |
|
350 |
+ parms['access_key']=Config.Config().access_key |
|
351 |
+ debug("Expiry interpreted as epoch time %s", parms['expiry']) |
|
352 |
+ signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms |
|
353 |
+ debug("Signing plaintext: %r", signtext) |
|
354 |
+ parms['sig'] = urllib.quote_plus(sign_string(signtext)) |
|
355 |
+ debug("Urlencoded signature: %s", parms['sig']) |
|
356 |
+ return "http://%(bucket)s.s3.amazonaws.com/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms |
|
357 |
+ |
|
358 |
+def time_to_epoch(t): |
|
359 |
+ """Convert time specified in a variety of forms into UNIX epoch time. |
|
360 |
+ Accepts datetime.datetime, int, anything that has a strftime() method, and standard time 9-tuples |
|
361 |
+ """ |
|
362 |
+ if isinstance(t, int): |
|
363 |
+ # Already an int |
|
364 |
+ return t |
|
365 |
+ elif isinstance(t, tuple) or isinstance(t, time.struct_time): |
|
366 |
+ # Assume it's a time 9-tuple |
|
367 |
+ return int(time.mktime(t)) |
|
368 |
+ elif hasattr(t, 'timetuple'): |
|
369 |
+ # Looks like a datetime object or compatible |
|
370 |
+ return int(time.mktime(ex.timetuple())) |
|
371 |
+ elif hasattr(t, 'strftime'): |
|
372 |
+ # Looks like the object supports standard srftime() |
|
373 |
+ return int(t.strftime('%s')) |
|
374 |
+ elif isinstance(t, str) or isinstance(t, unicode): |
|
375 |
+ # See if it's a string representation of an epoch |
|
376 |
+ try: |
|
377 |
+ return int(t) |
|
378 |
+ except ValueError: |
|
379 |
+ # Try to parse it as a timestamp string |
|
380 |
+ try: |
|
381 |
+ return time.strptime(t) |
|
382 |
+ except ValueError as ex: |
|
383 |
+ # Will fall through |
|
384 |
+ debug("Failed to parse date with strptime: %s", ex) |
|
385 |
+ pass |
|
386 |
+ raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t) |
|
387 |
+ |
|
388 |
+ |
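A usage sketch for time_to_epoch() covering each accepted input form; the values are illustrative:

    import time, datetime
    time_to_epoch(1356998400)               # int epoch, returned as-is
    time_to_epoch("1356998400")             # string epoch
    time_to_epoch(time.localtime())         # 9-tuple / time.struct_time
    time_to_epoch(datetime.datetime.now())  # anything with timetuple()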
|
328 | 389 |
def check_bucket_name(bucket, dns_strict = True): |
329 | 390 |
if dns_strict: |
330 | 391 |
invalid = re.search("([^a-z0-9\.-])", bucket) |
331 | 392 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,63 @@ |
0 |
+# Additional magic for common web file types |
|
1 |
+ |
|
2 |
+0 string/b {\ " JSON data |
|
3 |
+!:mime application/json |
|
4 |
+0 string/b {\ } JSON data |
|
5 |
+!:mime application/json |
|
6 |
+0 string/b [ JSON data |
|
7 |
+!:mime application/json |
|
8 |
+ |
|
9 |
+0 search/4000 function |
|
10 |
+>&0 search/32/b )\ { JavaScript program |
|
11 |
+!:mime application/javascript |
|
12 |
+ |
|
13 |
+0 search/4000 @media CSS stylesheet |
|
14 |
+!:mime text/css |
|
15 |
+0 search/4000 @import CSS stylesheet |
|
16 |
+!:mime text/css |
|
17 |
+0 search/4000 @namespace CSS stylesheet |
|
18 |
+!:mime text/css |
|
19 |
+0 search/4000/b {\ background CSS stylesheet |
|
20 |
+!:mime text/css |
|
21 |
+0 search/4000/b {\ border CSS stylesheet |
|
22 |
+!:mime text/css |
|
23 |
+0 search/4000/b {\ bottom CSS stylesheet |
|
24 |
+!:mime text/css |
|
25 |
+0 search/4000/b {\ color CSS stylesheet |
|
26 |
+!:mime text/css |
|
27 |
+0 search/4000/b {\ cursor CSS stylesheet |
|
28 |
+!:mime text/css |
|
29 |
+0 search/4000/b {\ direction CSS stylesheet |
|
30 |
+!:mime text/css |
|
31 |
+0 search/4000/b {\ display CSS stylesheet |
|
32 |
+!:mime text/css |
|
33 |
+0 search/4000/b {\ float CSS stylesheet |
|
34 |
+!:mime text/css |
|
35 |
+0 search/4000/b {\ font CSS stylesheet |
|
36 |
+!:mime text/css |
|
37 |
+0 search/4000/b {\ height CSS stylesheet |
|
38 |
+!:mime text/css |
|
39 |
+0 search/4000/b {\ left CSS stylesheet |
|
40 |
+!:mime text/css |
|
41 |
+0 search/4000/b {\ line- CSS stylesheet |
|
42 |
+!:mime text/css |
|
43 |
+0 search/4000/b {\ margin CSS stylesheet |
|
44 |
+!:mime text/css |
|
45 |
+0 search/4000/b {\ padding CSS stylesheet |
|
46 |
+!:mime text/css |
|
47 |
+0 search/4000/b {\ position CSS stylesheet |
|
48 |
+!:mime text/css |
|
49 |
+0 search/4000/b {\ right CSS stylesheet |
|
50 |
+!:mime text/css |
|
51 |
+0 search/4000/b {\ text- CSS stylesheet |
|
52 |
+!:mime text/css |
|
53 |
+0 search/4000/b {\ top CSS stylesheet |
|
54 |
+!:mime text/css |
|
55 |
+0 search/4000/b {\ width CSS stylesheet |
|
56 |
+!:mime text/css |
|
57 |
+0 search/4000/b {\ visibility CSS stylesheet |
|
58 |
+!:mime text/css |
|
59 |
+0 search/4000/b {\ -moz- CSS stylesheet |
|
60 |
+!:mime text/css |
|
61 |
+0 search/4000/b {\ -webkit- CSS stylesheet |
|
62 |
+!:mime text/css |
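One possible way to consult these rules at upload time, assuming the Python bindings shipped with file(1) are installed and the rules above were saved to a file named "magic"; both are assumptions, not part of this patch:

    import magic
    cookie = magic.open(magic.MAGIC_MIME)
    cookie.load("magic")               # load the custom rules above
    print(cookie.file("site/app.js"))  # e.g. "application/javascript"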
... | ... |
@@ -23,6 +23,9 @@ import locale |
23 | 23 |
import subprocess |
24 | 24 |
import htmlentitydefs |
25 | 25 |
import socket |
26 |
+import shutil |
|
27 |
+import tempfile |
|
28 |
+import S3.Exceptions |
|
26 | 29 |
|
27 | 30 |
from copy import copy |
28 | 31 |
from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatter |
... | ... |
@@ -31,6 +34,7 @@ from distutils.spawn import find_executable |
31 | 31 |
|
32 | 32 |
def output(message): |
33 | 33 |
sys.stdout.write(message + "\n") |
34 |
+ sys.stdout.flush() |
|
34 | 35 |
|
35 | 36 |
def check_args_type(args, type, verbose_type): |
36 | 37 |
for arg in args: |
... | ... |
@@ -65,18 +69,28 @@ def subcmd_bucket_usage(s3, uri): |
65 | 65 |
|
66 | 66 |
if object.endswith('*'): |
67 | 67 |
object = object[:-1] |
68 |
- try: |
|
69 |
- response = s3.bucket_list(bucket, prefix = object, recursive = True) |
|
70 |
- except S3Error, e: |
|
71 |
- if S3.codes.has_key(e.info["Code"]): |
|
72 |
- error(S3.codes[e.info["Code"]] % bucket) |
|
73 |
- return |
|
74 |
- else: |
|
75 |
- raise |
|
68 |
+ |
|
76 | 69 |
bucket_size = 0 |
77 |
- for object in response["list"]: |
|
78 |
- size, size_coeff = formatSize(object["Size"], False) |
|
79 |
- bucket_size += size |
|
70 |
+ # iterate and store directories to traverse, while summing objects: |
|
71 |
+ dirs = [object] |
|
72 |
+ while dirs: |
|
73 |
+ try: |
|
74 |
+ response = s3.bucket_list(bucket, prefix=dirs.pop()) |
|
75 |
+ except S3Error, e: |
|
76 |
+ if S3.codes.has_key(e.info["Code"]): |
|
77 |
+ error(S3.codes[e.info["Code"]] % bucket) |
|
78 |
+ return |
|
79 |
+ else: |
|
80 |
+ raise |
|
81 |
+ |
|
82 |
+ # objects in the current scope: |
|
83 |
+ for obj in response["list"]: |
|
84 |
+ bucket_size += int(obj["Size"]) |
|
85 |
+ |
|
86 |
+ # directories found in current scope: |
|
87 |
+ for obj in response["common_prefixes"]: |
|
88 |
+ dirs.append(obj["Prefix"]) |
|
89 |
+ |
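The traversal above in isolation: a stack of prefixes replaces the old recursive listing. list_prefix() is a hypothetical stand-in for s3.bucket_list(), returning {'list': [...], 'common_prefixes': [...]}:

    def bucket_size(list_prefix, start=""):
        total, dirs = 0, [start]
        while dirs:
            response = list_prefix(dirs.pop())
            for obj in response["list"]:               # objects in this scope
                total += int(obj["Size"])
            for prefix in response["common_prefixes"]: # subdirectories to visit
                dirs.append(prefix["Prefix"])
        return total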
|
80 | 90 |
total_size, size_coeff = formatSize(bucket_size, Config().human_readable_sizes) |
81 | 91 |
total_size_str = str(total_size) + size_coeff |
82 | 92 |
output(u"%s %s" % (total_size_str.ljust(8), uri)) |
... | ... |
@@ -266,7 +280,13 @@ def cmd_object_put(args): |
266 | 266 |
info(u"Summary: %d local files to upload" % local_count) |
267 | 267 |
|
268 | 268 |
if local_count > 0: |
269 |
- if not destination_base.endswith("/"): |
|
269 |
+ if not single_file_local: |
|
270 |
+ for key in local_list: |
|
271 |
+ if key == "-": |
|
272 |
+ raise ParameterError("Cannot specify multiple local files if uploading from '-' (ie stdin)") |
|
273 |
+ elif single_file_local and local_list.keys()[0] == "-" and destination_base.endswith("/"): |
|
274 |
+ raise ParameterError("Destination S3 URI must not end with '/' when uploading from stdin.") |
|
275 |
+ elif not destination_base.endswith("/"): |
|
270 | 276 |
if not single_file_local: |
271 | 277 |
raise ParameterError("Destination S3 URI must end with '/' (ie must refer to a directory on the remote side).") |
272 | 278 |
local_list[local_list.keys()[0]]['remote_uri'] = unicodise(destination_base) |
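Illustrative invocations of the stdin rules enforced above; bucket and object names are hypothetical:

    tar czf - mydir | s3cmd put - s3://mybucket/mydir.tar.gz   # accepted
    s3cmd put - s3://mybucket/backups/     # rejected: URI must name an object
    s3cmd put - file.txt s3://mybucket/    # rejected: '-' excludes other files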
... | ... |
@@ -278,7 +298,11 @@ def cmd_object_put(args): |
278 | 278 |
for key in exclude_list: |
279 | 279 |
output(u"exclude: %s" % unicodise(key)) |
280 | 280 |
for key in local_list: |
281 |
- output(u"upload: %s -> %s" % (local_list[key]['full_name_unicode'], local_list[key]['remote_uri'])) |
|
281 |
+ if key != "-": |
|
282 |
+ nicekey = local_list[key]['full_name_unicode'] |
|
283 |
+ else: |
|
284 |
+ nicekey = "<stdin>" |
|
285 |
+ output(u"upload: %s -> %s" % (nicekey, local_list[key]['remote_uri'])) |
|
282 | 286 |
|
283 | 287 |
warning(u"Exitting now because of --dry-run") |
284 | 288 |
return |
... | ... |
@@ -455,6 +479,9 @@ def cmd_object_get(args): |
455 | 455 |
speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) |
456 | 456 |
output(u"File %s saved as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s)" % |
457 | 457 |
(uri, destination, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1])) |
458 |
+ if Config().delete_after_fetch: |
|
459 |
+ s3.object_delete(uri) |
|
460 |
+ output(u"File %s removed after fetch" % (uri)) |
|
458 | 461 |
|
459 | 462 |
def cmd_object_del(args): |
460 | 463 |
for uri_str in args: |
... | ... |
@@ -589,6 +616,17 @@ def cmd_info(args): |
589 | 589 |
raise |
590 | 590 |
|
591 | 591 |
def cmd_sync_remote2remote(args): |
592 |
+ def _do_deletes(s3, dst_list): |
|
593 |
+ # Delete items in destination that are not in source |
|
594 |
+ if cfg.dry_run: |
|
595 |
+ for key in dst_list: |
|
596 |
+ output(u"delete: %s" % dst_list[key]['object_uri_str']) |
|
597 |
+ else: |
|
598 |
+ for key in dst_list: |
|
599 |
+ uri = S3Uri(dst_list[key]['object_uri_str']) |
|
600 |
+ s3.object_delete(uri) |
|
601 |
+ output(u"deleted: '%s'" % uri) |
|
602 |
+ |
|
592 | 603 |
s3 = S3(Config()) |
593 | 604 |
|
594 | 605 |
# Normalise s3://uri (e.g. assert trailing slash) |
... | ... |
@@ -604,9 +642,10 @@ def cmd_sync_remote2remote(args): |
604 | 604 |
|
605 | 605 |
src_list, exclude_list = filter_exclude_include(src_list) |
606 | 606 |
|
607 |
- src_list, dst_list, existing_list = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True) |
|
607 |
+ src_list, dst_list, update_list, copy_pairs = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True, delay_updates = cfg.delay_updates) |
|
608 | 608 |
|
609 | 609 |
src_count = len(src_list) |
610 |
+ update_count = len(update_list) |
|
610 | 611 |
dst_count = len(dst_list) |
611 | 612 |
|
612 | 613 |
print(u"Summary: %d source files to copy, %d files at destination to delete" % (src_count, dst_count)) |
... | ... |
@@ -627,34 +666,39 @@ def cmd_sync_remote2remote(args): |
627 | 627 |
warning(u"Exitting now because of --dry-run") |
628 | 628 |
return |
629 | 629 |
|
630 |
+ # if there are copy pairs, we can't do delete_before, on the chance |
|
631 |
+ # we need one of the to-be-deleted files as a copy source. |
|
632 |
+ if len(copy_pairs) > 0: |
|
633 |
+ cfg.delete_after = True |
|
634 |
+ |
|
630 | 635 |
# Delete items in destination that are not in source |
631 |
- if cfg.delete_removed: |
|
632 |
- if cfg.dry_run: |
|
633 |
- for key in dst_list: |
|
634 |
- output(u"delete: %s" % dst_list[key]['object_uri_str']) |
|
635 |
- else: |
|
636 |
- for key in dst_list: |
|
637 |
- uri = S3Uri(dst_list[key]['object_uri_str']) |
|
638 |
- s3.object_delete(uri) |
|
639 |
- output(u"deleted: '%s'" % uri) |
|
636 |
+ if cfg.delete_removed and not cfg.delete_after: |
|
637 |
+ _do_deletes(s3, dst_list) |
|
638 |
+ |
|
639 |
+ def _upload(src_list, seq, src_count): |
|
640 |
+ file_list = src_list.keys() |
|
641 |
+ file_list.sort() |
|
642 |
+ for file in file_list: |
|
643 |
+ seq += 1 |
|
644 |
+ item = src_list[file] |
|
645 |
+ src_uri = S3Uri(item['object_uri_str']) |
|
646 |
+ dst_uri = S3Uri(item['target_uri']) |
|
647 |
+ seq_label = "[%d of %d]" % (seq, src_count) |
|
648 |
+ extra_headers = copy(cfg.extra_headers) |
|
649 |
+ try: |
|
650 |
+ response = s3.object_copy(src_uri, dst_uri, extra_headers) |
|
651 |
+ output("File %(src)s copied to %(dst)s" % { "src" : src_uri, "dst" : dst_uri }) |
|
652 |
+ except S3Error, e: |
|
653 |
+ error("File %(src)s could not be copied: %(e)s" % { "src" : src_uri, "e" : e }) |
|
654 |
+ return seq |
|
640 | 655 |
|
641 | 656 |
# Perform the synchronization of files |
642 | 657 |
timestamp_start = time.time() |
643 | 658 |
seq = 0 |
644 |
- file_list = src_list.keys() |
|
645 |
- file_list.sort() |
|
646 |
- for file in file_list: |
|
647 |
- seq += 1 |
|
648 |
- item = src_list[file] |
|
649 |
- src_uri = S3Uri(item['object_uri_str']) |
|
650 |
- dst_uri = S3Uri(item['target_uri']) |
|
651 |
- seq_label = "[%d of %d]" % (seq, src_count) |
|
652 |
- extra_headers = copy(cfg.extra_headers) |
|
653 |
- try: |
|
654 |
- response = s3.object_copy(src_uri, dst_uri, extra_headers) |
|
655 |
- output("File %(src)s copied to %(dst)s" % { "src" : src_uri, "dst" : dst_uri }) |
|
656 |
- except S3Error, e: |
|
657 |
- error("File %(src)s could not be copied: %(e)s" % { "src" : src_uri, "e" : e }) |
|
659 |
+ seq = _upload(src_list, seq, src_count + update_count) |
|
660 |
+ seq = _upload(update_list, seq, src_count + update_count) |
|
661 |
+ n_copied, bytes_saved = remote_copy(s3, copy_pairs, destination_base) |
|
662 |
+ |
|
658 | 663 |
total_elapsed = time.time() - timestamp_start |
659 | 664 |
outstr = "Done. Copied %d files in %0.1f seconds, %0.2f files/s" % (seq, total_elapsed, seq/total_elapsed) |
660 | 665 |
if seq > 0: |
... | ... |
@@ -662,13 +706,15 @@ def cmd_sync_remote2remote(args): |
662 | 662 |
else: |
663 | 663 |
info(outstr) |
664 | 664 |
|
665 |
+ # Delete items in destination that are not in source |
|
666 |
+ if cfg.delete_removed and cfg.delete_after: |
|
667 |
+ _do_deletes(s3, dst_list) |
|
668 |
+ |
|
665 | 669 |
def cmd_sync_remote2local(args): |
666 |
- def _parse_attrs_header(attrs_header): |
|
667 |
- attrs = {} |
|
668 |
- for attr in attrs_header.split("/"): |
|
669 |
- key, val = attr.split(":") |
|
670 |
- attrs[key] = val |
|
671 |
- return attrs |
|
670 |
+ def _do_deletes(local_list): |
|
671 |
+ for key in local_list: |
|
672 |
+ os.unlink(local_list[key]['full_name']) |
|
673 |
+ output(u"deleted: %s" % local_list[key]['full_name_unicode']) |
|
672 | 674 |
|
673 | 675 |
s3 = S3(Config()) |
674 | 676 |
|
... | ... |
@@ -683,27 +729,33 @@ def cmd_sync_remote2local(args): |
683 | 683 |
|
684 | 684 |
remote_list, exclude_list = filter_exclude_include(remote_list) |
685 | 685 |
|
686 |
- remote_list, local_list, existing_list = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False) |
|
686 |
+ remote_list, local_list, update_list, copy_pairs = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False, delay_updates = cfg.delay_updates) |
|
687 | 687 |
|
688 | 688 |
local_count = len(local_list) |
689 | 689 |
remote_count = len(remote_list) |
690 |
+ update_count = len(update_list) |
|
691 |
+ copy_pairs_count = len(copy_pairs) |
|
690 | 692 |
|
691 |
- info(u"Summary: %d remote files to download, %d local files to delete" % (remote_count, local_count)) |
|
692 |
- |
|
693 |
- if not os.path.isdir(destination_base): |
|
694 |
- ## We were either given a file name (existing or not) or want STDOUT |
|
695 |
- if remote_count > 1: |
|
696 |
- raise ParameterError("Destination must be a directory when downloading multiple sources.") |
|
697 |
- remote_list[remote_list.keys()[0]]['local_filename'] = deunicodise(destination_base) |
|
698 |
- else: |
|
699 |
- if destination_base[-1] != os.path.sep: |
|
700 |
- destination_base += os.path.sep |
|
701 |
- for key in remote_list: |
|
702 |
- local_filename = destination_base + key |
|
703 |
- if os.path.sep != "/": |
|
704 |
- local_filename = os.path.sep.join(local_filename.split("/")) |
|
705 |
- remote_list[key]['local_filename'] = deunicodise(local_filename) |
|
693 |
+ info(u"Summary: %d remote files to download, %d local files to delete, %d local files to hardlink" % (remote_count + update_count, local_count, copy_pairs_count)) |
|
706 | 694 |
|
695 |
+ def _set_local_filename(remote_list, destination_base): |
|
696 |
+ if not os.path.isdir(destination_base): |
|
697 |
+ ## We were either given a file name (existing or not) or want STDOUT |
|
698 |
+ if len(remote_list) > 1: |
|
699 |
+ raise ParameterError("Destination must be a directory when downloading multiple sources.") |
|
700 |
+ remote_list[remote_list.keys()[0]]['local_filename'] = deunicodise(destination_base) |
|
701 |
+ else: |
|
702 |
+ if destination_base[-1] != os.path.sep: |
|
703 |
+ destination_base += os.path.sep |
|
704 |
+ for key in remote_list: |
|
705 |
+ local_filename = destination_base + key |
|
706 |
+ if os.path.sep != "/": |
|
707 |
+ local_filename = os.path.sep.join(local_filename.split("/")) |
|
708 |
+ remote_list[key]['local_filename'] = deunicodise(local_filename) |
|
709 |
+ |
|
710 |
+ _set_local_filename(remote_list, destination_base) |
|
711 |
+ _set_local_filename(update_list, destination_base) |
|
712 |
+ |
|
707 | 713 |
if cfg.dry_run: |
708 | 714 |
for key in exclude_list: |
709 | 715 |
output(u"exclude: %s" % unicodise(key)) |
... | ... |
@@ -712,94 +764,111 @@ def cmd_sync_remote2local(args): |
712 | 712 |
output(u"delete: %s" % local_list[key]['full_name_unicode']) |
713 | 713 |
for key in remote_list: |
714 | 714 |
output(u"download: %s -> %s" % (remote_list[key]['object_uri_str'], remote_list[key]['local_filename'])) |
715 |
+ for key in update_list: |
|
716 |
+ output(u"download: %s -> %s" % (update_list[key]['object_uri_str'], update_list[key]['local_filename'])) |
|
715 | 717 |
|
716 | 718 |
warning(u"Exitting now because of --dry-run") |
717 | 719 |
return |
718 | 720 |
|
719 |
- if cfg.delete_removed: |
|
720 |
- for key in local_list: |
|
721 |
- os.unlink(local_list[key]['full_name']) |
|
722 |
- output(u"deleted: %s" % local_list[key]['full_name_unicode']) |
|
723 |
- |
|
724 |
- total_size = 0 |
|
725 |
- total_elapsed = 0.0 |
|
726 |
- timestamp_start = time.time() |
|
727 |
- seq = 0 |
|
728 |
- dir_cache = {} |
|
729 |
- file_list = remote_list.keys() |
|
730 |
- file_list.sort() |
|
731 |
- for file in file_list: |
|
732 |
- seq += 1 |
|
733 |
- item = remote_list[file] |
|
734 |
- uri = S3Uri(item['object_uri_str']) |
|
735 |
- dst_file = item['local_filename'] |
|
736 |
- seq_label = "[%d of %d]" % (seq, remote_count) |
|
737 |
- try: |
|
738 |
- dst_dir = os.path.dirname(dst_file) |
|
739 |
- if not dir_cache.has_key(dst_dir): |
|
740 |
- dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir) |
|
741 |
- if dir_cache[dst_dir] == False: |
|
742 |
- warning(u"%s: destination directory not writable: %s" % (file, dst_dir)) |
|
743 |
- continue |
|
721 |
+ # if there are copy pairs, we can't do delete_before, on the chance |
|
722 |
+ # we need one of the to-be-deleted files as a copy source. |
|
723 |
+ if len(copy_pairs) > 0: |
|
724 |
+ cfg.delete_after = True |
|
725 |
+ |
|
726 |
+ if cfg.delete_removed and not cfg.delete_after: |
|
727 |
+ _do_deletes(local_list) |
|
728 |
+ |
|
729 |
+ def _download(remote_list, seq, total, total_size, dir_cache): |
|
730 |
+ file_list = remote_list.keys() |
|
731 |
+ file_list.sort() |
|
732 |
+ for file in file_list: |
|
733 |
+ seq += 1 |
|
734 |
+ item = remote_list[file] |
|
735 |
+ uri = S3Uri(item['object_uri_str']) |
|
736 |
+ dst_file = item['local_filename'] |
|
737 |
+ seq_label = "[%d of %d]" % (seq, total) |
|
744 | 738 |
try: |
745 |
- open_flags = os.O_CREAT |
|
746 |
- open_flags |= os.O_TRUNC |
|
747 |
- # open_flags |= os.O_EXCL |
|
748 |
- |
|
749 |
- debug(u"dst_file=%s" % unicodise(dst_file)) |
|
750 |
- # This will have failed should the file exist |
|
751 |
- os.close(os.open(dst_file, open_flags)) |
|
752 |
- # Yeah I know there is a race condition here. Sadly I don't know how to open() in exclusive mode. |
|
753 |
- dst_stream = open(dst_file, "wb") |
|
754 |
- response = s3.object_get(uri, dst_stream, extra_label = seq_label) |
|
755 |
- dst_stream.close() |
|
756 |
- if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs: |
|
757 |
- attrs = _parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) |
|
758 |
- if attrs.has_key('mode'): |
|
759 |
- os.chmod(dst_file, int(attrs['mode'])) |
|
760 |
- if attrs.has_key('mtime') or attrs.has_key('atime'): |
|
761 |
- mtime = attrs.has_key('mtime') and int(attrs['mtime']) or int(time.time()) |
|
762 |
- atime = attrs.has_key('atime') and int(attrs['atime']) or int(time.time()) |
|
763 |
- os.utime(dst_file, (atime, mtime)) |
|
764 |
- ## FIXME: uid/gid / uname/gname handling comes here! TODO |
|
765 |
- except OSError, e: |
|
766 |
- try: dst_stream.close() |
|
767 |
- except: pass |
|
768 |
- if e.errno == errno.EEXIST: |
|
769 |
- warning(u"%s exists - not overwriting" % (dst_file)) |
|
770 |
- continue |
|
771 |
- if e.errno in (errno.EPERM, errno.EACCES): |
|
772 |
- warning(u"%s not writable: %s" % (dst_file, e.strerror)) |
|
739 |
+ dst_dir = os.path.dirname(dst_file) |
|
740 |
+ if not dir_cache.has_key(dst_dir): |
|
741 |
+ dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir) |
|
742 |
+ if dir_cache[dst_dir] == False: |
|
743 |
+ warning(u"%s: destination directory not writable: %s" % (file, dst_dir)) |
|
773 | 744 |
continue |
774 |
- if e.errno == errno.EISDIR: |
|
775 |
- warning(u"%s is a directory - skipping over" % dst_file) |
|
745 |
+ try: |
|
746 |
+ debug(u"dst_file=%s" % unicodise(dst_file)) |
|
747 |
+ # download into a temporary file (named .s3cmd.XXXX.tmp) in the same 

748 
+ # directory, then rename it into place once the download completes 
|
749 |
+ chkptfd, chkptfname = tempfile.mkstemp(".tmp",".s3cmd.",os.path.dirname(dst_file)) |
|
750 |
+ debug(u"created chkptfname=%s" % unicodise(chkptfname)) |
|
751 |
+ dst_stream = os.fdopen(chkptfd, "wb") |
|
752 |
+ response = s3.object_get(uri, dst_stream, extra_label = seq_label) |
|
753 |
+ dst_stream.close() |
|
754 |
+ # download completed, rename the file to destination |
|
755 |
+ os.rename(chkptfname, dst_file) |
|
756 |
+ debug(u"renamed chkptfname=%s to dst_file=%s" % (unicodise(chkptfname), unicodise(dst_file))) |
|
757 |
+ if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs: |
|
758 |
+ attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) |
|
759 |
+ if attrs.has_key('mode'): |
|
760 |
+ os.chmod(dst_file, int(attrs['mode'])) |
|
761 |
+ if attrs.has_key('mtime') or attrs.has_key('atime'): |
|
762 |
+ mtime = attrs.has_key('mtime') and int(attrs['mtime']) or int(time.time()) |
|
763 |
+ atime = attrs.has_key('atime') and int(attrs['atime']) or int(time.time()) |
|
764 |
+ os.utime(dst_file, (atime, mtime)) |
|
765 |
+ ## FIXME: uid/gid / uname/gname handling comes here! TODO |
|
766 |
+ except OSError, e: |
|
767 |
+ try: dst_stream.close() |
|
768 |
+ except: pass |
|
769 |
+ if e.errno == errno.EEXIST: |
|
770 |
+ warning(u"%s exists - not overwriting" % (dst_file)) |
|
771 |
+ continue |
|
772 |
+ if e.errno in (errno.EPERM, errno.EACCES): |
|
773 |
+ warning(u"%s not writable: %s" % (dst_file, e.strerror)) |
|
774 |
+ continue |
|
775 |
+ if e.errno == errno.EISDIR: |
|
776 |
+ warning(u"%s is a directory - skipping over" % dst_file) |
|
777 |
+ continue |
|
778 |
+ raise e |
|
779 |
+ except KeyboardInterrupt: |
|
780 |
+ try: dst_stream.close() |
|
781 |
+ except: pass |
|
782 |
+ warning(u"Exiting after keyboard interrupt") |
|
783 |
+ return |
|
784 |
+ except Exception, e: |
|
785 |
+ try: dst_stream.close() |
|
786 |
+ except: pass |
|
787 |
+ error(u"%s: %s" % (file, e)) |
|
776 | 788 |
continue |
777 |
- raise e |
|
778 |
- except KeyboardInterrupt: |
|
779 |
- try: dst_stream.close() |
|
780 |
- except: pass |
|
781 |
- warning(u"Exiting after keyboard interrupt") |
|
782 |
- return |
|
783 |
- except Exception, e: |
|
789 |
+ # We have to keep repeating this call because |
|
790 |
+ # Python 2.4 doesn't support try/except/finally |
|
791 |
+ # construction :-( |
|
784 | 792 |
try: dst_stream.close() |
785 | 793 |
except: pass |
786 |
- error(u"%s: %s" % (file, e)) |
|
794 |
+ except S3DownloadError, e: |
|
795 |
+ error(u"%s: download failed too many times. Skipping that file." % file) |
|
787 | 796 |
continue |
788 |
- # We have to keep repeating this call because |
|
789 |
- # Python 2.4 doesn't support try/except/finally |
|
790 |
- # construction :-( |
|
791 |
- try: dst_stream.close() |
|
792 |
- except: pass |
|
793 |
- except S3DownloadError, e: |
|
794 |
- error(u"%s: download failed too many times. Skipping that file." % file) |
|
795 |
- continue |
|
796 |
- speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) |
|
797 |
- if not Config().progress_meter: |
|
798 |
- output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % |
|
799 |
- (uri, unicodise(dst_file), response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1], |
|
800 |
- seq_label)) |
|
801 |
- total_size += response["size"] |
|
797 |
+ speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) |
|
798 |
+ if not Config().progress_meter: |
|
799 |
+ output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % |
|
800 |
+ (uri, unicodise(dst_file), response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1], |
|
801 |
+ seq_label)) |
|
802 |
+ total_size += response["size"] |
|
803 |
+ if Config().delete_after_fetch: |
|
804 |
+ s3.object_delete(uri) |
|
805 |
+ output(u"File '%s' removed after syncing" % (uri)) |
|
806 |
+ return seq, total_size |
|
807 |
+ |
|
808 |
+ total_size = 0 |
|
809 |
+ total_elapsed = 0.0 |
|
810 |
+ timestamp_start = time.time() |
|
811 |
+ dir_cache = {} |
|
812 |
+ seq = 0 |
|
813 |
+ seq, total_size = _download(remote_list, seq, remote_count + update_count, total_size, dir_cache) |
|
814 |
+ seq, total_size = _download(update_list, seq, remote_count + update_count, total_size, dir_cache) |
|
802 | 815 |
|
816 |
+ failed_hardlink_list = local_hardlink(copy_pairs, destination_base) |
|
817 |
+ _set_local_filename(failed_hardlink_list, destination_base) |
|
818 |
+ seq, total_size = _download(failed_hardlink_list, seq, len(failed_hardlink_list) + remote_count + update_count, total_size, dir_cache) |
|
819 |
+ |
|
803 | 820 |
total_elapsed = time.time() - timestamp_start |
804 | 821 |
speed_fmt = formatSize(total_size/total_elapsed, human_readable = True, floating_point = True) |
805 | 822 |
|
... | ... |
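The checkpoint-download pattern introduced above, in isolation: write into a temporary file in the destination directory, then rename into place, so an interrupted transfer never leaves a truncated destination file. fetch() is a hypothetical stand-in for s3.object_get():

    import os, tempfile

    def atomic_download(fetch, dst_file):
        fd, tmpname = tempfile.mkstemp(".tmp", ".s3cmd.", os.path.dirname(dst_file))
        stream = os.fdopen(fd, "wb")
        try:
            fetch(stream)
            stream.close()
            os.rename(tmpname, dst_file)  # atomic within one filesystem
        except:
            stream.close()
            os.unlink(tmpname)            # drop the partial download
            raise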
@@ -811,158 +880,276 @@ def cmd_sync_remote2local(args): |
811 | 811 |
else: |
812 | 812 |
info(outstr) |
813 | 813 |
|
814 |
+ if cfg.delete_removed and cfg.delete_after: |
|
815 |
+ _do_deletes(local_list) |
|
816 |
+ |
|
817 |
+def local_hardlink(copy_pairs, destination_base): |
|
818 |
+ failed_hardlink_list = SortedDict() |
|
819 |
+ for (src_obj, dst1, relative_file) in copy_pairs: |
|
820 |
+ try: |
|
821 |
+ os.link(destination_base + dst1, destination_base + relative_file) |
|
822 |
+ debug(u"Hardlinking %s to %s" % (destination_base + dst1, destination_base + relative_file)) |
|
823 |
+ except (IOError, OSError): |
|
824 |
+ try: |
|
825 |
+ shutil.copy2(destination_base + dst1, destination_base + relative_file) |
|
826 |
+ debug(u"Hardlinking unavailable, copying %s to %s" % (destination_base + dst1, destination_base + relative_file)) |
|
827 |
+ except IOError, e: |
|
828 |
+ warning(u'Unable to hardlink or copy files %s -> %s: %s' % (destination_base + dst1, destination_base + relative_file, e)) |
|
829 |
+ failed_hardlink_list[relative_file] = src_obj |
|
830 |
+ return failed_hardlink_list |
|
831 |
+ |
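A sketch of the fallback chain used by local_hardlink() above, in isolation: try a hardlink first, fall back to a full copy, and report failure so the caller can re-download the file instead:

    import os, shutil

    def link_or_copy(src, dst):
        try:
            os.link(src, dst)        # instant, no extra disk space
            return True
        except (IOError, OSError):   # cross-device link, permissions, ...
            pass
        try:
            shutil.copy2(src, dst)   # copy contents and metadata
            return True
        except IOError:
            return False             # caller re-downloads this file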
|
832 |
+def remote_copy(s3, copy_pairs, destination_base): |
|
833 |
+ saved_bytes = 0 |
|
834 |
+ for (src_obj, dst1, dst2) in copy_pairs: |
|
835 |
+ debug(u"Remote Copying from %s to %s" % (dst1, dst2)) |
|
836 |
+ dst1_uri = S3Uri(destination_base + dst1) |
|
837 |
+ dst2_uri = S3Uri(destination_base + dst2) |
|
838 |
+ extra_headers = copy(cfg.extra_headers) |
|
839 |
+ try: |
|
840 |
+ s3.object_copy(dst1_uri, dst2_uri, extra_headers) |
|
841 |
+ info = s3.object_info(dst2_uri) |
|
842 |
+ saved_bytes = saved_bytes + int(info['headers']['content-length']) |
|
843 |
+ output(u"remote copy: %s -> %s" % (dst1, dst2)) |
|
844 |
+ except: |
|
845 |
+ raise |
|
846 |
+ return (len(copy_pairs), saved_bytes) |
|
847 |
+ |
|
848 |
+ |
|
814 | 849 |
def cmd_sync_local2remote(args): |
815 |
- def _build_attr_header(src): |
|
850 |
+ def _build_attr_header(local_list, src): |
|
816 | 851 |
import pwd, grp |
817 | 852 |
attrs = {} |
818 |
- src = deunicodise(src) |
|
819 |
- try: |
|
820 |
- st = os.stat_result(os.stat(src)) |
|
821 |
- except OSError, e: |
|
822 |
- raise InvalidFileError(u"%s: %s" % (unicodise(src), e.strerror)) |
|
823 | 853 |
for attr in cfg.preserve_attrs_list: |
824 | 854 |
if attr == 'uname': |
825 | 855 |
try: |
826 |
- val = pwd.getpwuid(st.st_uid).pw_name |
|
856 |
+ val = pwd.getpwuid(local_list[src]['uid']).pw_name |
|
827 | 857 |
except KeyError: |
828 | 858 |
attr = "uid" |
829 |
- val = st.st_uid |
|
830 |
- warning(u"%s: Owner username not known. Storing UID=%d instead." % (unicodise(src), val)) |
|
859 |
+ val = local_list[src].get('uid') |
|
860 |
+ warning(u"%s: Owner username not known. Storing UID=%d instead." % (src, val)) |
|
831 | 861 |
elif attr == 'gname': |
832 | 862 |
try: |
833 |
- val = grp.getgrgid(st.st_gid).gr_name |
|
863 |
+ val = grp.getgrgid(local_list[src].get('gid')).gr_name |
|
834 | 864 |
except KeyError: |
835 | 865 |
attr = "gid" |
836 |
- val = st.st_gid |
|
837 |
- warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (unicodise(src), val)) |
|
866 |
+ val = local_list[src].get('gid') |
|
867 |
+ warning(u"%s: Owner groupname not known. Storing GID=%d instead." % (src, val)) |
|
868 |
+ elif attr == 'md5': |
|
869 |
+ try: |
|
870 |
+ val = local_list.get_md5(src) |
|
871 |
+ except IOError: |
|
872 |
+ val = None |
|
838 | 873 |
else: |
839 |
- val = getattr(st, 'st_' + attr) |
|
874 |
+ val = getattr(local_list[src]['sr'], 'st_' + attr) |
|
840 | 875 |
attrs[attr] = val |
876 |
+ |
|
877 |
+ if 'md5' in attrs and attrs['md5'] is None: |
|
878 |
+ del attrs['md5'] |
|
879 |
+ |
|
841 | 880 |
result = "" |
842 | 881 |
for k in attrs: result += "%s:%s/" % (k, attrs[k]) |
843 | 882 |
return { 'x-amz-meta-s3cmd-attrs' : result[:-1] } |
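A round-trip sketch of the x-amz-meta-s3cmd-attrs header format built above, "key:value" pairs joined with "/", as parsed back by parse_attrs_header(); serialize_attrs() and parse_attrs() are illustrative helpers, not part of this patch:

    def serialize_attrs(attrs):
        return "/".join(["%s:%s" % (k, v) for k, v in attrs.items()])

    def parse_attrs(header):
        return dict([item.split(":", 1) for item in header.split("/")])

    hdr = serialize_attrs({"mode": 33188, "mtime": 1356998400})
    assert parse_attrs(hdr) == {"mode": "33188", "mtime": "1356998400"}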
844 | 883 |
|
845 |
- s3 = S3(cfg) |
|
884 |
+ def _do_deletes(s3, remote_list): |
|
885 |
+ for key in remote_list: |
|
886 |
+ uri = S3Uri(remote_list[key]['object_uri_str']) |
|
887 |
+ s3.object_delete(uri) |
|
888 |
+ output(u"deleted: '%s'" % uri) |
|
846 | 889 |
|
847 |
- if cfg.encrypt: |
|
848 |
- error(u"S3cmd 'sync' doesn't yet support GPG encryption, sorry.") |
|
849 |
- error(u"Either use unconditional 's3cmd put --recursive'") |
|
850 |
- error(u"or disable encryption with --no-encrypt parameter.") |
|
851 |
- sys.exit(1) |
|
890 |
+ def _single_process(local_list): |
|
891 |
+ for dest in destinations: |
|
892 |
+ ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) |
|
893 |
+ destination_base_uri = S3Uri(dest) |
|
894 |
+ if destination_base_uri.type != 's3': |
|
895 |
+ raise ParameterError("Destination must be S3Uri. Got: %s" % destination_base_uri) |
|
896 |
+ destination_base = str(destination_base_uri) |
|
897 |
+ _child(destination_base, local_list) |
|
898 |
+ return destination_base_uri |
|
899 |
+ |
|
900 |
+ def _parent(): |
|
901 |
+ # Now that we've done all the disk I/O to look at the local file system and |
|
902 |
+ # calculate the md5 for each file, fork for each destination to upload to them separately |
|
903 |
+ # and in parallel |
|
904 |
+ child_pids = [] |
|
905 |
+ |
|
906 |
+ for dest in destinations: |
|
907 |
+ ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) |
|
908 |
+ destination_base_uri = S3Uri(dest) |
|
909 |
+ if destination_base_uri.type != 's3': |
|
910 |
+ raise ParameterError("Destination must be S3Uri. Got: %s" % destination_base_uri) |
|
911 |
+ destination_base = str(destination_base_uri) |
|
912 |
+ child_pid = os.fork() |
|
913 |
+ if child_pid == 0: |
|
914 |
+ _child(destination_base, local_list) |
|
915 |
+ os._exit(0) |
|
916 |
+ else: |
|
917 |
+ child_pids.append(child_pid) |
|
852 | 918 |
|
853 |
- ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) |
|
854 |
- destination_base_uri = S3Uri(args[-1]) |
|
855 |
- if destination_base_uri.type != 's3': |
|
856 |
- raise ParameterError("Destination must be S3Uri. Got: %s" % destination_base_uri) |
|
857 |
- destination_base = str(destination_base_uri) |
|
919 |
+ while len(child_pids): |
|
920 |
+ (pid, status) = os.wait() |
|
921 |
+ child_pids.remove(pid) |
|
858 | 922 |
|
859 |
- local_list, single_file_local = fetch_local_list(args[:-1], recursive = True) |
|
860 |
- remote_list = fetch_remote_list(destination_base, recursive = True, require_attribs = True) |
|
923 |
+ return |
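The fork/wait fan-out used by _parent() above, in isolation: one child per destination, then reap them all. upload_to() is a hypothetical stand-in for _child():

    import os

    def fan_out(destinations, upload_to):
        child_pids = []
        for dest in destinations:
            pid = os.fork()
            if pid == 0:              # child: handle one destination, then exit
                upload_to(dest)
                os._exit(0)
            child_pids.append(pid)    # parent: remember the child
        while child_pids:
            pid, status = os.wait()   # reap children as they finish
            child_pids.remove(pid)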
|
861 | 924 |
|
862 |
- local_count = len(local_list) |
|
863 |
- remote_count = len(remote_list) |
|
925 |
+ def _child(destination_base, local_list): |
|
926 |
+ def _set_remote_uri(local_list, destination_base, single_file_local): |
|
927 |
+ if len(local_list) > 0: |
|
928 |
+ ## Populate 'remote_uri' only if we've got something to upload |
|
929 |
+ if not destination_base.endswith("/"): |
|
930 |
+ if not single_file_local: |
|
931 |
+ raise ParameterError("Destination S3 URI must end with '/' (ie must refer to a directory on the remote side).") |
|
932 |
+ local_list[local_list.keys()[0]]['remote_uri'] = unicodise(destination_base) |
|
933 |
+ else: |
|
934 |
+ for key in local_list: |
|
935 |
+ local_list[key]['remote_uri'] = unicodise(destination_base + key) |
|
936 |
+ |
|
937 |
+ def _upload(local_list, seq, total, total_size): |
|
938 |
+ file_list = local_list.keys() |
|
939 |
+ file_list.sort() |
|
940 |
+ for file in file_list: |
|
941 |
+ seq += 1 |
|
942 |
+ item = local_list[file] |
|
943 |
+ src = item['full_name'] |
|
944 |
+ uri = S3Uri(item['remote_uri']) |
|
945 |
+ seq_label = "[%d of %d]" % (seq, total) |
|
946 |
+ extra_headers = copy(cfg.extra_headers) |
|
947 |
+ try: |
|
948 |
+ if cfg.preserve_attrs: |
|
949 |
+ attr_header = _build_attr_header(local_list, file) |
|
950 |
+ debug(u"attr_header: %s" % attr_header) |
|
951 |
+ extra_headers.update(attr_header) |
|
952 |
+ response = s3.object_put(src, uri, extra_headers, extra_label = seq_label) |
|
953 |
+ except InvalidFileError, e: |
|
954 |
+ warning(u"File can not be uploaded: %s" % e) |
|
955 |
+ continue |
|
956 |
+ except S3UploadError, e: |
|
957 |
+ error(u"%s: upload failed too many times. Skipping that file." % item['full_name_unicode']) |
|
958 |
+ continue |
|
959 |
+ speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) |
|
960 |
+ if not cfg.progress_meter: |
|
961 |
+ output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % |
|
962 |
+ (item['full_name_unicode'], uri, response["size"], response["elapsed"], |
|
963 |
+ speed_fmt[0], speed_fmt[1], seq_label)) |
|
964 |
+ total_size += response["size"] |
|
965 |
+ uploaded_objects_list.append(uri.object()) |
|
966 |
+ return seq, total_size |
|
864 | 967 |
|
865 |
- info(u"Found %d local files, %d remote files" % (local_count, remote_count)) |
|
968 |
+ remote_list = fetch_remote_list(destination_base, recursive = True, require_attribs = True) |
|
866 | 969 |
|
867 |
- local_list, exclude_list = filter_exclude_include(local_list) |
|
970 |
+ local_count = len(local_list) |
|
971 |
+ remote_count = len(remote_list) |
|
868 | 972 |
|
869 |
- if single_file_local and len(local_list) == 1 and len(remote_list) == 1: |
|
870 |
- ## Make remote_key same as local_key for comparison if we're dealing with only one file |
|
871 |
- remote_list_entry = remote_list[remote_list.keys()[0]] |
|
872 |
- # Flush remote_list, by the way |
|
873 |
- remote_list = { local_list.keys()[0] : remote_list_entry } |
|
973 |
+ info(u"Found %d local files, %d remote files" % (local_count, remote_count)) |
|
874 | 974 |
|
875 |
- local_list, remote_list, existing_list = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True) |
|
975 |
+ local_list, exclude_list = filter_exclude_include(local_list) |
|
876 | 976 |
|
877 |
- local_count = len(local_list) |
|
878 |
- remote_count = len(remote_list) |
|
977 |
+ if single_file_local and len(local_list) == 1 and len(remote_list) == 1: |
|
978 |
+ ## Make remote_key same as local_key for comparison if we're dealing with only one file |
|
979 |
+ remote_list_entry = remote_list[remote_list.keys()[0]] |
|
980 |
+ # Flush remote_list, by the way |
|
981 |
+ remote_list = { local_list.keys()[0] : remote_list_entry } |
|
879 | 982 |
|
880 |
- info(u"Summary: %d local files to upload, %d remote files to delete" % (local_count, remote_count)) |
|
983 |
+ local_list, remote_list, update_list, copy_pairs = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True, delay_updates = cfg.delay_updates) |
|
881 | 984 |
|
882 |
- if local_count > 0: |
|
883 |
- ## Populate 'remote_uri' only if we've got something to upload |
|
884 |
- if not destination_base.endswith("/"): |
|
885 |
- if not single_file_local: |
|
886 |
- raise ParameterError("Destination S3 URI must end with '/' (ie must refer to a directory on the remote side).") |
|
887 |
- local_list[local_list.keys()[0]]['remote_uri'] = unicodise(destination_base) |
|
888 |
- else: |
|
985 |
+ local_count = len(local_list) |
|
986 |
+ update_count = len(update_list) |
|
987 |
+ copy_count = len(copy_pairs) |
|
988 |
+ remote_count = len(remote_list) |
|
989 |
+ |
|
990 |
+ info(u"Summary: %d local files to upload, %d files to remote copy, %d remote files to delete" % (local_count + update_count, copy_count, remote_count)) |
|
991 |
+ |
|
992 |
+ _set_remote_uri(local_list, destination_base, single_file_local) |
|
993 |
+ _set_remote_uri(update_list, destination_base, single_file_local) |
|
994 |
+ |
|
995 |
+ if cfg.dry_run: |
|
996 |
+ for key in exclude_list: |
|
997 |
+ output(u"exclude: %s" % unicodise(key)) |
|
889 | 998 |
for key in local_list: |
890 |
- local_list[key]['remote_uri'] = unicodise(destination_base + key) |
|
999 |
+ output(u"upload: %s -> %s" % (local_list[key]['full_name_unicode'], local_list[key]['remote_uri'])) |
|
1000 |
+ for key in update_list: |
|
1001 |
+ output(u"upload: %s -> %s" % (update_list[key]['full_name_unicode'], update_list[key]['remote_uri'])) |
|
1002 |
+ for (src_obj, dst1, dst2) in copy_pairs: |
|
1003 |
+ output(u"remote copy: %s -> %s" % (dst1['object_key'], remote_list[dst2]['object_key'])) |
|
1004 |
+ if cfg.delete_removed: |
|
1005 |
+ for key in remote_list: |
|
1006 |
+ output(u"delete: %s" % remote_list[key]['object_uri_str']) |
|
1007 |
+ |
|
1008 |
+ warning(u"Exitting now because of --dry-run") |
|
1009 |
+ return |
|
891 | 1010 |
|
892 |
- if cfg.dry_run: |
|
893 |
- for key in exclude_list: |
|
894 |
- output(u"exclude: %s" % unicodise(key)) |
|
895 |
- if cfg.delete_removed: |
|
896 |
- for key in remote_list: |
|
897 |
- output(u"delete: %s" % remote_list[key]['object_uri_str']) |
|
898 |
- for key in local_list: |
|
899 |
- output(u"upload: %s -> %s" % (local_list[key]['full_name_unicode'], local_list[key]['remote_uri'])) |
|
1011 |
+ # if there are copy pairs, we can't do delete_before, on the chance |
|
1012 |
+ # we need one of the to-be-deleted files as a copy source. |
|
1013 |
+ if len(copy_pairs) > 0: |
|
1014 |
+ cfg.delete_after = True |
|
1015 |
+ |
|
1016 |
+ if cfg.delete_removed and not cfg.delete_after: |
|
1017 |
+ _do_deletes(s3, remote_list) |
|
1018 |
+ |
|
1019 |
+ total_size = 0 |
|
1020 |
+ total_elapsed = 0.0 |
|
1021 |
+ timestamp_start = time.time() |
|
1022 |
+ n, total_size = _upload(local_list, 0, local_count + update_count, total_size) 

1023 
+ n, total_size = _upload(update_list, n, local_count + update_count, total_size) 
|
1024 |
+ n_copies, saved_bytes = remote_copy(s3, copy_pairs, destination_base) |
|
1025 |
+ if cfg.delete_removed and cfg.delete_after: |
|
1026 |
+ _do_deletes(s3, remote_list) |
|
1027 |
+ total_elapsed = time.time() - timestamp_start |
|
1028 |
+ total_speed = total_elapsed and total_size/total_elapsed or 0.0 |
|
1029 |
+ speed_fmt = formatSize(total_speed, human_readable = True, floating_point = True) |
|
1030 |
+ |
|
1031 |
+ # Only print out the result if any work has been done or |
|
1032 |
+ # if the user asked for verbose output |
|
1033 |
+ outstr = "Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s. Copied %d files saving %d bytes transfer." % (total_size, total_elapsed, speed_fmt[0], speed_fmt[1], n_copies, saved_bytes) |
|
1034 |
+ if total_size + saved_bytes > 0: |
|
1035 |
+ output(outstr) |
|
1036 |
+ else: |
|
1037 |
+ info(outstr) |
|
900 | 1038 |
|
901 |
- warning(u"Exitting now because of --dry-run") |
|
902 | 1039 |
return |
903 | 1040 |
|
904 |
- if cfg.delete_removed: |
|
905 |
- for key in remote_list: |
|
906 |
- uri = S3Uri(remote_list[key]['object_uri_str']) |
|
907 |
- s3.object_delete(uri) |
|
908 |
- output(u"deleted: '%s'" % uri) |
|
1041 |
+ def _invalidate_on_cf(destination_base_uri): |
|
1042 |
+ cf = CloudFront(cfg) |
|
1043 |
+ default_index_file = None |
|
1044 |
+ if cfg.invalidate_default_index_on_cf or cfg.invalidate_default_index_root_on_cf: |
|
1045 |
+ info_response = s3.website_info(destination_base_uri, cfg.bucket_location) |
|
1046 |
+ if info_response: |
|
1047 |
+ default_index_file = info_response['index_document'] |
|
1048 |
+ if len(default_index_file) < 1: |
|
1049 |
+ default_index_file = None |
|
1050 |
+ |
|
1051 |
+ result = cf.InvalidateObjects(destination_base_uri, uploaded_objects_list, default_index_file, cfg.invalidate_default_index_on_cf, cfg.invalidate_default_index_root_on_cf) |
|
1052 |
+ if result['status'] == 201: |
|
1053 |
+ output("Created invalidation request for %d paths" % len(uploaded_objects_list)) |
|
1054 |
+ output("Check progress with: s3cmd cfinvalinfo cf://%s/%s" % (result['dist_id'], result['request_id'])) |
|
909 | 1055 |
|
1056 |
+ |
|
1057 |
+ # main execution |
|
1058 |
+ s3 = S3(cfg) |
|
910 | 1059 |
uploaded_objects_list = [] |
911 |
- total_size = 0 |
|
912 |
- total_elapsed = 0.0 |
|
913 |
- timestamp_start = time.time() |
|
914 |
- seq = 0 |
|
915 |
- file_list = local_list.keys() |
|
916 |
- file_list.sort() |
|
917 |
- for file in file_list: |
|
918 |
- seq += 1 |
|
919 |
- item = local_list[file] |
|
920 |
- src = item['full_name'] |
|
921 |
- uri = S3Uri(item['remote_uri']) |
|
922 |
- seq_label = "[%d of %d]" % (seq, local_count) |
|
923 |
- extra_headers = copy(cfg.extra_headers) |
|
924 |
- try: |
|
925 |
- if cfg.preserve_attrs: |
|
926 |
- attr_header = _build_attr_header(src) |
|
927 |
- debug(u"attr_header: %s" % attr_header) |
|
928 |
- extra_headers.update(attr_header) |
|
929 |
- response = s3.object_put(src, uri, extra_headers, extra_label = seq_label) |
|
930 |
- except InvalidFileError, e: |
|
931 |
- warning(u"File can not be uploaded: %s" % e) |
|
932 |
- continue |
|
933 |
- except S3UploadError, e: |
|
934 |
- error(u"%s: upload failed too many times. Skipping that file." % item['full_name_unicode']) |
|
935 |
- continue |
|
936 |
- speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) |
|
937 |
- if not cfg.progress_meter: |
|
938 |
- output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % |
|
939 |
- (item['full_name_unicode'], uri, response["size"], response["elapsed"], |
|
940 |
- speed_fmt[0], speed_fmt[1], seq_label)) |
|
941 |
- total_size += response["size"] |
|
942 |
- uploaded_objects_list.append(uri.object()) |
|
943 | 1060 |
|
944 |
- total_elapsed = time.time() - timestamp_start |
|
945 |
- total_speed = total_elapsed and total_size/total_elapsed or 0.0 |
|
946 |
- speed_fmt = formatSize(total_speed, human_readable = True, floating_point = True) |
|
1061 |
+ if cfg.encrypt: |
|
1062 |
+ error(u"S3cmd 'sync' doesn't yet support GPG encryption, sorry.") |
|
1063 |
+ error(u"Either use unconditional 's3cmd put --recursive'") |
|
1064 |
+ error(u"or disable encryption with --no-encrypt parameter.") |
|
1065 |
+ sys.exit(1) |
|
947 | 1066 |
|
948 |
- # Only print out the result if any work has been done or |
|
949 |
- # if the user asked for verbose output |
|
950 |
- outstr = "Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s" % (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]) |
|
951 |
- if total_size > 0: |
|
952 |
- output(outstr) |
|
953 |
- else: |
|
954 |
- info(outstr) |
|
1067 |
+ local_list, single_file_local = fetch_local_list(args[:-1], recursive = True) |
|
955 | 1068 |
|
956 |
- if cfg.invalidate_on_cf: |
|
957 |
- if len(uploaded_objects_list) == 0: |
|
958 |
- info("Nothing to invalidate in CloudFront") |
|
959 |
- else: |
|
960 |
- # 'uri' from the last iteration is still valid at this point |
|
961 |
- cf = CloudFront(cfg) |
|
962 |
- result = cf.InvalidateObjects(uri, uploaded_objects_list) |
|
963 |
- if result['status'] == 201: |
|
964 |
- output("Created invalidation request for %d paths" % len(uploaded_objects_list)) |
|
965 |
- output("Check progress with: s3cmd cfinvalinfo cf://%s/%s" % (result['dist_id'], result['request_id'])) |
|
1069 |
+ destinations = [args[-1]] |
|
1070 |
+ if cfg.additional_destinations: |
|
1071 |
+ destinations = destinations + cfg.additional_destinations |
|
1072 |
+ |
|
1073 |
+ if 'fork' not in os.__all__ or len(destinations) < 2: |
|
1074 |
+ destination_base_uri = _single_process(local_list) |
|
1075 |
+ if cfg.invalidate_on_cf: |
|
1076 |
+ if len(uploaded_objects_list) == 0: |
|
1077 |
+ info("Nothing to invalidate in CloudFront") |
|
1078 |
+ else: |
|
1079 |
+ _invalidate_on_cf(destination_base_uri) |
|
1080 |
+ else: |
|
1081 |
+ _parent() |
|
1082 |
+ if cfg.invalidate_on_cf: |
|
1083 |
+ error(u"You cannot use both --cf-invalidate and --add-destination.") |
|
966 | 1084 |
|
967 | 1085 |
def cmd_sync(args): |
968 | 1086 |
if (len(args) < 2): |
... | ... |
@@ -977,45 +1164,6 @@ def cmd_sync(args): |
977 | 977 |
raise ParameterError("Invalid source/destination: '%s'" % "' '".join(args)) |
978 | 978 |
|
979 | 979 |
def cmd_setacl(args): |
980 |
- def _update_acl(uri, seq_label = ""): |
|
981 |
- something_changed = False |
|
982 |
- acl = s3.get_acl(uri) |
|
983 |
- debug(u"acl: %s - %r" % (uri, acl.grantees)) |
|
984 |
- if cfg.acl_public == True: |
|
985 |
- if acl.isAnonRead(): |
|
986 |
- info(u"%s: already Public, skipping %s" % (uri, seq_label)) |
|
987 |
- else: |
|
988 |
- acl.grantAnonRead() |
|
989 |
- something_changed = True |
|
990 |
- elif cfg.acl_public == False: # we explicitely check for False, because it could be None |
|
991 |
- if not acl.isAnonRead(): |
|
992 |
- info(u"%s: already Private, skipping %s" % (uri, seq_label)) |
|
993 |
- else: |
|
994 |
- acl.revokeAnonRead() |
|
995 |
- something_changed = True |
|
996 |
- |
|
997 |
- # update acl with arguments |
|
998 |
- # grant first and revoke later, because revoke has priority |
|
999 |
- if cfg.acl_grants: |
|
1000 |
- something_changed = True |
|
1001 |
- for grant in cfg.acl_grants: |
|
1002 |
- acl.grant(**grant); |
|
1003 |
- |
|
1004 |
- if cfg.acl_revokes: |
|
1005 |
- something_changed = True |
|
1006 |
- for revoke in cfg.acl_revokes: |
|
1007 |
- acl.revoke(**revoke); |
|
1008 |
- |
|
1009 |
- if not something_changed: |
|
1010 |
- return |
|
1011 |
- |
|
1012 |
- retsponse = s3.set_acl(uri, acl) |
|
1013 |
- if retsponse['status'] == 200: |
|
1014 |
- if cfg.acl_public in (True, False): |
|
1015 |
- output(u"%s: ACL set to %s %s" % (uri, set_to_acl, seq_label)) |
|
1016 |
- else: |
|
1017 |
- output(u"%s: ACL updated" % uri) |
|
1018 |
- |
|
1019 | 980 |
s3 = S3(cfg) |
1020 | 981 |
|
1021 | 982 |
set_to_acl = cfg.acl_public and "Public" or "Private" |
... | ... |
@@ -1031,7 +1179,7 @@ def cmd_setacl(args): |
1031 | 1031 |
else: |
1032 | 1032 |
info("Setting bucket-level ACL for %s" % (uri.uri())) |
1033 | 1033 |
if not cfg.dry_run: |
1034 |
- _update_acl(uri) |
|
1034 |
+ update_acl(s3, uri) |
|
1035 | 1035 |
else: |
1036 | 1036 |
args.append(arg) |
1037 | 1037 |
|
... | ... |
@@ -1056,7 +1204,19 @@ def cmd_setacl(args): |
1056 | 1056 |
seq += 1 |
1057 | 1057 |
seq_label = "[%d of %d]" % (seq, remote_count) |
1058 | 1058 |
uri = S3Uri(remote_list[key]['object_uri_str']) |
1059 |
- _update_acl(uri, seq_label) |
|
1059 |
+ update_acl(s3, uri, seq_label) |
|
1060 |
+ |
|
1061 |
+def cmd_setpolicy(args): |
|
1062 |
+ s3 = S3(cfg) |
|
1063 |
+ uri = args.pop(0) |
|
1064 |
+ bucket_uri = S3Uri(uri) |
|
1065 |
+ if bucket_uri.object(): |
|
1066 |
+ raise ParameterError("Only bucket name is required for [setpolicy] command") |
|
1067 |
+ policy = args.pop() |
|
1068 |
+ info("Setting access policy for bucket %s to:\n\n%s" % (bucket_uri.uri(), policy)) |
|
1069 |
+ response = s3.set_policy(bucket_uri, policy) |
|
1070 |
+ if response['status'] == 204: |
|
1071 |
+ output(u"%s: Policy updated" % uri) |
|
1060 | 1072 |
|
1061 | 1073 |
def cmd_accesslog(args): |
1062 | 1074 |
s3 = S3(cfg) |
... | ... |
@@ -1085,6 +1245,15 @@ def cmd_sign(args): |
1085 | 1085 |
signature = Utils.sign_string(string_to_sign) |
1086 | 1086 |
output("Signature: %s" % signature) |
1087 | 1087 |
|
1088 |
+def cmd_signurl(args): |
|
1089 |
+ expiry = args.pop() |
|
1090 |
+ url_to_sign = S3Uri(args.pop()) |
|
1091 |
+ if url_to_sign.type != 's3': |
|
1092 |
+ raise ParameterError("Must be S3Uri. Got: %s" % url_to_sign) |
|
1093 |
+ debug("url to sign: %r" % url_to_sign) |
|
1094 |
+ signed_url = Utils.sign_url(url_to_sign, expiry) |
|
1095 |
+ output(signed_url) |
|
1096 |
+ |
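A usage sketch of what cmd_signurl() does for "s3cmd signurl s3://mybucket/private.pdf 1356998400"; bucket, object and expiry are hypothetical:

    signed = Utils.sign_url(S3Uri("s3://mybucket/private.pdf"), 1356998400)
    # -> http://mybucket.s3.amazonaws.com/private.pdf?AWSAccessKeyId=...&Expires=1356998400&Signature=...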
|
1088 | 1097 |
def cmd_fixbucket(args): |
1089 | 1098 |
def _unescape(text): |
1090 | 1099 |
## |
... | ... |
@@ -1399,8 +1568,10 @@ def get_commands_list(): |
1399 | 1399 |
{"cmd":"cp", "label":"Copy object", "param":"s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]", "func":cmd_cp, "argc":2}, |
1400 | 1400 |
{"cmd":"mv", "label":"Move object", "param":"s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]", "func":cmd_mv, "argc":2}, |
1401 | 1401 |
{"cmd":"setacl", "label":"Modify Access control list for Bucket or Files", "param":"s3://BUCKET[/OBJECT]", "func":cmd_setacl, "argc":1}, |
1402 |
+ {"cmd":"setpolicy", "label":"Set an access policy for a bucket", "param":"s3://BUCKET POLICY_STRING", "func":cmd_setpolicy, "argc":2}, |
|
1402 | 1403 |
{"cmd":"accesslog", "label":"Enable/disable bucket access logging", "param":"s3://BUCKET", "func":cmd_accesslog, "argc":1}, |
1403 | 1404 |
{"cmd":"sign", "label":"Sign arbitrary string using the secret key", "param":"STRING-TO-SIGN", "func":cmd_sign, "argc":1}, |
1405 |
+ {"cmd":"signurl", "label":"Sign an S3 URL to provide limited public access with expiry", "param":"s3://BUCKET/OBJECT expiry_epoch", "func":cmd_signurl, "argc":2}, |
|
1404 | 1406 |
{"cmd":"fixbucket", "label":"Fix invalid file names in a bucket", "param":"s3://BUCKET[/PREFIX]", "func":cmd_fixbucket, "argc":1}, |
1405 | 1407 |
|
1406 | 1408 |
## Website commands |
... | ... |
@@ -1424,6 +1595,47 @@ def format_commands(progname, commands_list): |
1424 | 1424 |
help += " %s\n %s %s %s\n" % (cmd["label"], progname, cmd["cmd"], cmd["param"]) |
1425 | 1425 |
return help |
1426 | 1426 |
|
1427 |
+ |
|
1428 |
+def update_acl(s3, uri, seq_label=""): |
|
1429 |
+ something_changed = False |
|
1430 |
+ acl = s3.get_acl(uri) |
|
1431 |
+ debug(u"acl: %s - %r" % (uri, acl.grantees)) |
|
1432 |
+ if cfg.acl_public == True: |
|
1433 |
+ if acl.isAnonRead(): |
|
1434 |
+ info(u"%s: already Public, skipping %s" % (uri, seq_label)) |
|
1435 |
+ else: |
|
1436 |
+ acl.grantAnonRead() |
|
1437 |
+ something_changed = True |
|
1438 |
+ elif cfg.acl_public == False: # we explicitly check for False, because it could be None 
|
1439 |
+ if not acl.isAnonRead(): |
|
1440 |
+ info(u"%s: already Private, skipping %s" % (uri, seq_label)) |
|
1441 |
+ else: |
|
1442 |
+ acl.revokeAnonRead() |
|
1443 |
+ something_changed = True |
|
1444 |
+ |
|
1445 |
+ # update acl with arguments |
|
1446 |
+ # grant first and revoke later, because revoke has priority |
|
1447 |
+ if cfg.acl_grants: |
|
1448 |
+ something_changed = True |
|
1449 |
+ for grant in cfg.acl_grants: |
|
1450 |
+ acl.grant(**grant) |
|
1451 |
+ |
|
1452 |
+ if cfg.acl_revokes: |
|
1453 |
+ something_changed = True |
|
1454 |
+ for revoke in cfg.acl_revokes: |
|
1455 |
+ acl.revoke(**revoke) |
|
1456 |
+ |
|
1457 |
+ if not something_changed: |
|
1458 |
+ return |
|
1459 |
+ |
|
1460 |
+ response = s3.set_acl(uri, acl) 
|
1461 |
+ if response['status'] == 200: 
|
1462 |
+ if cfg.acl_public in (True, False): |
|
1463 |
+ set_to_acl = cfg.acl_public and "Public" or "Private" |
|
1464 |
+ output(u"%s: ACL set to %s %s" % (uri, set_to_acl, seq_label)) |
|
1465 |
+ else: |
|
1466 |
+ output(u"%s: ACL updated" % uri) |
|
1467 |
+ |
|
1427 | 1468 |
class OptionMimeType(Option): |
1428 | 1469 |
def check_mimetype(option, opt, value): |
1429 | 1470 |
if re.compile("^[a-z0-9]+/[a-z0-9+\.-]+(;.*)?$", re.IGNORECASE).match(value): |
... | ... |
@@ -1488,6 +1700,8 @@ def main(): |
1488 | 1488 |
optparser.add_option( "--configure", dest="run_configure", action="store_true", help="Invoke interactive (re)configuration tool. Optionally use as '--configure s3://come-bucket' to test access to a specific bucket instead of attempting to list them all.") |
1489 | 1489 |
optparser.add_option("-c", "--config", dest="config", metavar="FILE", help="Config file name. Defaults to %default") |
1490 | 1490 |
optparser.add_option( "--dump-config", dest="dump_config", action="store_true", help="Dump current configuration after parsing config files and command line options and exit.") |
1491 |
+ optparser.add_option( "--access_key", dest="access_key", help="AWS Access Key") |
|
1492 |
+ optparser.add_option( "--secret_key", dest="secret_key", help="AWS Secret Key") |
|
1491 | 1493 |
|
1492 | 1494 |
optparser.add_option("-n", "--dry-run", dest="dry_run", action="store_true", help="Only show what should be uploaded or downloaded but don't actually do it. May still perform S3 requests to get bucket listings and other information though (only for file transfer commands)") |
1493 | 1495 |
|
... | ... |
@@ -1506,6 +1720,10 @@ def main(): |
1506 | 1506 |
|
1507 | 1507 |
optparser.add_option( "--delete-removed", dest="delete_removed", action="store_true", help="Delete remote objects with no corresponding local file [sync]") |
1508 | 1508 |
optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.") |
1509 |
+ optparser.add_option( "--delete-after", dest="delete_after", action="store_true", help="Perform deletes after new uploads [sync]") |
|
1510 |
+ optparser.add_option( "--delay-updates", dest="delay_updates", action="store_true", help="Put all updated files into place at end [sync]") |
|
1511 |
+ optparser.add_option( "--add-destination", dest="additional_destinations", action="append", help="Additional destination for parallel uploads, in addition to last arg. May be repeated.") |
|
1512 |
+ optparser.add_option( "--delete-after-fetch", dest="delete_after_fetch", action="store_true", help="Delete remote objects after fetching to local file (only for [get] and [sync] commands).") |
|
1509 | 1513 |
optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). Default for [sync] command.") |
1510 | 1514 |
optparser.add_option( "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes") |
1511 | 1515 |
optparser.add_option( "--exclude", dest="exclude", action="append", metavar="GLOB", help="Filenames and paths matching GLOB will be excluded from sync") |
... | ... |
@@ -1524,13 +1742,14 @@ def main(): |
1524 | 1524 |
optparser.add_option( "--no-access-logging", dest="log_target_prefix", action="store_false", help="Disable access logging (for [cfmodify] and [accesslog] commands)") |
1525 | 1525 |
|
1526 | 1526 |
optparser.add_option( "--default-mime-type", dest="default_mime_type", action="store_true", help="Default MIME-type for stored objects. Application default is binary/octet-stream.") |
1527 |
- optparser.add_option( "--guess-mime-type", dest="guess_mime_type", action="store_true", help="Guess MIME-type of files by their extension or mime magic. Fall back to default MIME-Type as specified by --default-mime-type option") |
|
1527 |
+ optparser.add_option("-M", "--guess-mime-type", dest="guess_mime_type", action="store_true", help="Guess MIME-type of files by their extension or mime magic. Fall back to default MIME-Type as specified by --default-mime-type option") |
|
1528 | 1528 |
optparser.add_option( "--no-guess-mime-type", dest="guess_mime_type", action="store_false", help="Don't guess MIME-type and use the default type instead.") |
1529 | 1529 |
optparser.add_option("-m", "--mime-type", dest="mime_type", type="mimetype", metavar="MIME/TYPE", help="Force MIME-type. Override both --default-mime-type and --guess-mime-type.") |
1530 | 1530 |
|
1531 | 1531 |
optparser.add_option( "--add-header", dest="add_header", action="append", metavar="NAME:VALUE", help="Add a given HTTP header to the upload request. Can be used multiple times. For instance set 'Expires' or 'Cache-Control' headers (or both) using this options if you like.") |
1532 | 1532 |
|
1533 | 1533 |
optparser.add_option( "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding) |
1534 |
+ optparser.add_option( "--add-encoding-exts", dest="add_encoding_exts", metavar="EXTENSIONs", help="Add encoding to these comma delimited extensions i.e. (css,js,html) when uploading to S3 )") |
|
1534 | 1535 |
optparser.add_option( "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. Use with caution!") |
1535 | 1536 |
|
1536 | 1537 |
optparser.add_option( "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than --multipart-chunk-size-mb") |
... | ... |
@@ -1547,6 +1766,10 @@ def main(): |
1547 | 1547 |
optparser.add_option( "--enable", dest="enable", action="store_true", help="Enable given CloudFront distribution (only for [cfmodify] command)") |
1548 | 1548 |
optparser.add_option( "--disable", dest="enable", action="store_false", help="Enable given CloudFront distribution (only for [cfmodify] command)") |
1549 | 1549 |
optparser.add_option( "--cf-invalidate", dest="invalidate_on_cf", action="store_true", help="Invalidate the uploaded filed in CloudFront. Also see [cfinval] command.") |
1550 |
+ # joseprio: adding options to invalidate the default index and the default |
|
1551 |
+ # index root |
|
1552 |
+ optparser.add_option( "--cf-invalidate-default-index", dest="invalidate_default_index_on_cf", action="store_true", help="When using Custom Origin and S3 static website, invalidate the default index file.") |
|
1553 |
+ optparser.add_option( "--cf-no-invalidate-default-index-root", dest="invalidate_default_index_root_on_cf", action="store_false", help="When using Custom Origin and S3 static website, don't invalidate the path to the default index file.") |
|
1550 | 1554 |
optparser.add_option( "--cf-add-cname", dest="cf_cnames_add", action="append", metavar="CNAME", help="Add given CNAME to a CloudFront distribution (only for [cfcreate] and [cfmodify] commands)") |
1551 | 1555 |
optparser.add_option( "--cf-remove-cname", dest="cf_cnames_remove", action="append", metavar="CNAME", help="Remove given CNAME from a CloudFront distribution (only for [cfmodify] command)") |
1552 | 1556 |
optparser.add_option( "--cf-comment", dest="cf_comment", action="store", metavar="COMMENT", help="Set COMMENT for a given CloudFront distribution (only for [cfcreate] and [cfmodify] commands)") |
... | ... |
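The two new --cf-invalidate-default-index* flags matter because a static website serves its default index file under both its own name and the bare directory URL, so a changed docs/index.html may need either or both CloudFront paths invalidated. A toy sketch of the path expansion these flags control, under illustrative names and defaults rather than the patch's actual code:

    def expand_index_paths(path, default_index="index.html",
                           keep_index=True, add_root=True):
        # An uploaded default-index file can be cached by CloudFront under
        # both "dir/index.html" and "dir/"; emit whichever the flags ask for.
        suffix = "/" + default_index
        if not (path == default_index or path.endswith(suffix)):
            return [path]
        expanded = []
        if keep_index:
            expanded.append(path)
        if add_root:
            expanded.append(path[:-len(default_index)])
        return expanded

    print(expand_index_paths("docs/index.html"))  # ['docs/index.html', 'docs/']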
@@ -1555,6 +1778,7 @@ def main(): |
1555 | 1555 |
optparser.add_option("-d", "--debug", dest="verbosity", action="store_const", const=logging.DEBUG, help="Enable debug output.") |
1556 | 1556 |
optparser.add_option( "--version", dest="show_version", action="store_true", help="Show s3cmd version (%s) and exit." % (PkgInfo.version)) |
1557 | 1557 |
optparser.add_option("-F", "--follow-symlinks", dest="follow_symlinks", action="store_true", default=False, help="Follow symbolic links as if they are regular files") |
1558 |
+ optparser.add_option( "--cache-file", dest="cache_file", action="store", default="", metavar="FILE", help="Cache FILE containing local source MD5 values") |
|
1558 | 1559 |
|
1559 | 1560 |
optparser.set_usage(optparser.usage + " COMMAND [parameters]") |
1560 | 1561 |
optparser.set_description('S3cmd is a tool for managing objects in '+ |
... | ... |
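On --cache-file: re-hashing a large local tree on every sync is expensive, and a cache keyed on file identity lets unchanged files skip the MD5 pass. The on-disk format here is an implementation detail; the sketch below only shows the usual (path, size, mtime) trick with a hypothetical helper:

    import hashlib, os

    def cached_md5(path, cache):
        # Recompute the digest only when size or mtime has changed.
        st = os.stat(path)
        key = (path, st.st_size, int(st.st_mtime))
        if key not in cache:
            with open(path, "rb") as f:
                cache[key] = hashlib.md5(f.read()).hexdigest()
        return cache[key]

    cache = {}
    # cached_md5("some/local/file", cache)  # hashes once, then reuses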
@@ -1681,6 +1905,9 @@ def main(): |
1681 | 1681 |
## Some CloudFront.Cmd.Options() options are not settable from command line |
1682 | 1682 |
pass |
1683 | 1683 |
|
1684 |
+ if options.additional_destinations: |
|
1685 |
+ cfg.additional_destinations = options.additional_destinations |
|
1686 |
+ |
|
1684 | 1687 |
## Set output and filesystem encoding for printing out filenames. |
1685 | 1688 |
sys.stdout = codecs.getwriter(cfg.encoding)(sys.stdout, "replace") |
1686 | 1689 |
sys.stderr = codecs.getwriter(cfg.encoding)(sys.stderr, "replace") |
... | ... |
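The codecs wrappers installed above decide what happens when a filename cannot be represented in the output charset: the "replace" error handler substitutes a placeholder instead of raising UnicodeEncodeError. A tiny Python 2 illustration, matching the era of this code:

    import codecs, sys

    out = codecs.getwriter("ascii")(sys.stdout, "replace")
    out.write(u"na\xefve filename\n")   # prints: na?ve filename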
@@ -63,6 +63,23 @@ Enable/disable bucket access logging |
63 | 63 |
s3cmd \fBsign\fR \fISTRING-TO-SIGN\fR |
64 | 64 |
Sign arbitrary string using the secret key |
65 | 65 |
.TP |
66 |
+s3cmd \fBsignurl\fR \fIs3://BUCKET[/OBJECT]\fR \fIexpiry-in-epoch-seconds\fR |
|
67 |
+Sign an S3 URL with the secret key, producing a URL that grants access to

68 
+the named object, under the credentials that signed it, until the expiry

69 
+time given in epoch seconds has passed. This is most useful for publishing

70 
+time- or distribution-limited URLs to otherwise-private S3 objects.
|
71 |
+.br |
|
72 |
+This is a purely offline operation. Your API key and secret are not sent on |
|
73 |
+the wire, though your public API key is included in the generated URL. Because |
|
74 |
+it's offline, no validation is done to ensure that the bucket and object actually |
|
75 |
+exist, or that this API key has permission to access them. |
|
76 |
+.br |
|
77 |
+The generated URL uses http://, but you can simply change it to https:// if you prefer.
|
78 |
+.br |
|
79 |
+See |
|
80 |
+.B http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html |
|
81 |
+for more information on signed URLs, and the examples section below. |
|
82 |
+.TP |
|
66 | 83 |
s3cmd \fBfixbucket\fR \fIs3://BUCKET[/PREFIX]\fR |
67 | 84 |
Fix invalid file names in a bucket |
68 | 85 |
|
... | ... |
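The signurl text above references the S3 RESTAuthentication scheme; for orientation, here is a minimal sketch of that query-string signing (HMAC-SHA1 over a fixed string-to-sign), written for Python 2 like the rest of the codebase. It illustrates the documented protocol, not s3cmd's actual signurl implementation, and it skips URL-encoding of the key and other edge cases:

    import base64, hmac, time
    from hashlib import sha1
    from urllib import quote_plus

    def sign_url(bucket, key, expires, access_key, secret_key):
        # Sign "GET\n\n\n<expires>\n/<bucket>/<key>" with the secret key.
        string_to_sign = "GET\n\n\n%d\n/%s/%s" % (expires, bucket, key)
        sig = base64.b64encode(hmac.new(secret_key, string_to_sign, sha1).digest())
        return ("http://%s.s3.amazonaws.com/%s?AWSAccessKeyId=%s&Expires=%d"
                "&Signature=%s" % (bucket, key, access_key, expires,
                                   quote_plus(sig)))

    # sign_url("mybucket", "someobj", int(time.time()) + 604800,
    #          "<access key>", "<secret key>")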
@@ -186,6 +203,12 @@ Delete remote objects with no corresponding local file |
186 | 186 |
\fB\-\-no\-delete\-removed\fR |
187 | 187 |
Don't delete remote objects. |
188 | 188 |
.TP |
189 |
+\fB\-\-delete\-after\fR |
|
190 |
+Perform deletes after new uploads [sync]. |
|
191 |
+.TP |
|
192 |
+\fB\-\-delay\-updates\fR |
|
193 |
+Put all updated files into place at the end [sync].
|
194 |
+.TP |
|
189 | 195 |
\fB\-p\fR, \fB\-\-preserve\fR |
190 | 196 |
Preserve filesystem attributes (mode, ownership, |
191 | 197 |
timestamps). Default for [sync] command. |
... | ... |
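On the ordering semantics of the two new sync flags documented above: --delete-after defers remote deletions until the uploads have finished, so a live site is never missing removed objects before their replacements exist, and --delay-updates similarly defers putting changed files into place until everything has transferred. A toy sketch of the first flag, with illustrative names only:

    def sync_plan(uploads, deletions, delete_after=False):
        # Default order deletes first; --delete-after flips it so the
        # bucket never lacks both old and new content mid-sync.
        if delete_after:
            return uploads + deletions
        return deletions + uploads

    print(sync_plan(["put a"], ["del b"], delete_after=True))
    # ['put a', 'del b']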
@@ -421,6 +444,12 @@ about matching file names against exclude and include rules. |
421 | 421 |
For example to exclude all files with ".jpg" extension except those beginning with a number use: |
422 | 422 |
.PP |
423 | 423 |
\-\-exclude '*.jpg' \-\-rinclude '[0-9].*\.jpg' |
424 |
+.PP |
|
425 |
+To produce a signed HTTP URL that gives anybody holding it access to the

426 
+normally private S3 object s3://mybucket/someobj (which you must have

427 
+permission to access) for one week from today, use:
|
428 |
+.PP |
|
429 |
+ s3cmd signurl s3://mybucket/someobj `date -d 'today + 1 week' +%s` |
|
424 | 430 |
.SH SEE ALSO |
425 | 431 |
For the most up to date list of options run |
426 | 432 |
.B s3cmd \-\-help |
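One closing note on the signurl example above: the backtick `date -d 'today + 1 week' +%s` is just shell arithmetic for an epoch timestamp, and the same expiry can be computed directly in Python:

    import time

    # One week from now, in epoch seconds (7 * 24 * 3600 = 604800):
    expires = int(time.time()) + 604800
    print(expires)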