* ansible-galaxy - increase page size and add retry decorator for throttling (#74240)
* Get available collection versions with page_size=100 for v2 and limit=100 for v3
* Update unit tests for larger page sizes
* Add a generic retry decorator in module_utils/api.py that accepts an Iterable of delays and a callable to determine if an exception inheriting from Exception should be retried
* Use the new decorator to handle Galaxy API rate limiting
* Add unit tests for new retry decorator
* Preserve the decorated function's metadata with functools.wraps
ci_complete
Co-authored-by: Matt Martz <matt@sivel.net>
Co-authored-by: Sviatoslav Sydorenko <wk.cvs.github@sydorenko.org.ua>
(cherry picked from commit ee725846f070fc6b0dd79b5e8c5199ec652faf87)
* Add changelog for ansible-galaxy improvements (#74738)
Changelog for #74240
(cherry picked from commit 9cfedcd9c911f994f830d494739a6bfe4de1b080)
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,6 @@ |
0 |
+bugfixes: |
|
1 |
+- >- |
|
2 |
+ Improve resilience of ``ansible-galaxy collection`` by increasing the page |
|
3 |
+ size to make fewer requests overall and retrying queries with a jittered |
|
4 |
+ exponential backoff when rate limiting HTTP codes (520 and 429) occur. |
|
5 |
+ (https://github.com/ansible/ansible/issues/74191) |
... | ... |
@@ -15,9 +15,11 @@ import time |
15 | 15 |
from ansible import constants as C |
16 | 16 |
from ansible.errors import AnsibleError |
17 | 17 |
from ansible.galaxy.user_agent import user_agent |
18 |
+from ansible.module_utils.api import retry_with_delays_and_condition |
|
19 |
+from ansible.module_utils.api import generate_jittered_backoff |
|
18 | 20 |
from ansible.module_utils.six import string_types |
19 | 21 |
from ansible.module_utils.six.moves.urllib.error import HTTPError |
20 |
-from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse |
|
22 |
+from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse, parse_qs, urljoin |
|
21 | 23 |
from ansible.module_utils._text import to_bytes, to_native, to_text |
22 | 24 |
from ansible.module_utils.urls import open_url, prepare_multipart |
23 | 25 |
from ansible.utils.display import Display |
... | ... |
@@ -30,6 +32,18 @@ except ImportError: |
30 | 30 |
from urlparse import urlparse |
31 | 31 |
|
32 | 32 |
display = Display() |
33 |
+COLLECTION_PAGE_SIZE = 100 |
|
34 |
+RETRY_HTTP_ERROR_CODES = [ # TODO: Allow user-configuration |
|
35 |
+ 429, # Too Many Requests |
|
36 |
+ 520, # Galaxy rate limit error code (Cloudflare unknown error) |
|
37 |
+] |
|
38 |
+ |
|
39 |
+ |
|
40 |
+def is_rate_limit_exception(exception): |
|
41 |
+ # Note: cloud.redhat.com masks rate limit errors with 403 (Forbidden) error codes. |
|
42 |
+ # Since 403 could reflect the actual problem (such as an expired token), we should |
|
43 |
+ # not retry by default. |
|
44 |
+ return isinstance(exception, GalaxyError) and exception.http_code in RETRY_HTTP_ERROR_CODES |
|
33 | 45 |
|
34 | 46 |
|
35 | 47 |
def g_connect(versions): |
... | ... |
@@ -187,6 +201,10 @@ class GalaxyAPI: |
187 | 187 |
# Calling g_connect will populate self._available_api_versions |
188 | 188 |
return self._available_api_versions |
189 | 189 |
|
190 |
+ @retry_with_delays_and_condition( |
|
191 |
+ backoff_iterator=generate_jittered_backoff(retries=6, delay_base=2, delay_threshold=40), |
|
192 |
+ should_retry_error=is_rate_limit_exception |
|
193 |
+ ) |
|
190 | 194 |
def _call_galaxy(self, url, args=None, headers=None, method=None, auth_required=False, error_context_msg=None): |
191 | 195 |
headers = headers or {} |
192 | 196 |
self._add_auth_token(headers, url, required=auth_required) |
... | ... |
@@ -554,7 +572,9 @@ class GalaxyAPI: |
554 | 554 |
results_key = 'results' |
555 | 555 |
pagination_path = ['next'] |
556 | 556 |
|
557 |
- n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/') |
|
557 |
+ page_size_name = 'limit' if 'v3' in self.available_api_versions else 'page_size' |
|
558 |
+ n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/?%s=%d' % (page_size_name, COLLECTION_PAGE_SIZE)) |
|
559 |
+ n_url_info = urlparse(n_url) |
|
558 | 560 |
|
559 | 561 |
error_context_msg = 'Error when getting available collection versions for %s.%s from %s (%s)' \ |
560 | 562 |
% (namespace, name, self.name, self.api_server) |
... | ... |
@@ -573,7 +593,10 @@ class GalaxyAPI: |
573 | 573 |
elif relative_link: |
574 | 574 |
# TODO: This assumes the pagination result is relative to the root server. Will need to be verified |
575 | 575 |
# with someone who knows the AH API. |
576 |
- next_link = n_url.replace(urlparse(n_url).path, next_link) |
|
576 |
+ |
|
577 |
+ # Remove the query string from the versions_url to use the next_link's query |
|
578 |
+ n_url = urljoin(n_url, urlparse(n_url).path) |
|
579 |
+ next_link = n_url.replace(n_url_info.path, next_link) |
|
577 | 580 |
|
578 | 581 |
data = self._call_galaxy(to_native(next_link, errors='surrogate_or_strict'), |
579 | 582 |
error_context_msg=error_context_msg) |
... | ... |
@@ -26,6 +26,8 @@ The 'api' module provides the following common argument specs: |
26 | 26 |
from __future__ import (absolute_import, division, print_function) |
27 | 27 |
__metaclass__ = type |
28 | 28 |
|
29 |
+import functools |
|
30 |
+import random |
|
29 | 31 |
import sys |
30 | 32 |
import time |
31 | 33 |
|
... | ... |
@@ -114,3 +116,51 @@ def retry(retries=None, retry_pause=1): |
114 | 114 |
|
115 | 115 |
return retried |
116 | 116 |
return wrapper |
117 |
+ |
|
118 |
+ |
|
119 |
+def generate_jittered_backoff(retries=10, delay_base=3, delay_threshold=60): |
|
120 |
+ """The "Full Jitter" backoff strategy. |
|
121 |
+ |
|
122 |
+ Ref: https://www.awsarchitectureblog.com/2015/03/backoff.html |
|
123 |
+ |
|
124 |
+ :param retries: The number of delays to generate. |
|
125 |
+ :param delay_base: The base time in seconds used to calculate the exponential backoff. |
|
126 |
+ :param delay_threshold: The maximum time in seconds for any delay. |
|
127 |
+ """ |
|
128 |
+ for retry in range(0, retries): |
|
129 |
+ yield random.randint(0, min(delay_threshold, delay_base * 2 ** retry)) |
|
130 |
+ |
|
131 |
+ |
|
132 |
+def retry_never(exception_or_result): |
|
133 |
+ return False |
|
134 |
+ |
|
135 |
+ |
|
136 |
+def retry_with_delays_and_condition(backoff_iterator, should_retry_error=None): |
|
137 |
+ """Generic retry decorator. |
|
138 |
+ |
|
139 |
+ :param backoff_iterator: An iterable of delays in seconds. |
|
140 |
+ :param should_retry_error: A callable that takes an exception of the decorated function and decides whether to retry or not (returns a bool). |
|
141 |
+ """ |
|
142 |
+ if should_retry_error is None: |
|
143 |
+ should_retry_error = retry_never |
|
144 |
+ |
|
145 |
+ def function_wrapper(function): |
|
146 |
+ @functools.wraps(function) |
|
147 |
+ def run_function(*args, **kwargs): |
|
148 |
+ """This assumes the function has not already been called. |
|
149 |
+ If backoff_iterator is empty, we should still run the function a single time with no delay. |
|
150 |
+ """ |
|
151 |
+ call_retryable_function = functools.partial(function, *args, **kwargs) |
|
152 |
+ |
|
153 |
+ for delay in backoff_iterator: |
|
154 |
+ try: |
|
155 |
+ return call_retryable_function() |
|
156 |
+ except Exception as e: |
|
157 |
+ if not should_retry_error(e): |
|
158 |
+ raise |
|
159 |
+ time.sleep(delay) |
|
160 |
+ |
|
161 |
+ # Only or final attempt |
|
162 |
+ return call_retryable_function() |
|
163 |
+ return run_function |
|
164 |
+ return function_wrapper |
... | ... |
@@ -727,9 +727,10 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
727 | 727 |
actual = api.get_collection_versions('namespace', 'collection') |
728 | 728 |
assert actual == [u'1.0.0', u'1.0.1'] |
729 | 729 |
|
730 |
+ page_query = '?limit=100' if api_version == 'v3' else '?page_size=100' |
|
730 | 731 |
assert mock_open.call_count == 1 |
731 | 732 |
assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ |
732 |
- 'versions/' % api_version |
|
733 |
+ 'versions/%s' % (api_version, page_query) |
|
733 | 734 |
if token_ins: |
734 | 735 |
assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type |
735 | 736 |
|
... | ... |
@@ -738,9 +739,9 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
738 | 738 |
('v2', None, None, [ |
739 | 739 |
{ |
740 | 740 |
'count': 6, |
741 |
- 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2', |
|
741 |
+ 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100', |
|
742 | 742 |
'previous': None, |
743 |
- 'results': [ |
|
743 |
+ 'results': [ # Pay no mind, using more manageable results than page_size would indicate |
|
744 | 744 |
{ |
745 | 745 |
'version': '1.0.0', |
746 | 746 |
'href': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/1.0.0', |
... | ... |
@@ -753,7 +754,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
753 | 753 |
}, |
754 | 754 |
{ |
755 | 755 |
'count': 6, |
756 |
- 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3', |
|
756 |
+ 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3&page_size=100', |
|
757 | 757 |
'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions', |
758 | 758 |
'results': [ |
759 | 759 |
{ |
... | ... |
@@ -769,7 +770,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
769 | 769 |
{ |
770 | 770 |
'count': 6, |
771 | 771 |
'next': None, |
772 |
- 'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2', |
|
772 |
+ 'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100', |
|
773 | 773 |
'results': [ |
774 | 774 |
{ |
775 | 775 |
'version': '1.0.4', |
... | ... |
@@ -786,7 +787,8 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
786 | 786 |
{ |
787 | 787 |
'count': 6, |
788 | 788 |
'links': { |
789 |
- 'next': '/api/v3/collections/namespace/collection/versions/?page=2', |
|
789 |
+ # v3 links are relative and the limit is included during pagination |
|
790 |
+ 'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100', |
|
790 | 791 |
'previous': None, |
791 | 792 |
}, |
792 | 793 |
'data': [ |
... | ... |
@@ -803,7 +805,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
803 | 803 |
{ |
804 | 804 |
'count': 6, |
805 | 805 |
'links': { |
806 |
- 'next': '/api/v3/collections/namespace/collection/versions/?page=3', |
|
806 |
+ 'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=200', |
|
807 | 807 |
'previous': '/api/v3/collections/namespace/collection/versions', |
808 | 808 |
}, |
809 | 809 |
'data': [ |
... | ... |
@@ -821,7 +823,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m |
821 | 821 |
'count': 6, |
822 | 822 |
'links': { |
823 | 823 |
'next': None, |
824 |
- 'previous': '/api/v3/collections/namespace/collection/versions/?page=2', |
|
824 |
+ 'previous': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100', |
|
825 | 825 |
}, |
826 | 826 |
'data': [ |
827 | 827 |
{ |
... | ... |
@@ -852,12 +854,22 @@ def test_get_collection_versions_pagination(api_version, token_type, token_ins, |
852 | 852 |
assert actual == [u'1.0.0', u'1.0.1', u'1.0.2', u'1.0.3', u'1.0.4', u'1.0.5'] |
853 | 853 |
|
854 | 854 |
assert mock_open.call_count == 3 |
855 |
+ |
|
856 |
+ if api_version == 'v3': |
|
857 |
+ query_1 = 'limit=100' |
|
858 |
+ query_2 = 'limit=100&offset=100' |
|
859 |
+ query_3 = 'limit=100&offset=200' |
|
860 |
+ else: |
|
861 |
+ query_1 = 'page_size=100' |
|
862 |
+ query_2 = 'page=2&page_size=100' |
|
863 |
+ query_3 = 'page=3&page_size=100' |
|
864 |
+ |
|
855 | 865 |
assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ |
856 |
- 'versions/' % api_version |
|
866 |
+ 'versions/?%s' % (api_version, query_1) |
|
857 | 867 |
assert mock_open.mock_calls[1][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ |
858 |
- 'versions/?page=2' % api_version |
|
868 |
+ 'versions/?%s' % (api_version, query_2) |
|
859 | 869 |
assert mock_open.mock_calls[2][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ |
860 |
- 'versions/?page=3' % api_version |
|
870 |
+ 'versions/?%s' % (api_version, query_3) |
|
861 | 871 |
|
862 | 872 |
if token_type: |
863 | 873 |
assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type |
... | ... |
@@ -7,11 +7,19 @@ from __future__ import (absolute_import, division, print_function) |
7 | 7 |
__metaclass__ = type |
8 | 8 |
|
9 | 9 |
|
10 |
-from ansible.module_utils.api import rate_limit, retry |
|
10 |
+from ansible.module_utils.api import rate_limit, retry, retry_with_delays_and_condition |
|
11 | 11 |
|
12 | 12 |
import pytest |
13 | 13 |
|
14 | 14 |
|
15 |
+class CustomException(Exception): |
|
16 |
+ pass |
|
17 |
+ |
|
18 |
+ |
|
19 |
+class CustomBaseException(BaseException): |
|
20 |
+ pass |
|
21 |
+ |
|
22 |
+ |
|
15 | 23 |
class TestRateLimit: |
16 | 24 |
|
17 | 25 |
def test_ratelimit(self): |
... | ... |
@@ -26,17 +34,16 @@ class TestRateLimit: |
26 | 26 |
class TestRetry: |
27 | 27 |
|
28 | 28 |
def test_no_retry_required(self): |
29 |
- self.counter = 0 |
|
30 |
- |
|
31 | 29 |
@retry(retries=4, retry_pause=2) |
32 | 30 |
def login_database(): |
33 |
- self.counter += 1 |
|
31 |
+ login_database.counter += 1 |
|
34 | 32 |
return 'success' |
35 | 33 |
|
34 |
+ login_database.counter = 0 |
|
36 | 35 |
r = login_database() |
37 | 36 |
|
38 | 37 |
assert r == 'success' |
39 |
- assert self.counter == 1 |
|
38 |
+ assert login_database.counter == 1 |
|
40 | 39 |
|
41 | 40 |
def test_catch_exception(self): |
42 | 41 |
|
... | ... |
@@ -44,5 +51,71 @@ class TestRetry: |
44 | 44 |
def login_database(): |
45 | 45 |
return 'success' |
46 | 46 |
|
47 |
- with pytest.raises(Exception): |
|
47 |
+ with pytest.raises(Exception, match="Retry"): |
|
48 | 48 |
login_database() |
49 |
+ |
|
50 |
+ def test_no_retries(self): |
|
51 |
+ |
|
52 |
+ @retry() |
|
53 |
+ def login_database(): |
|
54 |
+ assert False, "Should not execute" |
|
55 |
+ |
|
56 |
+ login_database() |
|
57 |
+ |
|
58 |
+ |
|
59 |
+class TestRetryWithDelaysAndCondition: |
|
60 |
+ |
|
61 |
+ def test_empty_retry_iterator(self): |
|
62 |
+ @retry_with_delays_and_condition(backoff_iterator=[]) |
|
63 |
+ def login_database(): |
|
64 |
+ login_database.counter += 1 |
|
65 |
+ |
|
66 |
+ login_database.counter = 0 |
|
67 |
+ r = login_database() |
|
68 |
+ assert login_database.counter == 1 |
|
69 |
+ |
|
70 |
+ def test_no_retry_exception(self): |
|
71 |
+ @retry_with_delays_and_condition( |
|
72 |
+ backoff_iterator=[1], |
|
73 |
+ should_retry_error=lambda x: False, |
|
74 |
+ ) |
|
75 |
+ def login_database(): |
|
76 |
+ login_database.counter += 1 |
|
77 |
+ if login_database.counter == 1: |
|
78 |
+ raise CustomException("Error") |
|
79 |
+ |
|
80 |
+ login_database.counter = 0 |
|
81 |
+ with pytest.raises(CustomException, match="Error"): |
|
82 |
+ login_database() |
|
83 |
+ assert login_database.counter == 1 |
|
84 |
+ |
|
85 |
+ def test_no_retry_baseexception(self): |
|
86 |
+ @retry_with_delays_and_condition( |
|
87 |
+ backoff_iterator=[1], |
|
88 |
+ should_retry_error=lambda x: True, # Retry all exceptions inheriting from Exception |
|
89 |
+ ) |
|
90 |
+ def login_database(): |
|
91 |
+ login_database.counter += 1 |
|
92 |
+ if login_database.counter == 1: |
|
93 |
+ # Raise an exception inheriting from BaseException |
|
94 |
+ raise CustomBaseException("Error") |
|
95 |
+ |
|
96 |
+ login_database.counter = 0 |
|
97 |
+ with pytest.raises(CustomBaseException, match="Error"): |
|
98 |
+ login_database() |
|
99 |
+ assert login_database.counter == 1 |
|
100 |
+ |
|
101 |
+ def test_retry_exception(self): |
|
102 |
+ @retry_with_delays_and_condition( |
|
103 |
+ backoff_iterator=[1], |
|
104 |
+ should_retry_error=lambda x: isinstance(x, CustomException), |
|
105 |
+ ) |
|
106 |
+ def login_database(): |
|
107 |
+ login_database.counter += 1 |
|
108 |
+ if login_database.counter == 1: |
|
109 |
+ raise CustomException("Retry") |
|
110 |
+ return 'success' |
|
111 |
+ |
|
112 |
+ login_database.counter = 0 |
|
113 |
+ assert login_database() == 'success' |
|
114 |
+ assert login_database.counter == 2 |