Browse code

[2.10] ansible-galaxy - increase page size and add retry decorator (#74649)

* ansible-galaxy - increase page size and add retry decorator for throttling (#74240)

* Get available collection versions with page_size=100 for v2 and limit=100 for v3

* Update unit tests for larger page sizes

* Add a generic retry decorator in module_utils/api.py that accepts an Iterable of delays and a callable to determine if an exception inheriting from Exception should be retried

* Use the new decorator to handle Galaxy API rate limiting

* Add unit tests for new retry decorator

* Preserve the decorated function's metadata with functools.wraps

ci_complete

Co-authored-by: Matt Martz <matt@sivel.net>
Co-authored-by: Sviatoslav Sydorenko <wk.cvs.github@sydorenko.org.ua>
(cherry picked from commit ee725846f070fc6b0dd79b5e8c5199ec652faf87)

* Add changelog for ansible-galaxy improvements (#74738)

Changelog for #74240

(cherry picked from commit 9cfedcd9c911f994f830d494739a6bfe4de1b080)

Sloane Hertel authored on 2021/05/18 06:04:16
Showing 5 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,6 @@
0
+bugfixes:
1
+- >-
2
+  Improve resilience of ``ansible-galaxy collection`` by increasing the page
3
+  size to make fewer requests overall and retrying queries with a jittered
4
+  exponential backoff when rate limiting HTTP codes (520 and 429) occur.
5
+  (https://github.com/ansible/ansible/issues/74191)
... ...
@@ -15,9 +15,11 @@ import time
15 15
 from ansible import constants as C
16 16
 from ansible.errors import AnsibleError
17 17
 from ansible.galaxy.user_agent import user_agent
18
+from ansible.module_utils.api import retry_with_delays_and_condition
19
+from ansible.module_utils.api import generate_jittered_backoff
18 20
 from ansible.module_utils.six import string_types
19 21
 from ansible.module_utils.six.moves.urllib.error import HTTPError
20
-from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse
22
+from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse, parse_qs, urljoin
21 23
 from ansible.module_utils._text import to_bytes, to_native, to_text
22 24
 from ansible.module_utils.urls import open_url, prepare_multipart
23 25
 from ansible.utils.display import Display
... ...
@@ -30,6 +32,18 @@ except ImportError:
30 30
     from urlparse import urlparse
31 31
 
32 32
 display = Display()
33
+COLLECTION_PAGE_SIZE = 100
34
+RETRY_HTTP_ERROR_CODES = [  # TODO: Allow user-configuration
35
+    429,  # Too Many Requests
36
+    520,  # Galaxy rate limit error code (Cloudflare unknown error)
37
+]
38
+
39
+
40
+def is_rate_limit_exception(exception):
41
+    # Note: cloud.redhat.com masks rate limit errors with 403 (Forbidden) error codes.
42
+    # Since 403 could reflect the actual problem (such as an expired token), we should
43
+    # not retry by default.
44
+    return isinstance(exception, GalaxyError) and exception.http_code in RETRY_HTTP_ERROR_CODES
33 45
 
34 46
 
35 47
 def g_connect(versions):
... ...
@@ -187,6 +201,10 @@ class GalaxyAPI:
187 187
         # Calling g_connect will populate self._available_api_versions
188 188
         return self._available_api_versions
189 189
 
190
+    @retry_with_delays_and_condition(
191
+        backoff_iterator=generate_jittered_backoff(retries=6, delay_base=2, delay_threshold=40),
192
+        should_retry_error=is_rate_limit_exception
193
+    )
190 194
     def _call_galaxy(self, url, args=None, headers=None, method=None, auth_required=False, error_context_msg=None):
191 195
         headers = headers or {}
192 196
         self._add_auth_token(headers, url, required=auth_required)
... ...
@@ -554,7 +572,9 @@ class GalaxyAPI:
554 554
             results_key = 'results'
555 555
             pagination_path = ['next']
556 556
 
557
-        n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/')
557
+        page_size_name = 'limit' if 'v3' in self.available_api_versions else 'page_size'
558
+        n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/?%s=%d' % (page_size_name, COLLECTION_PAGE_SIZE))
559
+        n_url_info = urlparse(n_url)
558 560
 
559 561
         error_context_msg = 'Error when getting available collection versions for %s.%s from %s (%s)' \
560 562
                             % (namespace, name, self.name, self.api_server)
... ...
@@ -573,7 +593,10 @@ class GalaxyAPI:
573 573
             elif relative_link:
574 574
                 # TODO: This assumes the pagination result is relative to the root server. Will need to be verified
575 575
                 # with someone who knows the AH API.
576
-                next_link = n_url.replace(urlparse(n_url).path, next_link)
576
+
577
+                # Remove the query string from the versions_url to use the next_link's query
578
+                n_url = urljoin(n_url, urlparse(n_url).path)
579
+                next_link = n_url.replace(n_url_info.path, next_link)
577 580
 
578 581
             data = self._call_galaxy(to_native(next_link, errors='surrogate_or_strict'),
579 582
                                      error_context_msg=error_context_msg)
... ...
@@ -26,6 +26,8 @@ The 'api' module provides the following common argument specs:
26 26
 from __future__ import (absolute_import, division, print_function)
27 27
 __metaclass__ = type
28 28
 
29
+import functools
30
+import random
29 31
 import sys
30 32
 import time
31 33
 
... ...
@@ -114,3 +116,51 @@ def retry(retries=None, retry_pause=1):
114 114
 
115 115
         return retried
116 116
     return wrapper
117
+
118
+
119
+def generate_jittered_backoff(retries=10, delay_base=3, delay_threshold=60):
120
+    """The "Full Jitter" backoff strategy.
121
+
122
+    Ref: https://www.awsarchitectureblog.com/2015/03/backoff.html
123
+
124
+    :param retries: The number of delays to generate.
125
+    :param delay_base: The base time in seconds used to calculate the exponential backoff.
126
+    :param delay_threshold: The maximum time in seconds for any delay.
127
+    """
128
+    for retry in range(0, retries):
129
+        yield random.randint(0, min(delay_threshold, delay_base * 2 ** retry))
130
+
131
+
132
+def retry_never(exception_or_result):
133
+    return False
134
+
135
+
136
+def retry_with_delays_and_condition(backoff_iterator, should_retry_error=None):
137
+    """Generic retry decorator.
138
+
139
+    :param backoff_iterator: An iterable of delays in seconds.
140
+    :param should_retry_error: A callable that takes an exception of the decorated function and decides whether to retry or not (returns a bool).
141
+    """
142
+    if should_retry_error is None:
143
+        should_retry_error = retry_never
144
+
145
+    def function_wrapper(function):
146
+        @functools.wraps(function)
147
+        def run_function(*args, **kwargs):
148
+            """This assumes the function has not already been called.
149
+            If backoff_iterator is empty, we should still run the function a single time with no delay.
150
+            """
151
+            call_retryable_function = functools.partial(function, *args, **kwargs)
152
+
153
+            for delay in backoff_iterator:
154
+                try:
155
+                    return call_retryable_function()
156
+                except Exception as e:
157
+                    if not should_retry_error(e):
158
+                        raise
159
+                time.sleep(delay)
160
+
161
+            # Only or final attempt
162
+            return call_retryable_function()
163
+        return run_function
164
+    return function_wrapper
... ...
@@ -727,9 +727,10 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
727 727
     actual = api.get_collection_versions('namespace', 'collection')
728 728
     assert actual == [u'1.0.0', u'1.0.1']
729 729
 
730
+    page_query = '?limit=100' if api_version == 'v3' else '?page_size=100'
730 731
     assert mock_open.call_count == 1
731 732
     assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \
732
-                                            'versions/' % api_version
733
+                                            'versions/%s' % (api_version, page_query)
733 734
     if token_ins:
734 735
         assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type
735 736
 
... ...
@@ -738,9 +739,9 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
738 738
     ('v2', None, None, [
739 739
         {
740 740
             'count': 6,
741
-            'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2',
741
+            'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100',
742 742
             'previous': None,
743
-            'results': [
743
+            'results': [  # Pay no mind, using more manageable results than page_size would indicate
744 744
                 {
745 745
                     'version': '1.0.0',
746 746
                     'href': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/1.0.0',
... ...
@@ -753,7 +754,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
753 753
         },
754 754
         {
755 755
             'count': 6,
756
-            'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3',
756
+            'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3&page_size=100',
757 757
             'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions',
758 758
             'results': [
759 759
                 {
... ...
@@ -769,7 +770,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
769 769
         {
770 770
             'count': 6,
771 771
             'next': None,
772
-            'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2',
772
+            'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100',
773 773
             'results': [
774 774
                 {
775 775
                     'version': '1.0.4',
... ...
@@ -786,7 +787,8 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
786 786
         {
787 787
             'count': 6,
788 788
             'links': {
789
-                'next': '/api/v3/collections/namespace/collection/versions/?page=2',
789
+                # v3 links are relative and the limit is included during pagination
790
+                'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100',
790 791
                 'previous': None,
791 792
             },
792 793
             'data': [
... ...
@@ -803,7 +805,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
803 803
         {
804 804
             'count': 6,
805 805
             'links': {
806
-                'next': '/api/v3/collections/namespace/collection/versions/?page=3',
806
+                'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=200',
807 807
                 'previous': '/api/v3/collections/namespace/collection/versions',
808 808
             },
809 809
             'data': [
... ...
@@ -821,7 +823,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m
821 821
             'count': 6,
822 822
             'links': {
823 823
                 'next': None,
824
-                'previous': '/api/v3/collections/namespace/collection/versions/?page=2',
824
+                'previous': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100',
825 825
             },
826 826
             'data': [
827 827
                 {
... ...
@@ -852,12 +854,22 @@ def test_get_collection_versions_pagination(api_version, token_type, token_ins,
852 852
     assert actual == [u'1.0.0', u'1.0.1', u'1.0.2', u'1.0.3', u'1.0.4', u'1.0.5']
853 853
 
854 854
     assert mock_open.call_count == 3
855
+
856
+    if api_version == 'v3':
857
+        query_1 = 'limit=100'
858
+        query_2 = 'limit=100&offset=100'
859
+        query_3 = 'limit=100&offset=200'
860
+    else:
861
+        query_1 = 'page_size=100'
862
+        query_2 = 'page=2&page_size=100'
863
+        query_3 = 'page=3&page_size=100'
864
+
855 865
     assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \
856
-                                            'versions/' % api_version
866
+                                            'versions/?%s' % (api_version, query_1)
857 867
     assert mock_open.mock_calls[1][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \
858
-                                            'versions/?page=2' % api_version
868
+                                            'versions/?%s' % (api_version, query_2)
859 869
     assert mock_open.mock_calls[2][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \
860
-                                            'versions/?page=3' % api_version
870
+                                            'versions/?%s' % (api_version, query_3)
861 871
 
862 872
     if token_type:
863 873
         assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type
... ...
@@ -7,11 +7,19 @@ from __future__ import (absolute_import, division, print_function)
7 7
 __metaclass__ = type
8 8
 
9 9
 
10
-from ansible.module_utils.api import rate_limit, retry
10
+from ansible.module_utils.api import rate_limit, retry, retry_with_delays_and_condition
11 11
 
12 12
 import pytest
13 13
 
14 14
 
15
+class CustomException(Exception):
16
+    pass
17
+
18
+
19
+class CustomBaseException(BaseException):
20
+    pass
21
+
22
+
15 23
 class TestRateLimit:
16 24
 
17 25
     def test_ratelimit(self):
... ...
@@ -26,17 +34,16 @@ class TestRateLimit:
26 26
 class TestRetry:
27 27
 
28 28
     def test_no_retry_required(self):
29
-        self.counter = 0
30
-
31 29
         @retry(retries=4, retry_pause=2)
32 30
         def login_database():
33
-            self.counter += 1
31
+            login_database.counter += 1
34 32
             return 'success'
35 33
 
34
+        login_database.counter = 0
36 35
         r = login_database()
37 36
 
38 37
         assert r == 'success'
39
-        assert self.counter == 1
38
+        assert login_database.counter == 1
40 39
 
41 40
     def test_catch_exception(self):
42 41
 
... ...
@@ -44,5 +51,71 @@ class TestRetry:
44 44
         def login_database():
45 45
             return 'success'
46 46
 
47
-        with pytest.raises(Exception):
47
+        with pytest.raises(Exception, match="Retry"):
48 48
             login_database()
49
+
50
+    def test_no_retries(self):
51
+
52
+        @retry()
53
+        def login_database():
54
+            assert False, "Should not execute"
55
+
56
+        login_database()
57
+
58
+
59
+class TestRetryWithDelaysAndCondition:
60
+
61
+    def test_empty_retry_iterator(self):
62
+        @retry_with_delays_and_condition(backoff_iterator=[])
63
+        def login_database():
64
+            login_database.counter += 1
65
+
66
+        login_database.counter = 0
67
+        r = login_database()
68
+        assert login_database.counter == 1
69
+
70
+    def test_no_retry_exception(self):
71
+        @retry_with_delays_and_condition(
72
+            backoff_iterator=[1],
73
+            should_retry_error=lambda x: False,
74
+        )
75
+        def login_database():
76
+            login_database.counter += 1
77
+            if login_database.counter == 1:
78
+                raise CustomException("Error")
79
+
80
+        login_database.counter = 0
81
+        with pytest.raises(CustomException, match="Error"):
82
+            login_database()
83
+        assert login_database.counter == 1
84
+
85
+    def test_no_retry_baseexception(self):
86
+        @retry_with_delays_and_condition(
87
+            backoff_iterator=[1],
88
+            should_retry_error=lambda x: True,  # Retry all exceptions inheriting from Exception
89
+        )
90
+        def login_database():
91
+            login_database.counter += 1
92
+            if login_database.counter == 1:
93
+                # Raise an exception inheriting from BaseException
94
+                raise CustomBaseException("Error")
95
+
96
+        login_database.counter = 0
97
+        with pytest.raises(CustomBaseException, match="Error"):
98
+            login_database()
99
+        assert login_database.counter == 1
100
+
101
+    def test_retry_exception(self):
102
+        @retry_with_delays_and_condition(
103
+            backoff_iterator=[1],
104
+            should_retry_error=lambda x: isinstance(x, CustomException),
105
+        )
106
+        def login_database():
107
+            login_database.counter += 1
108
+            if login_database.counter == 1:
109
+                raise CustomException("Retry")
110
+            return 'success'
111
+
112
+        login_database.counter = 0
113
+        assert login_database() == 'success'
114
+        assert login_database.counter == 2