Browse code

Refactoring split_args into sub-functions

James Cammarata authored on 2014/07/25 10:00:57
Showing 1 changed file
... ...
@@ -15,6 +15,39 @@
15 15
 # You should have received a copy of the GNU General Public License
16 16
 # along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
17 17
 
18
+def _get_quote_state(token, quote_char):
19
+    '''
20
+    the goal of this block is to determine if the quoted string
21
+    is unterminated in which case it needs to be put back together
22
+    '''
23
+    # the char before the current one, used to see if
24
+    # the current character is escaped
25
+    prev_char = None
26
+    for idx, cur_char in enumerate(token):
27
+        if idx > 0:
28
+            prev_char = token[idx-1]
29
+        if cur_char in '"\'':
30
+            if quote_char:
31
+                if cur_char == quote_char and prev_char != '\\':
32
+                    quote_char = None
33
+            else:
34
+                quote_char = cur_char
35
+    return quote_char
36
+
37
+def _count_jinja2_blocks(token, cur_depth, open_token, close_token):
38
+    '''
39
+    this function counts the number of opening/closing blocks for a
40
+    given opening/closing type and adjusts the current depth for that
41
+    block based on the difference
42
+    '''
43
+    num_open  = token.count(open_token)
44
+    num_close = token.count(close_token)
45
+    if num_open != num_close:
46
+        cur_depth += (num_open - num_close)
47
+        if cur_depth < 0:
48
+            cur_depth = 0
49
+    return cur_depth
50
+
18 51
 def split_args(args):
19 52
     '''
20 53
     Splits args on whitespace, but intelligently reassembles
... ...
@@ -24,15 +57,13 @@ def split_args(args):
24 24
     jinja2 blocks, however this function is/will be used in the
25 25
     core portions as well before the args are templated.
26 26
 
27
-    example input: a=b c=d
28
-    example output: dict(a='b', c='d')
27
+    example input: a=b c="foo bar"
28
+    example output: ['a=b', 'c="foo bar"']
29 29
 
30 30
     Basically this is a variation shlex that has some more intelligence for
31 31
     how Ansible needs to use it.
32 32
     '''
33 33
 
34
-    # FIXME: refactoring into smaller functions
35
-
36 34
     # the list of params parsed out of the arg string
37 35
     # this is going to be the result value when we are done
38 36
     params = []
... ...
@@ -40,52 +71,32 @@ def split_args(args):
40 40
     # here we encode the args, so we have a uniform charset to
41 41
     # work with, and split on white space
42 42
     args = args.encode('utf-8')
43
-    items = args.split()
43
+    tokens = args.split()
44 44
 
45
-    # iterate over the items, and reassemble any that may have been
46
-    # split on a space inside a jinja2 block. 
45
+    # iterate over the tokens, and reassemble any that may have been
46
+    # split on a space inside a jinja2 block.
47 47
     # ex if tokens are "{{", "foo", "}}" these go together
48 48
 
49 49
     # These variables are used
50 50
     # to keep track of the state of the parsing, since blocks and quotes
51 51
     # may be nested within each other.
52 52
 
53
-    inside_quotes = False
54 53
     quote_char = None
55
-    split_print_depth = 0
56
-    split_block_depth = 0
57
-    split_comment_depth = 0
54
+    inside_quotes = False
55
+    print_depth   = 0 # used to count nested jinja2 {{ }} blocks
56
+    block_depth   = 0 # used to count nested jinja2 {% %} blocks
57
+    comment_depth = 0 # used to count nested jinja2 {# #} blocks
58 58
 
59
-    # now we loop over each split item, coalescing items if the white space
59
+    # now we loop over each split token, coalescing tokens if the white space
60 60
     # split occurred within quotes or a jinja2 block of some kind
61
+    for token in tokens:
61 62
 
62
-    for item in items:
63
-
64
-        item = item.strip()
63
+        token = token.strip()
65 64
 
66 65
         # store the previous quoting state for checking later
67 66
         was_inside_quotes = inside_quotes
68
-
69
-        # determine the current quoting state
70
-        # the goal of this block is to determine if the quoted string
71
-        # is unterminated in which case it needs to be put back together
72
-
73
-        bc = None # before_char
74
-        for i in range(0, len(item)):  # use enumerate
75
-
76
-            c = item[i]  # current_char
77
-
78
-            if i > 0:
79
-                bc = item[i-1]
80
-
81
-            if c in ('"', "'"):
82
-                if inside_quotes:
83
-                    if c == quote_char and bc != '\\':
84
-                        inside_quotes = False
85
-                        quote_char = None
86
-                else:
87
-                    inside_quotes = True
88
-                    quote_char = c
67
+        quote_char = _get_quote_state(token, quote_char)
68
+        inside_quotes = quote_char is not None
89 69
 
90 70
         # multiple conditions may append a token to the list of params,
91 71
         # so we keep track with this flag to make sure it only happens once
... ...
@@ -93,69 +104,45 @@ def split_args(args):
93 93
         # it to the end of the last token
94 94
         appended = False
95 95
 
96
-        # if we're inside quotes now, but weren't before, append the item
96
+        # if we're inside quotes now, but weren't before, append the token 
97 97
         # to the end of the list, since we'll tack on more to it later
98
-
98
+        # otherwise, if we're inside any jinja2 block, inside quotes, or we were
99
+        # inside quotes (but aren't now) concat this token to the last param
99 100
         if inside_quotes and not was_inside_quotes:
100
-            params.append(item)
101
+            params.append(token)
101 102
             appended = True
102
-
103
-        # otherwise, if we're inside any jinja2 block, inside quotes, or we were
104
-        # inside quotes (but aren't now) concat this item to the last param
105
-        # FIXME: just or these all together
106
-        elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes):
107
-            params[-1] = "%s %s" % (params[-1], item)
103
+        elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes:
104
+            params[-1] = "%s %s" % (params[-1], token)
108 105
             appended = True
109 106
 
110
-        # these variables are used to determine the current depth of each jinja2
111
-        # block type, by counting the number of openings and closing tags
112
-        # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM)
107
+        # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
108
+        # and may append the current token to the params (if we haven't previously done so)
109
+        prev_print_depth = print_depth
110
+        print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}")
111
+        if print_depth != prev_print_depth and not appended:
112
+            params.append(token)
113
+            appended = True
113 114
 
114
-        num_print_open    = item.count('{{')
115
-        num_print_close   = item.count('}}')
116
-        num_block_open    = item.count('{%')
117
-        num_block_close   = item.count('%}')
118
-        num_comment_open  = item.count('{#')
119
-        num_comment_close = item.count('#}')
115
+        prev_block_depth = block_depth
116
+        block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}")
117
+        if block_depth != prev_block_depth and not appended:
118
+            params.append(token)
119
+            appended = True
120 120
 
121
-        # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
122
-        # and may append the current item to the params (if we haven't previously done so)
123
-
124
-        # FIXME: DRY a bit
125
-        if num_print_open != num_print_close:
126
-            split_print_depth += (num_print_open - num_print_close)
127
-            if not appended:
128
-                params.append(item)
129
-                appended = True
130
-            if split_print_depth < 0:
131
-                split_print_depth = 0
132
-
133
-        if num_block_open != num_block_close:
134
-            split_block_depth += (num_block_open - num_block_close)
135
-            if not appended:
136
-                params.append(item)
137
-                appended = True
138
-            if split_block_depth < 0:
139
-                split_block_depth = 0
140
-
141
-        if num_comment_open != num_comment_close:
142
-            split_comment_depth += (num_comment_open - num_comment_close)
143
-            if not appended:
144
-                params.append(item)
145
-                appended = True
146
-            if split_comment_depth < 0:
147
-                split_comment_depth = 0
121
+        prev_comment_depth = comment_depth
122
+        comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}")
123
+        if comment_depth != prev_comment_depth and not appended:
124
+            params.append(token)
125
+            appended = True
148 126
 
149 127
         # finally, if we're at zero depth for all blocks and not inside quotes, and have not
150 128
         # yet appended anything to the list of params, we do so now
151
-
152
-        if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended:
153
-            params.append(item)
129
+        if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended:
130
+            params.append(token)
154 131
 
155 132
     # If we're done and things are not at zero depth or we're still inside quotes,
156 133
     # raise an error to indicate that the args were unbalanced
157
-
158
-    if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes:
134
+    if print_depth or block_depth or comment_depth or inside_quotes:
159 135
         raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
160 136
 
161 137
     # finally, we decode each param back to the unicode it was in the arg string