Browse code

python3: CVE-2019-9636

Change-Id: I1c5283b9cf096946af4779bb3e362a07d6e87a06
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/6858
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Anish Swaminathan <anishs@vmware.com>

Tapas Kundu authored on 2019/03/13 23:29:59
Showing 2 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,142 @@
0
+commit 23fc0416454c4ad5b9b23d520fbe6d89be3efc24
1
+Author: Steve Dower <steve.dower@microsoft.com>
2
+Date:   Mon Mar 11 21:34:03 2019 -0700
3
+
4
+    [3.6] bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201) (GH-12215)
5
+
6
+diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
7
+index d991254..647af61 100644
8
+--- a/Doc/library/urllib.parse.rst
9
+@@ -121,6 +121,11 @@ or on combining URL components into a URL string.
10
+    Unmatched square brackets in the :attr:`netloc` attribute will raise a
11
+    :exc:`ValueError`.
12
+ 
13
++   Characters in the :attr:`netloc` attribute that decompose under NFKC
14
++   normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
15
++   ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
16
++   decomposed before parsing, no error will be raised.
17
++
18
+    .. versionchanged:: 3.2
19
+       Added IPv6 URL parsing capabilities.
20
+
21
+@@ -138,6 +138,9 @@ or on combining URL components into a UR
22
+       Out-of-range port numbers now raise :exc:`ValueError`, instead of
23
+       returning :const:`None`.
24
+ 
25
++   .. versionchanged:: 3.6.9
26
++      Characters that affect netloc parsing under NFKC normalization will
27
++      now raise :exc:`ValueError`.
28
+ 
29
+ .. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
30
+
31
+ 
32
+@@ -256,10 +265,19 @@ or on combining URL components into a URL string.
33
+    Unmatched square brackets in the :attr:`netloc` attribute will raise a
34
+    :exc:`ValueError`.
35
+ 
36
++   Characters in the :attr:`netloc` attribute that decompose under NFKC
37
++   normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
38
++   ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
39
++   decomposed before parsing, no error will be raised.
40
++
41
+    .. versionchanged:: 3.6
42
+       Out-of-range port numbers now raise :exc:`ValueError`, instead of
43
+       returning :const:`None`.
44
+ 
45
++   .. versionchanged:: 3.6.9
46
++      Characters that affect netloc parsing under NFKC normalization will
47
++      now raise :exc:`ValueError`.
48
++
49
+ 
50
+ .. function:: urlunsplit(parts)
51
+ 
52
+diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
53
+index be50b47..e6638ae 100644
54
+--- a/Lib/test/test_urlparse.py
55
+@@ -1,3 +1,5 @@
56
++import sys
57
++import unicodedata
58
+ import unittest
59
+ import urllib.parse
60
+ 
61
+@@ -984,6 +986,27 @@ class UrlParseTestCase(unittest.TestCase):
62
+                 expected.append(name)
63
+         self.assertCountEqual(urllib.parse.__all__, expected)
64
+ 
65
++    def test_urlsplit_normalization(self):
66
++        # Certain characters should never occur in the netloc,
67
++        # including under normalization.
68
++        # Ensure that ALL of them are detected and cause an error
69
++        illegal_chars = '/:#?@'
70
++        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
71
++        denorm_chars = [
72
++            c for c in map(chr, range(128, sys.maxunicode))
73
++            if (hex_chars & set(unicodedata.decomposition(c).split()))
74
++            and c not in illegal_chars
75
++        ]
76
++        # Sanity check that we found at least one such character
77
++        self.assertIn('\u2100', denorm_chars)
78
++        self.assertIn('\uFF03', denorm_chars)
79
++
80
++        for scheme in ["http", "https", "ftp"]:
81
++            for c in denorm_chars:
82
++                url = "{}://netloc{}false.netloc/path".format(scheme, c)
83
++                with self.subTest(url=url, char='{:04X}'.format(ord(c))):
84
++                    with self.assertRaises(ValueError):
85
++                        urllib.parse.urlsplit(url)
86
+ 
87
+ class Utility_Tests(unittest.TestCase):
88
+     """Testcase to test the various utility functions in the urllib."""
89
+diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
90
+index 85e68c8..7b06f4d 100644
91
+--- a/Lib/urllib/parse.py
92
+@@ -391,6 +391,21 @@ def _splitnetloc(url, start=0):
93
+             delim = min(delim, wdelim)     # use earliest delim position
94
+     return url[start:delim], url[delim:]   # return (domain, rest)
95
+ 
96
++def _checknetloc(netloc):
97
++    if not netloc or not any(ord(c) > 127 for c in netloc):
98
++        return
99
++    # looking for characters like \u2100 that expand to 'a/c'
100
++    # IDNA uses NFKC equivalence, so normalize for this check
101
++    import unicodedata
102
++    netloc2 = unicodedata.normalize('NFKC', netloc)
103
++    if netloc == netloc2:
104
++        return
105
++    _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay
106
++    for c in '/?#@:':
107
++        if c in netloc2:
108
++            raise ValueError("netloc '" + netloc2 + "' contains invalid " +
109
++                             "characters under NFKC normalization")
110
++
111
+ def urlsplit(url, scheme='', allow_fragments=True):
112
+     """Parse a URL into 5 components:
113
+     <scheme>://<netloc>/<path>?<query>#<fragment>
114
+@@ -420,6 +435,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
115
+                 url, fragment = url.split('#', 1)
116
+             if '?' in url:
117
+                 url, query = url.split('?', 1)
118
++            _checknetloc(netloc)
119
+             v = SplitResult(scheme, netloc, url, query, fragment)
120
+             _parse_cache[key] = v
121
+             return _coerce_result(v)
122
+@@ -443,6 +459,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
123
+         url, fragment = url.split('#', 1)
124
+     if '?' in url:
125
+         url, query = url.split('?', 1)
126
++    _checknetloc(netloc)
127
+     v = SplitResult(scheme, netloc, url, query, fragment)
128
+     _parse_cache[key] = v
129
+     return _coerce_result(v)
130
+diff --git a/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst b/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
131
+new file mode 100644
132
+index 0000000..5546394
133
+--- /dev/null
134
+@@ -0,0 +1,3 @@
135
++Changes urlsplit() to raise ValueError when the URL contains characters that
136
++decompose under IDNA encoding (NFKC-normalization) into characters that
137
++affect how the URL is parsed.
... ...
@@ -1,7 +1,7 @@
1 1
 Summary:        A high-level scripting language
2 2
 Name:           python3
3 3
 Version:        3.6.5
4
-Release:        4%{?dist}
4
+Release:        5%{?dist}
5 5
 License:        PSF
6 6
 URL:            http://www.python.org/
7 7
 Group:          System Environment/Programming
... ...
@@ -14,6 +14,7 @@ Patch1:         python3-support-photon-platform.patch
14 14
 Patch2:         python3-CVE-2017-18207.patch
15 15
 Patch3:         python3-CVE-2018-14647.patch
16 16
 Patch4:         python3-CVE-2018-20406.patch
17
+Patch5:         python3-CVE-2019-9636.patch
17 18
 BuildRequires:  pkg-config >= 0.28
18 19
 BuildRequires:  bzip2-devel
19 20
 BuildRequires:  ncurses-devel
... ...
@@ -138,7 +139,7 @@ The test package contains all regression tests for Python as well as the modules
138 138
 %patch2 -p1
139 139
 %patch3 -p1
140 140
 %patch4 -p1
141
-
141
+%patch5 -p1
142 142
 
143 143
 %build
144 144
 export OPT="${CFLAGS}"
... ...
@@ -268,6 +269,8 @@ rm -rf %{buildroot}/*
268 268
 %{_libdir}/python3.6/test/*
269 269
 
270 270
 %changelog
271
+*   Wed Mar 13 2019 Tapas Kundu <tkundu@vmware.com> 3.6.5-5
272
+-   Fix for CVE-2019-9636
271 273
 *   Mon Feb 11 2019 Tapas Kundu <tkundu@vmware.com> 3.6.5-4
272 274
 -   Fix for CVE-2018-20406
273 275
 *   Mon Dec 31 2018 Tapas Kundu <tkundu@vmware.com> 3.6.5-3