Browse code

python2: CVE-2019-9636

Change-Id: If8adffb57b0b28045fe41c18fb7fbd77536edf47
Reviewed-on: http://photon-jenkins.eng.vmware.com:8082/6853
Tested-by: gerrit-photon <photon-checkins@vmware.com>
Reviewed-by: Anish Swaminathan <anishs@vmware.com>

Tapas Kundu authored on 2019/03/13 10:32:06
Showing 2 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,146 @@
0
+commit e37ef41289b77e0f0bb9a6aedb0360664c55bdd5
1
+Author: Steve Dower <steve.dower@microsoft.com>
2
+Date:   Thu Mar 7 09:08:45 2019 -0800
3
+
4
+    bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201)
5
+
6
+diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst
7
+index 22249da..0989c88 100644
8
+--- a/Doc/library/urlparse.rst
9
+@@ -119,12 +119,21 @@ The :mod:`urlparse` module defines the f
10
+    See section :ref:`urlparse-result-object` for more information on the result
11
+    object.
12
+ 
13
++   Characters in the :attr:`netloc` attribute that decompose under NFKC
14
++   normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
15
++   ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
16
++   decomposed before parsing, or is not a Unicode string, no error will be
17
++   raised.
18
++
19
+    .. versionchanged:: 2.5
20
+       Added attributes to return value.
21
+ 
22
+    .. versionchanged:: 2.7
23
+       Added IPv6 URL parsing capabilities.
24
+ 
25
++   .. versionchanged:: 2.7.17
26
++      Characters that affect netloc parsing under NFKC normalization will
27
++      now raise :exc:`ValueError`.
28
+ 
29
+ .. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]])
30
+ 
31
+@@ -232,11 +242,21 @@ The :mod:`urlparse` module defines the following functions:
32
+    See section :ref:`urlparse-result-object` for more information on the result
33
+    object.
34
+ 
35
++   Characters in the :attr:`netloc` attribute that decompose under NFKC
36
++   normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
37
++   ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
38
++   decomposed before parsing, or is not a Unicode string, no error will be
39
++   raised.
40
++
41
+    .. versionadded:: 2.2
42
+ 
43
+    .. versionchanged:: 2.5
44
+       Added attributes to return value.
45
+ 
46
++   .. versionchanged:: 2.7.17
47
++      Characters that affect netloc parsing under NFKC normalization will
48
++      now raise :exc:`ValueError`.
49
++
50
+ 
51
+ .. function:: urlunsplit(parts)
52
+ 
53
+diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
54
+index 4e1ded7..73b0228 100644
55
+--- a/Lib/test/test_urlparse.py
56
+@@ -1,4 +1,6 @@
57
+ from test import test_support
58
++import sys
59
++import unicodedata
60
+ import unittest
61
+ import urlparse
62
+ 
63
+@@ -624,6 +626,28 @@ class UrlParseTestCase(unittest.TestCase):
64
+         self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
65
+                 ('http','www.python.org:80','','','',''))
66
+ 
67
++    def test_urlsplit_normalization(self):
68
++        # Certain characters should never occur in the netloc,
69
++        # including under normalization.
70
++        # Ensure that ALL of them are detected and cause an error
71
++        illegal_chars = u'/:#?@'
72
++        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
73
++        denorm_chars = [
74
++            c for c in map(unichr, range(128, sys.maxunicode))
75
++            if (hex_chars & set(unicodedata.decomposition(c).split()))
76
++            and c not in illegal_chars
77
++        ]
78
++        # Sanity check that we found at least one such character
79
++        self.assertIn(u'\u2100', denorm_chars)
80
++        self.assertIn(u'\uFF03', denorm_chars)
81
++
82
++        for scheme in [u"http", u"https", u"ftp"]:
83
++            for c in denorm_chars:
84
++                url = u"{}://netloc{}false.netloc/path".format(scheme, c)
85
++                print "Checking %r" % url
86
++                with self.assertRaises(ValueError):
87
++                    urlparse.urlsplit(url)
88
++
89
+ def test_main():
90
+     test_support.run_unittest(UrlParseTestCase)
91
+ 
92
+diff --git a/Lib/urlparse.py b/Lib/urlparse.py
93
+index f7c2b03..54eda08 100644
94
+--- a/Lib/urlparse.py
95
+@@ -165,6 +165,21 @@ def _splitnetloc(url, start=0):
96
+             delim = min(delim, wdelim)     # use earliest delim position
97
+     return url[start:delim], url[delim:]   # return (domain, rest)
98
+ 
99
++def _checknetloc(netloc):
100
++    if not netloc or not isinstance(netloc, unicode):
101
++        return
102
++    # looking for characters like \u2100 that expand to 'a/c'
103
++    # IDNA uses NFKC equivalence, so normalize for this check
104
++    import unicodedata
105
++    netloc2 = unicodedata.normalize('NFKC', netloc)
106
++    if netloc == netloc2:
107
++        return
108
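+    _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay -- NOTE(review): this rebinding is never read; the loop below tests netloc2 (full normalized netloc), see bpo-36742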
109
++    for c in '/?#@:':
110
++        if c in netloc2:
111
++            raise ValueError("netloc '" + netloc2 + "' contains invalid " +
112
++                             "characters under NFKC normalization")
113
++
114
+ def urlsplit(url, scheme='', allow_fragments=True):
115
+     """Parse a URL into 5 components:
116
+     <scheme>://<netloc>/<path>?<query>#<fragment>
117
+@@ -193,6 +208,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
118
+                 url, fragment = url.split('#', 1)
119
+             if '?' in url:
120
+                 url, query = url.split('?', 1)
121
++            _checknetloc(netloc)
122
+             v = SplitResult(scheme, netloc, url, query, fragment)
123
+             _parse_cache[key] = v
124
+             return v
125
+@@ -216,6 +232,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
126
+         url, fragment = url.split('#', 1)
127
+     if '?' in url:
128
+         url, query = url.split('?', 1)
129
++    _checknetloc(netloc)
130
+     v = SplitResult(scheme, netloc, url, query, fragment)
131
+     _parse_cache[key] = v
132
+     return v
133
+diff --git a/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst b/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst
134
+new file mode 100644
135
+index 0000000..1e1ad92
136
+--- /dev/null
137
+@@ -0,0 +1,3 @@
138
++Changes urlsplit() to raise ValueError when the URL contains characters that
139
++decompose under IDNA encoding (NFKC-normalization) into characters that
140
++affect how the URL is parsed.
141
+\ No newline at end of file
... ...
@@ -1,7 +1,7 @@
1 1
 Summary:        A high-level scripting language
2 2
 Name:           python2
3 3
 Version:        2.7.15
4
-Release:        3%{?dist}
4
+Release:        4%{?dist}
5 5
 License:        PSF
6 6
 URL:            http://www.python.org/
7 7
 Group:          System Environment/Programming
... ...
@@ -13,6 +13,7 @@ Patch0:         cgi.patch
13 13
 Patch1:         added-pyopenssl-ipaddress-certificate-validation.patch
14 14
 Patch2:         python2-support-photon-platform.patch
15 15
 Patch3:         CVE-2018-14647.patch
16
+Patch4:         CVE-2019-9636.patch
16 17
 BuildRequires:  pkg-config >= 0.28
17 18
 BuildRequires:  bzip2-devel
18 19
 BuildRequires:  openssl-devel
... ...
@@ -114,6 +115,7 @@ The test package contains all regression tests for Python as well as the modules
114 114
 %patch1 -p1
115 115
 %patch2 -p1
116 116
 %patch3 -p1
117
+%patch4 -p1
117 118
 
118 119
 %build
119 120
 export OPT="${CFLAGS}"
... ...
@@ -236,6 +238,8 @@ make test
236 236
 %{_libdir}/python2.7/test/*
237 237
 
238 238
 %changelog
239
+*   Tue Mar 12 2019 Tapas Kundu <tkundu@vmware.com> 2.7.15-4
240
+-   Added fix for CVE-2019-9636
239 241
 *   Fri Dec 21 2018 Tapas Kundu <tkundu@vmware.com> 2.7.15-3
240 242
 -   Fix for CVE-2018-14647
241 243
 *   Mon Sep 17 2018 Dweep Advani <dadvani@vmware.com> 2.7.15-2