1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
From: Seth Michael Larson <seth@python.org>
Date: Fri, 31 Jan 2025 11:41:34 -0600
Subject: gh-105704: Disallow square brackets (`[` and `]`) in domain names
for parsed URLs (GH-129418)
* gh-105704: Disallow square brackets ( and ) in domain names for parsed URLs
* Use Sphinx references
Co-authored-by: Peter Bierma <zintensitydev@gmail.com>
* Add mismatched bracket test cases, fix news format
* Add more test coverage for ports
---------
(cherry picked from commit d89a5f6a6e65511a5f6e0618c4c30a7aa5aba56a)
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Peter Bierma <zintensitydev@gmail.com>
Origin: upstream, https://github.com/python/cpython/pull/129528
---
.../2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 4 +++
lib-python/3/test/test_urlparse.py | 37 +++++++++++++++++++++-
lib-python/3/urllib/parse.py | 20 ++++++++++--
3 files changed, 58 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
new file mode 100644
index 0000000..bff1bc6
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst
@@ -0,0 +1,4 @@
+When using :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` host
+parsing would not reject domain names containing square brackets (``[`` and
+``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to
+`RFC 3986 Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__.
diff --git a/lib-python/3/test/test_urlparse.py b/lib-python/3/test/test_urlparse.py
index 2376dad..a283063 100644
--- a/lib-python/3/test/test_urlparse.py
+++ b/lib-python/3/test/test_urlparse.py
@@ -1224,16 +1224,51 @@ class UrlParseTestCase(unittest.TestCase):
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a1')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a1')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:1a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:1a')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:/')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:?')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip]')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip[')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip].suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip')
+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix')
def test_splitting_bracketed_hosts(self):
- p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
+ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query')
self.assertEqual(p1.hostname, 'v6a.ip')
self.assertEqual(p1.username, 'user')
self.assertEqual(p1.path, '/path')
+ self.assertEqual(p1.port, 1234)
p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
self.assertEqual(p2.username, 'user')
self.assertEqual(p2.path, '/path')
+ self.assertIs(p2.port, None)
p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
self.assertEqual(p3.username, 'user')
diff --git a/lib-python/3/urllib/parse.py b/lib-python/3/urllib/parse.py
index abf1d1b..724cce8 100644
--- a/lib-python/3/urllib/parse.py
+++ b/lib-python/3/urllib/parse.py
@@ -436,6 +436,23 @@ def _checknetloc(netloc):
raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
+def _check_bracketed_netloc(netloc):
+ # Note that this function must mirror the splitting
+ # done in NetlocResultMixins._hostinfo().
+ hostname_and_port = netloc.rpartition('@')[2]
+ before_bracket, have_open_br, bracketed = hostname_and_port.partition('[')
+ if have_open_br:
+ # No data is allowed before a bracket.
+ if before_bracket:
+ raise ValueError("Invalid IPv6 URL")
+ hostname, _, port = bracketed.partition(']')
+ # No data is allowed after the bracket but before the port delimiter.
+ if port and not port.startswith(":"):
+ raise ValueError("Invalid IPv6 URL")
+ else:
+ hostname, _, port = hostname_and_port.partition(':')
+ _check_bracketed_host(hostname)
+
# Valid bracketed hosts are defined in
# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
def _check_bracketed_host(hostname):
@@ -496,8 +513,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if '[' in netloc and ']' in netloc:
- bracketed_host = netloc.partition('[')[2].partition(']')[0]
- _check_bracketed_host(bracketed_host)
+ _check_bracketed_netloc(netloc)
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
|