From ccf5f1bc678638ee3a889207c3b96de040933962 Mon Sep 17 00:00:00 2001
From: Tim Graham <timograham@gmail.com>
Date: Tue, 14 Mar 2017 10:46:53 -0400
Subject: Fixed #27912, CVE-2017-7233 -- Fixed is_safe_url() with numeric URLs.
This is a security fix.
---
django/utils/http.py | 66 +++++++++++++++++++++++++++++++++++++++++-
tests/utils_tests/test_http.py | 5 +++-
2 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/django/utils/http.py b/django/utils/http.py
index 972760e..7a931a9 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -17,6 +17,18 @@ from django.utils.six.moves.urllib.parse import (
quote, quote_plus, unquote, unquote_plus, urlparse,
urlencode as original_urlencode)
+if six.PY2:
+ from urlparse import (
+ ParseResult, SplitResult, _splitnetloc, _splitparams, scheme_chars,
+ uses_params,
+ )
+ _coerce_args = None
+else:
+ from urllib.parse import (
+ ParseResult, SplitResult, _coerce_args, _splitnetloc, _splitparams,
+ scheme_chars, uses_params,
+ )
+
ETAG_MATCH = re.compile(r'(?:W/)?"((?:\\.|[^"])*)"')
MONTHS = 'jan feb mar apr may jun jul aug sep oct nov dec'.split()
@@ -284,12 +296,64 @@ def is_safe_url(url, host=None):
return _is_safe_url(url, host) and _is_safe_url(url.replace('\\', '/'), host)
+# Copied from urllib.parse.urlparse(), but splits with the patched _urlsplit() instead of urlsplit().
+def _urlparse(url, scheme='', allow_fragments=True):
+ """Parse a URL into 6 components:
+ <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
+ Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
+ Note that we don't break the components up in smaller bits
+ (e.g. netloc is a single string) and we don't expand % escapes."""
+ if _coerce_args:  # None on Python 2, imported from urllib.parse otherwise
+ url, scheme, _coerce_result = _coerce_args(url, scheme)
+ splitresult = _urlsplit(url, scheme, allow_fragments)  # patched splitter
+ scheme, netloc, url, query, fragment = splitresult
+ if scheme in uses_params and ';' in url:  # ;params are split off only for schemes in uses_params
+ url, params = _splitparams(url)
+ else:
+ params = ''
+ result = ParseResult(scheme, netloc, url, params, query, fragment)
+ return _coerce_result(result) if _coerce_args else result  # _coerce_result is unbound on Python 2
+
+
+# Copied from urllib.parse.urlsplit() with
+# https://github.com/python/cpython/pull/661 applied.
+def _urlsplit(url, scheme='', allow_fragments=True):
+ """Parse a URL into 5 components:
+ <scheme>://<netloc>/<path>?<query>#<fragment>
+ Return a 5-tuple: (scheme, netloc, path, query, fragment).
+ Note that we don't break the components up in smaller bits
+ (e.g. netloc is a single string) and we don't expand % escapes."""
+ if _coerce_args:  # None on Python 2, imported from urllib.parse otherwise
+ url, scheme, _coerce_result = _coerce_args(url, scheme)
+ allow_fragments = bool(allow_fragments)
+ netloc = query = fragment = ''
+ i = url.find(':')
+ if i > 0:  # a ':' at index 0, or no ':' at all, leaves the default scheme in place
+ for c in url[:i]:
+ if c not in scheme_chars:
+ break
+ else:  # for-else: no break, so every character before ':' is a scheme character
+ scheme, url = url[:i].lower(), url[i + 1:]  # taken as a scheme whatever follows the ':'
+
+ if url[:2] == '//':  # a network location follows the (optional) scheme
+ netloc, url = _splitnetloc(url, 2)
+ if (('[' in netloc and ']' not in netloc) or
+ (']' in netloc and '[' not in netloc)):  # unbalanced IPv6 brackets
+ raise ValueError("Invalid IPv6 URL")
+ if allow_fragments and '#' in url:  # fragment is cut off first, so a '?' inside it is not a query
+ url, fragment = url.split('#', 1)
+ if '?' in url:
+ url, query = url.split('?', 1)
+ v = SplitResult(scheme, netloc, url, query, fragment)
+ return _coerce_result(v) if _coerce_args else v  # _coerce_result is unbound on Python 2
+
+
def _is_safe_url(url, host):
# Chrome considers any URL with more than two slashes to be absolute, but
# urlparse is not so flexible. Treat any url with three slashes as unsafe.
if url.startswith('///'):
return False
- url_info = urlparse(url)
+ url_info = _urlparse(url)
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.
# In that URL, example.com is not the hostname but, a path component. However,
# Chrome will still consider example.com to be the hostname, so we must not
diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py
index 769f163..fc8d902 100644
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -117,6 +117,8 @@ class TestUtilsHttp(unittest.TestCase):
r'http://testserver\me:pass@example.com',
r'http://testserver\@example.com',
r'http:\\testserver\confirm\me@example.com',
+ 'http:999999999',
+ 'ftp:9999999999',
'\n'):
self.assertFalse(http.is_safe_url(bad_url, host='testserver'), "%s should be blocked" % bad_url)
for good_url in ('/view/?param=http://example.com',
@@ -127,7 +129,8 @@ class TestUtilsHttp(unittest.TestCase):
'HTTPS://testserver/',
'//testserver/',
'http://testserver/confirm?email=me@example.com',
- '/url%20with%20spaces/'):
+ '/url%20with%20spaces/',
+ 'path/http:2222222222'):
self.assertTrue(http.is_safe_url(good_url, host='testserver'), "%s should be allowed" % good_url)
if six.PY2: