From 2342693b31f740a422abf7267c53b4e7bc487c1b Mon Sep 17 00:00:00 2001
From: Tim Graham <timograham@gmail.com>
Date: Mon, 9 Mar 2015 20:05:13 -0400
Subject: [PATCH] [1.4.x] Made is_safe_url() reject URLs that start with
 control characters.

This is a security fix; disclosure to follow shortly.

[hertzog@debian.org: Backported to 1.2.3. Dropped the non-regression test,
which was not available there, and adapted the django/utils/http.py changes
so that they apply to that version.]
---
 django/utils/http.py                |  9 ++++++++-
 docs/releases/1.4.20.txt            | 19 +++++++++++++++++++
 tests/regressiontests/utils/http.py |  4 +++-
 3 files changed, 30 insertions(+), 2 deletions(-)

--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -1,6 +1,7 @@
 import re
 import urllib
 import urlparse
+import unicodedata
 from email.Utils import formatdate
 
 from django.utils.encoding import smart_str, force_unicode
@@ -130,9 +131,10 @@ def is_safe_url(url, host=None):
 
     Always returns ``False`` on an empty url.
     """
+    if url is not None:
+        url = url.strip()
     if not url:
         return False
-    url = url.strip()
     # Chrome treats \ completely as /
     url = url.replace('\\', '/')
     # Chrome considers any URL with more than two slashes to be absolute, but
@@ -146,6 +148,11 @@ def is_safe_url(url, host=None):
     # allow this syntax.
     if not url_info[1] and url_info[0]:
         return False
+    # Forbid URLs that start with control characters. Some browsers (like
+    # Chrome) ignore quite a few control characters at the start of a
+    # URL and might consider the URL as scheme relative.
+    if unicodedata.category(unicode(url[0]))[0] == 'C':
+        return False
     return (not url_info[1] or url_info[1] == host) and \
         (not url_info[0] or url_info[0] in ['http', 'https'])