File: CVE-2015-0221-regression-fix.diff

package info (click to toggle)
python-django 1.4.5-1%2Bdeb7u16
links: PTS, VCS
area: main
in suites: wheezy
size: 44,168 kB
sloc: python: 140,205; xml: 659; makefile: 160; sh: 145; sql: 7
file content (138 lines) | stat: -rw-r--r-- 5,254 bytes
commit 1e39d0f6280abf34c7719db5e7ed1c333f5e5919
Author: Benjamin Richter <richter.benjamin@gmail.com>
Date:   Sun Jan 25 23:22:46 2015 +0100

    [1.4.x] Fixed #24158 -- Allowed GZipMiddleware to work with streaming responses
    
    Backport of django.utils.text.compress_sequence and fix for
    django.middleware.gzip.GZipMiddleware when using iterators as
    response.content.

diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py
index 69f938c..eb4d8bf 100644
--- a/django/middleware/gzip.py
+++ b/django/middleware/gzip.py
@@ -1,6 +1,6 @@
 import re
 
-from django.utils.text import compress_string
+from django.utils.text import compress_string, compress_sequence
 from django.utils.cache import patch_vary_headers
 
 re_accepts_gzip = re.compile(r'\bgzip\b')
@@ -12,8 +12,9 @@ class GZipMiddleware(object):
     on the Accept-Encoding header.
     """
     def process_response(self, request, response):
+        # Only measure string content: iterator content has no known length up front.
         # It's not worth attempting to compress really short responses.
-        if len(response.content) < 200:
+        if not response._base_content_is_iter and len(response.content) < 200:
             return response
 
         patch_vary_headers(response, ('Accept-Encoding',))
@@ -32,15 +33,23 @@ class GZipMiddleware(object):
         if not re_accepts_gzip.search(ae):
             return response
 
-        # Return the compressed content only if it's actually shorter.
-        compressed_content = compress_string(response.content)
-        if len(compressed_content) >= len(response.content):
-            return response
+        # The response object can tell us whether content is a string or an iterable
+        if response._base_content_is_iter:
+            # If the response content is iterable we don't know the length, so delete the header.
+            del response['Content-Length']
+            # Wrap the response content in a streaming gzip iterator (direct access to inner response._container)
+            response.content = compress_sequence(response._container)
+        else:
+            # Return the compressed content only if it's actually shorter.
+            compressed_content = compress_string(response.content)
+            if len(compressed_content) >= len(response.content):
+                return response
+            response.content = compressed_content
+            response['Content-Length'] = str(len(response.content))
 
         if response.has_header('ETag'):
             response['ETag'] = re.sub('"$', ';gzip"', response['ETag'])
 
-        response.content = compressed_content
         response['Content-Encoding'] = 'gzip'
-        response['Content-Length'] = str(len(response.content))
+
         return response
diff --git a/django/utils/text.py b/django/utils/text.py
index eaafb96..8e43dc9 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -286,6 +286,39 @@ def compress_string(s):
 
 ustring_re = re.compile(u"([\u0080-\uffff])")
 
+# Backported from django 1.5
+class StreamingBuffer(object):
+    def __init__(self):
+        self.vals = []
+
+    def write(self, val):
+        self.vals.append(val)
+
+    def read(self):
+        ret = ''.join(self.vals)
+        self.vals = []
+        return ret
+
+    def flush(self):
+        return
+
+    def close(self):
+        return
+
+# Backported from django 1.5
+# Like compress_string, but for iterators of strings.
+def compress_sequence(sequence):
+    buf = StreamingBuffer()
+    zfile = GzipFile(mode='wb', compresslevel=6, fileobj=buf)
+    # Output headers...
+    yield buf.read()
+    for item in sequence:
+        zfile.write(item)
+        zfile.flush()
+        yield buf.read()
+    zfile.close()
+    yield buf.read()
+
 def javascript_quote(s, quote_double_quotes=False):
 
     def fix(match):
diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py
index 138ee50..87b19fb 100644
--- a/tests/regressiontests/middleware/tests.py
+++ b/tests/regressiontests/middleware/tests.py
@@ -514,6 +514,7 @@ class GZipMiddlewareTest(TestCase):
     short_string = "This string is too short to be worth compressing."
     compressible_string = 'a' * 500
     uncompressible_string = ''.join(chr(random.randint(0, 255)) for _ in xrange(500))
+    iterator_as_content = iter(compressible_string)
 
     def setUp(self):
         self.req = HttpRequest()
@@ -589,6 +590,18 @@ class GZipMiddlewareTest(TestCase):
         self.assertEqual(r.content, self.uncompressible_string)
         self.assertEqual(r.get('Content-Encoding'), None)
 
+    def test_streaming_compression(self):
+        """
+        Tests that an iterator used as response content comes back as a compressed stream,
+        and that the middleware does not consume the whole response.content to produce it.
+        See #24158.
+        """
+        self.resp.content = self.iterator_as_content
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(self.decompress(''.join(r.content)), self.compressible_string)
+        self.assertEqual(r.get('Content-Encoding'), 'gzip')
+        self.assertEqual(r.get('Content-Length'), None)
+
 
 class ETagGZipMiddlewareTest(TestCase):
     """