Author: Benjamin Richter <richter.benjamin@gmail.com>
Date:   Sun Jan 25 23:22:46 2015 +0100
Subject: [1.4.x] Fixed #24158 -- Allowed GZipMiddleware to work with streaming responses
    
    Backport of django.utils.text.compress_sequence and fix for
    django.middleware.gzip.GZipMiddleware when using iterators as
    response.content.

Patch further backported to Django 1.2 by Raphaël Hertzog
<hertzog@debian.org>.

Origin: backport, https://github.com/django/django/commit/1e39d0f6280abf34c7719db5e7ed1c333f5e5919
Bug: https://code.djangoproject.com/ticket/24158

--- a/django/middleware/gzip.py
+++ b/django/middleware/gzip.py
@@ -1,6 +1,6 @@
 import re
 
-from django.utils.text import compress_string
+from django.utils.text import compress_string, compress_sequence
 from django.utils.cache import patch_vary_headers
 
 re_accepts_gzip = re.compile(r'\bgzip\b')
@@ -12,8 +12,10 @@ class GZipMiddleware(object):
     on the Accept-Encoding header.
     """
     def process_response(self, request, response):
+        # Only string content (response._is_string) has a length we can check up front.
         # It's not worth compressing non-OK or really short responses.
-        if response.status_code != 200 or len(response.content) < 200:
+        if response.status_code != 200 or (response._is_string and
+                                           len(response.content) < 200):
             return response
 
         patch_vary_headers(response, ('Accept-Encoding',))
@@ -32,7 +34,17 @@ class GZipMiddleware(object):
         if not re_accepts_gzip.search(ae):
             return response
 
-        response.content = compress_string(response.content)
+        if response._is_string:
+            compressed_content = compress_string(response.content)
+            response.content = compressed_content
+            response['Content-Length'] = str(len(response.content))
+        else:
+            # If the response content is iterable we don't know the length,
+            # so delete the header.
+            del response['Content-Length']
+            # Wrap the raw iterable in a streaming gzip iterator; read it from
+            # response._container because response.content would join it first.
+            response.content = compress_sequence(response._container)
+
         response['Content-Encoding'] = 'gzip'
-        response['Content-Length'] = str(len(response.content))
         return response
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -186,6 +186,40 @@ def compress_string(s):
 
 ustring_re = re.compile(u"([\u0080-\uffff])")
 
+# Backported from Django 1.5: file-like buffer that read() empties each call.
+class StreamingBuffer(object):
+    def __init__(self):
+        self.vals = []
+
+    def write(self, val):
+        self.vals.append(val)
+
+    def read(self):
+        ret = ''.join(self.vals)
+        self.vals = []
+        return ret
+
+    def flush(self):
+        return
+
+    def close(self):
+        return
+
+# Backported from Django 1.5.
+# Like compress_string, but lazily gzips an iterator of strings chunk by chunk.
+def compress_sequence(sequence):
+    import gzip
+    buf = StreamingBuffer()
+    zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=buf)
+    # Emit whatever GzipFile has already written to buf (the gzip header).
+    yield buf.read()
+    for item in sequence:
+        zfile.write(item)
+        zfile.flush()
+        yield buf.read()
+    zfile.close()
+    yield buf.read()
+
 def javascript_quote(s, quote_double_quotes=False):
 
     def fix(match):
--- a/django/http/__init__.py
+++ b/django/http/__init__.py
@@ -326,12 +326,7 @@ class HttpResponse(object):
         if not content_type:
             content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
                     self._charset)
-        if not isinstance(content, basestring) and hasattr(content, '__iter__'):
-            self._container = content
-            self._is_string = False
-        else:
-            self._container = [content]
-            self._is_string = True
+        self.content = content
         self.cookies = CompatCookie()
         if status:
             self.status_code = status
@@ -408,8 +403,12 @@ class HttpResponse(object):
         return smart_str(''.join(self._container), self._charset)
 
     def _set_content(self, value):
-        self._container = [value]
-        self._is_string = True
+        if not isinstance(value, basestring) and hasattr(value, '__iter__'):
+            self._container = value
+            self._is_string = False
+        else:
+            self._container = [value]
+            self._is_string = True
 
     content = property(_get_content, _set_content)
 
--- a/tests/regressiontests/middleware/tests.py
+++ b/tests/regressiontests/middleware/tests.py
@@ -1,8 +1,13 @@
 # -*- coding: utf-8 -*-
 
+import gzip
+import random
+import StringIO
+
 from django.test import TestCase
-from django.http import HttpRequest
+from django.http import HttpRequest, HttpResponse
 from django.middleware.common import CommonMiddleware
+from django.middleware.gzip import GZipMiddleware
 from django.conf import settings
 
 class CommonMiddlewareTest(TestCase):
@@ -125,7 +130,7 @@ class CommonMiddlewareTest(TestCase):
         self.assertEquals(r.status_code, 301)
         self.assertEquals(r['Location'],
                           'http://www.testserver/middleware/slash/')
-   
+
 
     # The following tests examine expected behavior given a custom urlconf that
     # overrides the default one through the request object.
@@ -165,7 +170,7 @@ class CommonMiddlewareTest(TestCase):
       request = self._get_request('customurlconf/slash')
       request.urlconf = 'regressiontests.middleware.extra_urls'
       r = CommonMiddleware().process_request(request)
-      self.failIf(r is None, 
+      self.failIf(r is None,
           "CommonMiddlware failed to return APPEND_SLASH redirect using request.urlconf")
       self.assertEquals(r.status_code, 301)
       self.assertEquals(r['Location'], 'http://testserver/middleware/customurlconf/slash/')
@@ -209,7 +214,7 @@ class CommonMiddlewareTest(TestCase):
       request = self._get_request('customurlconf/needsquoting#')
       request.urlconf = 'regressiontests.middleware.extra_urls'
       r = CommonMiddleware().process_request(request)
-      self.failIf(r is None, 
+      self.failIf(r is None,
           "CommonMiddlware failed to return APPEND_SLASH redirect using request.urlconf")
       self.assertEquals(r.status_code, 301)
       self.assertEquals(
@@ -246,3 +251,80 @@ class CommonMiddlewareTest(TestCase):
       self.assertEquals(r.status_code, 301)
       self.assertEquals(r['Location'],
                         'http://www.testserver/middleware/customurlconf/slash/')
+
+
+class GZipMiddlewareTest(TestCase):
+    """
+    Tests the GZip middleware.
+    """
+    short_string = "This string is too short to be worth compressing."
+    compressible_string = 'a' * 500
+    uncompressible_string = ''.join(chr(random.randint(0, 255)) for _ in xrange(500))
+    iterator_as_content = iter(compressible_string)
+
+    def setUp(self):
+        self.req = HttpRequest()
+        self.req.META = {
+            'SERVER_NAME': 'testserver',
+            'SERVER_PORT': 80,
+        }
+        self.req.path = self.req.path_info = "/"
+        self.req.META['HTTP_ACCEPT_ENCODING'] = 'gzip, deflate'
+        self.req.META['HTTP_USER_AGENT'] = 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
+        self.resp = HttpResponse()
+        self.resp.status_code = 200
+        self.resp.content = self.compressible_string
+        self.resp['Content-Type'] = 'text/html; charset=UTF-8'
+
+    @staticmethod
+    def decompress(gzipped_string):
+        return gzip.GzipFile(mode='rb', fileobj=StringIO.StringIO(gzipped_string)).read()
+
+    def test_compress_response(self):
+        """
+        Tests that compression is performed on responses with compressible content.
+        """
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(self.decompress(r.content), self.compressible_string)
+        self.assertEqual(r['Content-Encoding'], 'gzip')
+        self.assertEqual(r['Content-Length'], str(len(r.content)))
+
+    def test_no_compress_short_response(self):
+        """
+        Tests that compression isn't performed on responses with short content.
+        """
+        self.resp.content = self.short_string
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(r.content, self.short_string)
+        self.assertFalse(r.has_header('Content-Encoding'))
+
+    def test_no_compress_compressed_response(self):
+        """
+        Tests that compression isn't performed on responses that are already compressed.
+        """
+        self.resp['Content-Encoding'] = 'deflate'
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(r.content, self.compressible_string)
+        self.assertEqual(r['Content-Encoding'], 'deflate')
+
+    def test_no_compress_ie_js_requests(self):
+        """
+        Tests that compression isn't performed on JavaScript requests from Internet Explorer.
+        """
+        self.req.META['HTTP_USER_AGENT'] = 'Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)'
+        self.resp['Content-Type'] = 'application/javascript; charset=UTF-8'
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(r.content, self.compressible_string)
+        self.assertFalse(r.has_header('Content-Encoding'))
+
+    def test_streaming_compression(self):
+        """
+        Tests that iterators as response content return a compressed stream without consuming
+        the whole response.content while doing so.
+        See #24158.
+        """
+        self.resp.content = self.iterator_as_content
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(self.decompress(''.join(r.content)), self.compressible_string)
+        self.assertEqual(r['Content-Encoding'], 'gzip')
+        self.assertFalse(r.has_header('Content-Length'))
