File: CVE-2015-0221-regression-fix.diff

package info (click to toggle)
python-django 1.4.5-1%2Bdeb7u16
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 44,168 kB
  • sloc: python: 140,205; xml: 659; makefile: 160; sh: 145; sql: 7
file content (138 lines) | stat: -rw-r--r-- 5,254 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
commit 1e39d0f6280abf34c7719db5e7ed1c333f5e5919
Author: Benjamin Richter <richter.benjamin@gmail.com>
Date:   Sun Jan 25 23:22:46 2015 +0100

    [1.4.x] Fixed #24158 -- Allowed GZipMiddleware to work with streaming responses
    
    Backport of django.utils.text.compress_sequence and fix for
    django.middleware.gzip.GZipMiddleware when using iterators as
    response.content.

diff --git a/django/middleware/gzip.py b/django/middleware/gzip.py
index 69f938c..eb4d8bf 100644
--- a/django/middleware/gzip.py
+++ b/django/middleware/gzip.py
@@ -1,6 +1,6 @@
 import re
 
-from django.utils.text import compress_string
+from django.utils.text import compress_string, compress_sequence
 from django.utils.cache import patch_vary_headers
 
 re_accepts_gzip = re.compile(r'\bgzip\b')
@@ -12,8 +12,9 @@ class GZipMiddleware(object):
     on the Accept-Encoding header.
     """
     def process_response(self, request, response):
+        # Only string content is length-checked; an iterator's length is unknown without consuming it.
         # It's not worth attempting to compress really short responses.
-        if len(response.content) < 200:
+        if not response._base_content_is_iter and len(response.content) < 200:
             return response
 
         patch_vary_headers(response, ('Accept-Encoding',))
@@ -32,15 +33,23 @@ class GZipMiddleware(object):
         if not re_accepts_gzip.search(ae):
             return response
 
-        # Return the compressed content only if it's actually shorter.
-        compressed_content = compress_string(response.content)
-        if len(compressed_content) >= len(response.content):
-            return response
+        # The response object can tell us whether content is a string or an iterable
+        if response._base_content_is_iter:
+            # If the response content is iterable we don't know the length, so delete the header.
+            del response['Content-Length']
+            # Wrap the response content in a streaming gzip iterator (direct access to inner response._container)
+            response.content = compress_sequence(response._container)
+        else:
+            # Return the compressed content only if it's actually shorter.
+            compressed_content = compress_string(response.content)
+            if len(compressed_content) >= len(response.content):
+                return response
+            response.content = compressed_content
+            response['Content-Length'] = str(len(response.content))
 
         if response.has_header('ETag'):
             response['ETag'] = re.sub('"$', ';gzip"', response['ETag'])
 
-        response.content = compressed_content
         response['Content-Encoding'] = 'gzip'
-        response['Content-Length'] = str(len(response.content))
+
         return response
diff --git a/django/utils/text.py b/django/utils/text.py
index eaafb96..8e43dc9 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -286,6 +286,39 @@ def compress_string(s):
 
 ustring_re = re.compile(u"([\u0080-\uffff])")
 
+# Backported from django 1.5
+class StreamingBuffer(object):
+    def __init__(self):
+        self.vals = []
+
+    def write(self, val):
+        self.vals.append(val)
+
+    def read(self):
+        ret = ''.join(self.vals)
+        self.vals = []
+        return ret
+
+    def flush(self):
+        return
+
+    def close(self):
+        return
+
+# Backported from Django 1.5.
+# Like compress_string, but for iterators of strings.
+def compress_sequence(sequence):
+    buf = StreamingBuffer()
+    zfile = GzipFile(mode='wb', compresslevel=6, fileobj=buf)
+    # Output headers...
+    yield buf.read()
+    for item in sequence:
+        zfile.write(item)
+        zfile.flush()
+        yield buf.read()
+    zfile.close()
+    yield buf.read()
+
 def javascript_quote(s, quote_double_quotes=False):
 
     def fix(match):
diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py
index 138ee50..87b19fb 100644
--- a/tests/regressiontests/middleware/tests.py
+++ b/tests/regressiontests/middleware/tests.py
@@ -514,6 +514,7 @@ class GZipMiddlewareTest(TestCase):
     short_string = "This string is too short to be worth compressing."
     compressible_string = 'a' * 500
     uncompressible_string = ''.join(chr(random.randint(0, 255)) for _ in xrange(500))
+    iterator_as_content = iter(compressible_string)
 
     def setUp(self):
         self.req = HttpRequest()
@@ -589,6 +590,18 @@ class GZipMiddlewareTest(TestCase):
         self.assertEqual(r.content, self.uncompressible_string)
         self.assertEqual(r.get('Content-Encoding'), None)
 
+    def test_streaming_compression(self):
+        """
+        Tests that an iterator used as response content is returned as a compressed stream,
+        without the middleware consuming the whole response.content up front.
+        See #24158.
+        """
+        self.resp.content = self.iterator_as_content
+        r = GZipMiddleware().process_response(self.req, self.resp)
+        self.assertEqual(self.decompress(''.join(r.content)), self.compressible_string)
+        self.assertEqual(r.get('Content-Encoding'), 'gzip')
+        self.assertEqual(r.get('Content-Length'), None)
+
 
 class ETagGZipMiddlewareTest(TestCase):
     """