File: 23_restrict_xml_deserializer.diff

package info (click to toggle)
python-django 1.2.3-3%2Bsqueeze15
  • links: PTS, VCS
  • area: main
  • in suites: squeeze-lts
  • size: 29,720 kB
  • ctags: 21,538
  • sloc: python: 101,631; xml: 574; makefile: 149; sh: 121; sql: 7
file content (150 lines) | stat: -rw-r--r-- 5,685 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
Author: Carl Meyer <carl@oddbird.net>
Date:   Mon Feb 11 21:54:53 2013 -0700
Originm: backport, commit:d19a27066b2247102e65412aa66917aff0091112
Description: Restrict the XML deserializer to prevent network and entity-expansion DoS attacks.
Bug-Debian: http://bugs.debian.org/701186
    
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -8,6 +8,8 @@ from django.db import models, DEFAULT_DB
 from django.utils.xmlutils import SimplerXMLGenerator
 from django.utils.encoding import smart_unicode
 from xml.dom import pulldom
+from xml.sax import handler
+from xml.sax.expatreader import ExpatParser as _ExpatParser
 
 class Serializer(base.Serializer):
     """
@@ -148,9 +150,13 @@ class Deserializer(base.Deserializer):
 
     def __init__(self, stream_or_string, **options):
         super(Deserializer, self).__init__(stream_or_string, **options)
-        self.event_stream = pulldom.parse(self.stream)
+        self.event_stream = pulldom.parse(self.stream, self._make_parser())
         self.db = options.pop('using', DEFAULT_DB_ALIAS)
 
+    def _make_parser(self):
+        """Create a hardened XML parser (no custom/external entities)."""
+        return DefusedExpatParser()
+
     def next(self):
         for event, node in self.event_stream:
             if event == "START_ELEMENT" and node.nodeName == "object":
@@ -288,3 +294,89 @@ def getInnerText(node):
         else:
            pass
     return u"".join(inner_text)
+
+
+# Below code based on Christian Heimes' defusedxml
+
+
+class DefusedExpatParser(_ExpatParser):
+    """
+    An expat parser hardened against XML bomb attacks.
+
+    Forbids DTDs, external entity references
+
+    """
+    def __init__(self, *args, **kwargs):
+        _ExpatParser.__init__(self, *args, **kwargs)
+        self.setFeature(handler.feature_external_ges, False)
+        self.setFeature(handler.feature_external_pes, False)
+
+    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
+        raise DTDForbidden(name, sysid, pubid)
+
+    def entity_decl(self, name, is_parameter_entity, value, base,
+                    sysid, pubid, notation_name):
+        raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
+
+    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
+        # expat 1.2
+        raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)
+
+    def external_entity_ref_handler(self, context, base, sysid, pubid):
+        raise ExternalReferenceForbidden(context, base, sysid, pubid)
+
+    def reset(self):
+        _ExpatParser.reset(self)
+        parser = self._parser
+        parser.StartDoctypeDeclHandler = self.start_doctype_decl
+        parser.EntityDeclHandler = self.entity_decl
+        parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
+        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
+
+
+class DefusedXmlException(ValueError):
+    """Base exception."""
+    def __repr__(self):
+        return str(self)
+
+
+class DTDForbidden(DefusedXmlException):
+    """Document type definition is forbidden."""
+    def __init__(self, name, sysid, pubid):
+        self.name = name
+        self.sysid = sysid
+        self.pubid = pubid
+
+    def __str__(self):
+        tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})"
+        return tpl.format(self.name, self.sysid, self.pubid)
+
+
+class EntitiesForbidden(DefusedXmlException):
+    """Entity definition is forbidden."""
+    def __init__(self, name, value, base, sysid, pubid, notation_name):
+        super(EntitiesForbidden, self).__init__()
+        self.name = name
+        self.value = value
+        self.base = base
+        self.sysid = sysid
+        self.pubid = pubid
+        self.notation_name = notation_name
+
+    def __str__(self):
+        tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})"
+        return tpl.format(self.name, self.sysid, self.pubid)
+
+
+class ExternalReferenceForbidden(DefusedXmlException):
+    """Resolving an external reference is forbidden."""
+    def __init__(self, context, base, sysid, pubid):
+        super(ExternalReferenceForbidden, self).__init__()
+        self.context = context
+        self.base = base
+        self.sysid = sysid
+        self.pubid = pubid
+
+    def __str__(self):
+        tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})"
+        return tpl.format(self.sysid, self.pubid)
--- a/tests/regressiontests/serializers_regress/tests.py
+++ b/tests/regressiontests/serializers_regress/tests.py
@@ -14,6 +14,7 @@ try:
     from cStringIO import StringIO
 except ImportError:
     from StringIO import StringIO
+from django.core.serializers.xml_serializer import DTDForbidden
 
 from django.conf import settings
 from django.core import serializers, management
@@ -419,3 +420,17 @@ for format in serializers.get_serializer
     setattr(SerializerTests, 'test_' + format + '_serializer_fields', curry(fieldsTest, format))
     if format != 'python':
         setattr(SerializerTests, 'test_' + format + '_serializer_stream', curry(streamTest, format))
+
+
+class XmlDeserializerSecurityTests(TestCase):
+
+    def test_no_dtd(self):
+        """
+        The XML deserializer shouldn't allow a DTD.
+
+        This is the most straightforward way to prevent all entity definitions
+        and avoid both external entities and entity-expansion attacks.
+
+        """
+        xml = '<?xml version="1.0" standalone="no"?><!DOCTYPE example SYSTEM "http://example.com/example.dtd">'
+        self.assertRaises(DTDForbidden, serializers.deserialize('xml', xml).next)