1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
|
import gc
import time
from xml.dom import minidom
from django.core import serializers
from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
from django.db import models
from django.test import TestCase, TransactionTestCase
from .tests import SerializersTestBase, SerializersTransactionTestBase
class XmlSerializerTestCase(SerializersTestBase, TestCase):
    """
    XML-specific fixtures, parsing helpers, and tests layered on
    SerializersTestBase.
    """

    serializer_name = "xml"
    # Two category objects, neither of which carries a "pk" attribute.
    pkless_str = """<?xml version="1.0" encoding="utf-8"?>
<django-objects version="1.0">
<object model="serializers.category">
<field type="CharField" name="name">Reference</field>
</object>
<object model="serializers.category">
<field type="CharField" name="name">Non-fiction</field>
</object>
</django-objects>"""
    # Template for a single article; the %(...)s placeholders take the
    # primary keys of the article, its author, and two categories.
    mapping_ordering_str = """<?xml version="1.0" encoding="utf-8"?>
<django-objects version="1.0">
<object model="serializers.article" pk="%(article_pk)s">
<field name="author" rel="ManyToOneRel" to="serializers.author">%(author_pk)s</field>
<field name="headline" type="CharField">Poker has no place on ESPN</field>
<field name="pub_date" type="DateTimeField">2006-06-16T11:00:00</field>
<field name="categories" rel="ManyToManyRel" to="serializers.category"><object pk="%(first_category_pk)s"></object><object pk="%(second_category_pk)s"></object></field>
<field name="meta_data" rel="ManyToManyRel" to="serializers.categorymetadata"></field>
<field name="topics" rel="ManyToManyRel" to="serializers.topic"></field>
</object>
</django-objects>"""  # NOQA

    @staticmethod
    def _validate_output(serial_str):
        """Return True if minidom can parse ``serial_str``, False otherwise."""
        try:
            minidom.parseString(serial_str)
        except Exception:
            # Any parsing failure at all means the output is not valid XML.
            return False
        return True

    @staticmethod
    def _get_pk_values(serial_str):
        """Return the "pk" attribute of every <object> element."""
        document = minidom.parseString(serial_str)
        return [
            node.getAttribute("pk") for node in document.getElementsByTagName("object")
        ]

    @staticmethod
    def _get_field_values(serial_str, field_name):
        """
        Return the text of every <field> element named ``field_name``.

        Each entry is the concatenation of the node values of that field's
        direct children.
        """
        document = minidom.parseString(serial_str)
        return [
            "".join([child.nodeValue for child in element.childNodes])
            for element in document.getElementsByTagName("field")
            if element.getAttribute("name") == field_name
        ]

    def test_control_char_failure(self):
        """
        XML 1.0 supports no control characters other than HT, LF, and CR, so
        serializing a value that contains any other one should fail.
        """
        self.a1.headline = "This contains \u0001 control \u0011 chars"
        expected_message = (
            "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
        )
        with self.assertRaisesMessage(ValueError, expected_message):
            serializers.serialize(self.serializer_name, [self.a1])

        # The three permitted control characters must pass through unchanged.
        self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
        serialized = serializers.serialize(self.serializer_name, [self.a1])
        self.assertIn("HT \t, LF \n, and CR \r are allowed", serialized)

    def test_no_dtd(self):
        """
        The XML deserializer should reject any document that declares a DTD.
        Refusing DTDs outright is the simplest way to rule out every entity
        definition, and with it both external-entity and entity-expansion
        attacks.
        """
        document_with_dtd = (
            '<?xml version="1.0" standalone="no"?>'
            '<!DOCTYPE example SYSTEM "http://example.com/example.dtd">'
        )
        with self.assertRaises(DTDForbidden):
            next(serializers.deserialize("xml", document_with_dtd))

    def test_crafted_xml_performance(self):
        """Processing time for crafted inputs does not grow quadratically."""

        def build_crafted_xml(depth, leaf_text_len):
            """
            Build a one-object document whose "app_label" field wraps
            ``leaf_text_len`` characters in ``depth`` nested <nested> tags.
            """
            field_content = "".join(
                ("<nested>" * depth, "x" * leaf_text_len, "</nested>" * depth)
            )
            return f"""
<django-objects version="1.0">
<object model="contenttypes.contenttype" pk="1">
<field name="app_label">{field_content}</field>
<field name="model">m</field>
</object>
</django-objects>
"""

        def deserialize(crafted_xml):
            """Deserialize ``crafted_xml`` and return the seconds it took."""
            iterator = Deserializer(crafted_xml)
            # Collect garbage up front so a collection pass is less likely to
            # land inside the timed section.
            gc.collect()

            started = time.perf_counter()
            objects = list(iterator)
            elapsed = time.perf_counter() - started

            self.assertEqual(len(objects), 1)
            self.assertIsInstance(objects[0].object, models.Model)
            return elapsed

        def assertFactor(label, params, factor=2):
            """
            Time each (depth, length) pair in ``params`` and assert that the
            mean ratio between consecutive timings is at most ``factor``.
            """
            timings = [
                deserialize(build_crafted_xml(depth, length))
                for depth, length in params
            ]
            ratios = [later / earlier for earlier, later in zip(timings, timings[1:])]

            with self.subTest(label):
                # Comparing the mean ratio, not each individual ratio, makes
                # the test less flaky.
                self.assertLessEqual(sum(ratios) / len(ratios), factor)

        assertFactor(
            "varying depth, varying length",
            [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
            factor=2,
        )
        assertFactor(
            "constant depth, varying length",
            [(100, 1), (100, 1000)],
            factor=2,
        )
class XmlSerializerTransactionTestCase(
    SerializersTransactionTestBase, TransactionTestCase
):
    """XML-specific attributes layered on SerializersTransactionTestBase."""

    serializer_name = "xml"
    # The article comes first and refers to an author and a category (both
    # pk 1) that are only defined further down in the same document.
    fwd_ref_str = """<?xml version="1.0" encoding="utf-8"?>
<django-objects version="1.0">
<object pk="1" model="serializers.article">
<field to="serializers.author" name="author" rel="ManyToOneRel">1</field>
<field type="CharField" name="headline">Forward references pose no problem</field>
<field type="DateTimeField" name="pub_date">2006-06-16T15:00:00</field>
<field to="serializers.category" name="categories" rel="ManyToManyRel">
<object pk="1"></object>
</field>
<field to="serializers.categorymetadata" name="meta_data" rel="ManyToManyRel"></field>
</object>
<object pk="1" model="serializers.author">
<field type="CharField" name="name">Agnes</field>
</object>
<object pk="1" model="serializers.category">
<field type="CharField" name="name">Reference</field></object>
</django-objects>"""  # NOQA
|