"""Tests for the SanitizeHTML preprocessor"""
from nbformat import v4 as nbformat
from nbconvert.preprocessors.sanitize import SanitizeHTML
from .base import PreprocessorTestsBase
class TestSanitizer(PreprocessorTestsBase):
    """Contains test functions for sanitize.py"""

    # unittest setting: never truncate the diff shown on assertion failure.
    maxDiff = None

    def build_preprocessor(self):
        """Make an enabled instance of the SanitizeHTML preprocessor."""
        preprocessor = SanitizeHTML()
        preprocessor.enabled = True
        return preprocessor

    def preprocess_source(self, cell_type, source, preprocessor):
        """Run ``preprocessor`` on a notebook whose first cell is overridden.

        The first cell of the notebook returned by ``build_notebook`` is given
        the requested ``cell_type`` and ``source``; the notebook is then passed
        through ``preprocessor``.

        Returns the ``source`` of the first cell after preprocessing.
        """
        nb = self.build_notebook()
        res = self.build_resources()

        nb.cells[0].cell_type = cell_type
        nb.cells[0].source = source

        nb, res = preprocessor(nb, res)

        return nb.cells[0].source

    def test_constructor(self):
        """Can a SanitizeHTML be constructed?"""
        self.build_preprocessor()

    def test_svg_handling(self):
        """
        Test to make sure that svgs are handled 'properly'

        We only allow <img> tags (via markdown syntax) and not all the other ways
        to embed svg: <object>, <embed>, <iframe> nor inline <svg>
        """
        preprocessor = self.build_preprocessor()
        preprocessor.strip = True

        # Positive case: markdown image syntax carries no HTML markup, so it is
        # the one embedding form that must come out of the sanitizer intact.
        markdown_image = "![some image](http://example.com/something.svg)"

        inline_svg = (
            '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 68 65">\n'
            '<path fill="#1A374D" d="M42 27v-20c0-3.7-3.3-7-7-7s-7 3.3-7 7v21l12 15-7 15.7c14.5 13.9 35 2.8 35-13.7 0-13.3-13.4-21.8-26-18zm6 25c-3.9 0-7-3.1-7-7s3.1-7 7-7 7 3.1 7 7-3.1 7-7 7z"/>\n'
            '<path d="M14 27v-20c0-3.7-3.3-7-7-7s-7 3.3-7 7v41c0 8.2 9.2 17 20 17s20-9.2 20-20c0-13.3-13.4-21.8-26-18zm6 25c-3.9 0-7-3.1-7-7s3.1-7 7-7 7 3.1 7 7-3.1 7-7 7z"/>\n'
            "</svg>"
        )
        source = "\n".join(
            [
                markdown_image,
                '<object data="something.svg" type="image/svg+xml"></object>',
                '<embed data="something.svg" type="image/svg+xml" />',
                '<iframe src="http://example.com/something.svg"></iframe>',
                inline_svg,
            ]
        )

        sanitized = self.preprocess_source("markdown", source, preprocessor)

        # Every HTML embedding form is stripped; only whitespace may remain
        # around the markdown image, hence the ``strip()``.
        self.assertEqual(sanitized.strip(), markdown_image)

    def test_tag_allowlist_stripping(self):
        """Test tag allowlisting + stripping out offending tags"""
        preprocessor = self.build_preprocessor()
        preprocessor.strip = True

        # In strip mode the disallowed tag is removed but its text is kept.
        self.assertEqual(
            self.preprocess_source(
                "markdown", "_A_ <em>few</em> <script>tags</script>", preprocessor
            ),
            "_A_ <em>few</em> tags",
        )

    def test_comment_stripping(self):
        """Test HTML comment stripping"""
        preprocessor = self.build_preprocessor()

        # With the preprocessor's default settings the comment is removed.
        self.assertEqual(
            self.preprocess_source("markdown", "_A_ <em>few</em> <!-- tags -->", preprocessor),
            "_A_ <em>few</em> ",
        )

        # Once comment stripping is switched off the comment is preserved.
        preprocessor.strip_comments = False
        self.assertEqual(
            self.preprocess_source("markdown", "_A_ <em>few</em> <!-- tags -->", preprocessor),
            "_A_ <em>few</em> <!-- tags -->",
        )

    def test_attributes_allowlist(self):
        """Test attribute allowlisting: attributes not listed are dropped"""
        preprocessor = self.build_preprocessor()
        preprocessor.attributes["a"] = ["href", "title"]

        # ``rel`` is not in the allowlist for <a>, so only ``href`` survives.
        self.assertEqual(
            self.preprocess_source(
                "markdown", '<a href="link" rel="nofollow">Hi</a>', preprocessor
            ),
            '<a href="link">Hi</a>',
        )

    def test_style_allowlist(self):
        """Test style allowlisting: only allowed CSS properties are kept"""
        preprocessor = self.build_preprocessor()

        # Assign a fresh list instead of appending in place: an existing "*"
        # list may be shared with the trait's default value, and mutating it
        # would leak the extra "style" entry into other tests.
        preprocessor.attributes["*"] = [*preprocessor.attributes.get("*", []), "style"]
        preprocessor.styles = [
            "color",
        ]

        # ``background-color`` is not an allowed style and is removed; the
        # disallowed <script> tag is escaped because ``strip`` is not enabled.
        self.assertEqual(
            self.preprocess_source(
                "markdown",
                '_A_ <em style="color: blue; background-color: pink">'
                "few</em> <script>tags</script>",
                preprocessor,
            ),
            '_A_ <em style="color: blue;">few</em> &lt;script&gt;tags&lt;/script&gt;',
        )

    def test_tag_passthrough(self):
        """Test passing through raw output"""
        preprocessor = self.build_preprocessor()

        # Raw cells are expected to be returned untouched, <script> included.
        self.assertEqual(
            self.preprocess_source("raw", "_A_ <em>few</em> <script>tags</script>", preprocessor),
            "_A_ <em>few</em> <script>tags</script>",
        )

    def test_output_sanitizing(self):
        """Test that outputs are also sanitized properly"""
        preprocessor = self.build_preprocessor()
        nb = self.build_notebook()

        outputs = [
            nbformat.new_output(
                "display_data",
                data={
                    "text/plain": "b",
                    "text/html": "<script>more evil</script>",
                    "text/css": "<style> * {display:none}</style>",
                },
            ),
            nbformat.new_output("stream", name="stdout", text="wat"),
            nbformat.new_output("stream", name="stdout", text="<script>Evil tag</script>"),
        ]
        nb.cells[0].outputs = outputs

        res = self.build_resources()
        nb, res = preprocessor(nb, res)

        expected_output = [
            {
                # text/html is sanitized (the disallowed tag is escaped),
                # text/plain is kept as-is, and the text/css entry is expected
                # to be dropped from the bundle entirely.
                "data": {
                    "text/html": "&lt;script&gt;more evil&lt;/script&gt;",
                    "text/plain": "b",
                },
                "metadata": {},
                "output_type": "display_data",
            },
            # Stream outputs are expected to come back unchanged.
            {"name": "stdout", "output_type": "stream", "text": "wat"},
            {"name": "stdout", "output_type": "stream", "text": "<script>Evil tag</script>"},
        ]
        self.assertEqual(nb.cells[0].outputs, expected_output)

    def test_tag_allowlist(self):
        """Test tag allowlisting"""
        preprocessor = self.build_preprocessor()

        # ``strip`` is not enabled here, so the disallowed <script> tag is
        # escaped rather than removed, while the allowed <em> tag is kept.
        self.assertEqual(
            self.preprocess_source(
                "markdown", "_A_ <em>few</em> <script>tags</script>", preprocessor
            ),
            "_A_ <em>few</em> &lt;script&gt;tags&lt;/script&gt;",
        )
|