File: 0003-sanitizer-escape-HTML-comments.patch

package info (click to toggle)
python-bleach 3.1.2-0%2Bdeb10u2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,320 kB
  • sloc: python: 13,477; makefile: 130; sh: 47
file content (96 lines) | stat: -rw-r--r-- 3,735 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
From: Greg Guthe <gguthe@mozilla.com>
Date: Thu, 28 Jan 2021 14:56:24 -0500
Subject: sanitizer: escape HTML comments
Origin: https://github.com/mozilla/bleach/commit/1334134d34397966a7f7cfebd38639e9ba2c680e
Bug-Debian: https://bugs.debian.org/986251
Bug: https://bugzilla.mozilla.org/show_bug.cgi?id=1689399
Bug: https://github.com/mozilla/bleach/security/advisories/GHSA-vv2x-vrpj-qqpq
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2021-23980

fixes: bug 1689399 / GHSA vv2x-vrpj-qqpq
---
 bleach/html5lib_shim.py |  1 +
 bleach/sanitizer.py     |  4 ++++
 tests/test_clean.py     | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

--- a/bleach/html5lib_shim.py
+++ b/bleach/html5lib_shim.py
@@ -25,7 +25,7 @@ from html5lib.filters.base import Filter
 from html5lib.filters.sanitizer import allowed_protocols
 from html5lib.filters.sanitizer import Filter as SanitizerFilter
 from html5lib._inputstream import HTMLInputStream
-from html5lib.serializer import HTMLSerializer
+from html5lib.serializer import escape, HTMLSerializer
 from html5lib._tokenizer import HTMLTokenizer
 from html5lib._trie import Trie
 
--- a/bleach/sanitizer.py
+++ b/bleach/sanitizer.py
@@ -347,6 +347,10 @@ class BleachSanitizerFilter(html5lib_shi
 
         elif token_type == 'Comment':
             if not self.strip_html_comments:
+                # call xml.sax.saxutils.escape to escape &, <, and > in addition to " and '
+                token["data"] = html5lib_shim.escape(
+                    token["data"], entities={'"': "&quot;", "'": "&#x27;"}
+                )
                 return token
             else:
                 return None
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -820,6 +820,53 @@ def test_namespace_rc_data_element_strip
     assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
 
 
+@pytest.mark.parametrize(
+    "namespace_tag, end_tag, data, expected",
+    [
+        (
+            "math",
+            "p",
+            "<math></p><style><!--</style><img src/onerror=alert(1)>",
+            "<math><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
+        ),
+        (
+            "math",
+            "br",
+            "<math></br><style><!--</style><img src/onerror=alert(1)>",
+            "<math><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
+        ),
+        (
+            "svg",
+            "p",
+            "<svg></p><style><!--</style><img src/onerror=alert(1)>",
+            "<svg><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
+        ),
+        (
+            "svg",
+            "br",
+            "<svg></br><style><!--</style><img src/onerror=alert(1)>",
+            "<svg><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
+        ),
+    ],
+)
+def test_html_comments_escaped(namespace_tag, end_tag, data, expected):
+    # refs: bug 1689399 / GHSA-vv2x-vrpj-qqpq
+    #
+    # p and br can be just an end tag (e.g. </p> == <p></p>)
+    #
+    # In browsers:
+    #
+    # * img and other tags break out of the svg or math namespace (e.g. <svg><img></svg> == <svg></svg><img>)
+    # * style does not (e.g. <svg><style></svg> == <svg><style></style></svg>)
+    # * the breaking tag ejects trailing elements (e.g. <svg><img><style></style></svg> == <svg></svg><img><style></style>)
+    #
+    # the ejected elements can trigger XSS
+    assert (
+        clean(data, tags=[namespace_tag, end_tag, "style"], strip_comments=False)
+        == expected
+    )
+
+
 def get_ids_and_tests():
     """Retrieves regression tests from data/ directory