File: 0001-Incomplete-tags-with-punctuation-after-as-part-of-th.patch

package info (click to toggle)
python-markdown2 2.3.7-2%2Bdeb10u1
links: PTS, VCS
area: main
in suites: buster
size: 1,952 kB
sloc: python: 2,790; makefile: 35
file content (73 lines) | stat: -rw-r--r-- 2,908 bytes
From: Gareth Simpson <g@xurble.org>
Date: Tue, 2 Jun 2020 20:14:30 +0200
Subject: Incomplete tags with trailing punctuation treated as part of the
 tag name are a source of XSS
Bug: https://github.com/trentm/python-markdown2/issues/348

Fixes CVE-2020-11888.

python-markdown2 through 2.3.8 allows XSS because element names are
mishandled unless a \w+ match succeeds. For example, an attack might use
elementname@ or elementname- with an onclick attribute.
---
 lib/markdown2.py                           | 9 ++++++---
 test/tm-cases/issue348_incomplete_tag.html | 1 +
 test/tm-cases/issue348_incomplete_tag.opts | 1 +
 test/tm-cases/issue348_incomplete_tag.text | 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 test/tm-cases/issue348_incomplete_tag.html
 create mode 100644 test/tm-cases/issue348_incomplete_tag.opts
 create mode 100644 test/tm-cases/issue348_incomplete_tag.text

diff --git a/lib/markdown2.py b/lib/markdown2.py
index 16672f5..bd9fe0c 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1772,7 +1772,7 @@ class Markdown(object):
                 lexer_name = lexer_name[3:].strip()
                 codeblock = rest.lstrip("\n")   # Remove lexer declaration line.
                 formatter_opts = self.extras['code-color'] or {}
-        
+
         # Use pygments only if not using the highlightjs-lang extra
         if lexer_name and "highlightjs-lang" not in self.extras:
             def unhash_code(codeblock):
@@ -2134,12 +2134,15 @@ class Markdown(object):
         text = self._naked_gt_re.sub('&gt;', text)
         return text
 
-    _incomplete_tags_re = re.compile("<(/?\w+[\s/]+?)")
+    _incomplete_tags_re = re.compile("<(/?\w+?(?!\w).+?[\s/]+?)")
 
     def _encode_incomplete_tags(self, text):
         if self.safe_mode not in ("replace", "escape"):
             return text
-            
+
+        if text.endswith(">"):
+            return text  # this is not an incomplete tag, this is a link in the form <http://x.y.z>
+
         return self._incomplete_tags_re.sub("&lt;\\1", text)
 
     def _encode_backslash_escapes(self, text):
diff --git a/test/tm-cases/issue348_incomplete_tag.html b/test/tm-cases/issue348_incomplete_tag.html
new file mode 100644
index 0000000..46059cc
--- /dev/null
+++ b/test/tm-cases/issue348_incomplete_tag.html
@@ -0,0 +1 @@
+<p>&lt;lol@/ //id="pwn"//onclick="alert(1)"//<strong>abc</strong></p>
diff --git a/test/tm-cases/issue348_incomplete_tag.opts b/test/tm-cases/issue348_incomplete_tag.opts
new file mode 100644
index 0000000..ad487c0
--- /dev/null
+++ b/test/tm-cases/issue348_incomplete_tag.opts
@@ -0,0 +1 @@
+{"safe_mode": "escape"}
diff --git a/test/tm-cases/issue348_incomplete_tag.text b/test/tm-cases/issue348_incomplete_tag.text
new file mode 100644
index 0000000..bb4a0de
--- /dev/null
+++ b/test/tm-cases/issue348_incomplete_tag.text
@@ -0,0 +1 @@
+<lol@/ //id="pwn"//onclick="alert(1)"//**abc**