1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
|
From: Keith Packard <keithp@keithp.com>
Date: Sun, 5 Jan 2025 16:51:03 -0800
Subject: test: Fix regex syntax errors
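The regular expressions in normalize.py and pathological_tests.py are
missing a lot of backslashes. In an ordinary (non-raw) Python string
literal, a backslash must be doubled in the source file to reliably get
a single backslash in the string; unrecognized escapes such as '\s' or
'\[' trigger an invalid-escape-sequence warning.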
Signed-off-by: Keith Packard <keithp@keithp.com>
---
test/normalize.py | 4 ++--
test/pathological_tests.py | 20 ++++++++++----------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/test/normalize.py b/test/normalize.py
index b7fd9b2..ec4979c 100644
--- a/test/normalize.py
+++ b/test/normalize.py
@@ -18,7 +18,7 @@ import re
# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
significant_attrs = ["alt", "href", "src", "title"]
-whitespace_re = re.compile('\s+')
+whitespace_re = re.compile('\\s+')
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
@@ -176,7 +176,7 @@ def normalize_html(html):
'\u2200&><"'
"""
- html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
+ html_chunk_re = re.compile("(\\<!\\[CDATA\\[.*?\\]\\]\\>|\\<[^>]*\\>|[^<]+)")
try:
parser = MyHTMLParser()
# We work around HTMLParser's limitations parsing CDATA
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
index f556a1d..80b5261 100644
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -30,7 +30,7 @@ def hash_collisions():
document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
- return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
+ return document, re.compile("(<p>\\[%s\\]</p>\n){%d}" % (bad_key, COUNT-1))
allowed_failures = {"many references": True}
@@ -48,10 +48,10 @@ pathological = {
re.compile("(_a ){64999}_a")),
"many link closers with no openers":
(("a]" * 65000),
- re.compile("(a\]){65000}")),
+ re.compile("(a\\]){65000}")),
"many link openers with no closers":
(("[a" * 65000),
- re.compile("(\[a){65000}")),
+ re.compile("(\\[a){65000}")),
"mismatched openers and closers":
(("*a_ " * 50000),
re.compile("([*]a[_] ){49999}[*]a_")),
@@ -60,19 +60,19 @@ pathological = {
re.compile("a[*][*]b(c[*] ){49999}c[*]")),
"link openers and emph closers":
(("[ a_" * 50000),
- re.compile("(\[ a_){50000}")),
+ re.compile("(\\[ a_){50000}")),
"pattern [ (]( repeated":
(("[ (](" * 80000),
- re.compile("(\[ \(\]\(){80000}")),
+ re.compile("(\\[ \\(\\]\\(){80000}")),
"pattern ![[]() repeated":
("![[]()" * 160000,
- re.compile("(!\[<a href=\"\"></a>){160000}")),
+ re.compile("(!\\[<a href=\"\"></a>){160000}")),
"hard link/emph case":
("**x [a*b**c*](d)",
re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
"nested brackets":
(("[" * 50000) + "a" + ("]" * 50000),
- re.compile("\[{50000}a\]{50000}")),
+ re.compile("\\[{50000}a\\]{50000}")),
"nested block quotes":
((("> " * 50000) + "a"),
re.compile("(<blockquote>\n){50000}")),
@@ -87,13 +87,13 @@ pathological = {
re.compile("^<p>[e`]*</p>\n$")),
"unclosed links A":
("[a](<b" * 30000,
- re.compile("(\[a\]\(<b){30000}")),
+ re.compile("(\\[a\\]\\(<b){30000}")),
"unclosed links B":
("[a](b" * 30000,
- re.compile("(\[a\]\(b){30000}")),
+ re.compile("(\\[a\\]\\(b){30000}")),
"unclosed <!--":
("</" + "<!--" * 300000,
- re.compile("\<\/(\<!--){300000}")),
+ re.compile("\\<\\/(\\<!--){300000}")),
"tables":
("aaa\rbbb\n-\v\n" * 30000,
re.compile("^<p>aaa</p>\n<table>\n<thead>\n<tr>\n<th>bbb</th>\n</tr>\n</thead>\n<tbody>\n(<tr>\n<td>aaa</td>\n</tr>\n<tr>\n<td>bbb</td>\n</tr>\n<tr>\n<td>-\x0b</td>\n</tr>\n){29999}</tbody>\n</table>\n$")),
|