1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
|
From: =?utf-8?q?Guido_G=C3=BCnther?= <agx@sigxcpu.org>
Date: Sun, 22 Sep 2024 14:47:03 +0200
Subject: extract: Use compilation flags instead of inline (?i)
The inline (?i) flag is a global flag that applies to the whole pattern, so
pass re.I as a compilation flag instead. Otherwise compiling the patterns fails with
re.error: global flags not at the start of the expression at position 95 (line 2, column 50)
on Python 3.12.
---
wikiextractor/extract.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/wikiextractor/extract.py b/wikiextractor/extract.py
index a00e23d..9997235 100644
--- a/wikiextractor/extract.py
+++ b/wikiextractor/extract.py
@@ -380,12 +380,12 @@ wgUrlProtocols = [
# as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
EXT_LINK_URL_CLASS = r'[^][<>"\x00-\x20\x7F\s]'
ExtLinkBracketedRegex = re.compile(
- '\[(((?i)' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]',
- re.S | re.U)
+ '\[((' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]',
+ re.S | re.U | re.I)
EXT_IMAGE_REGEX = re.compile(
r"""^(http://|https://)([^][<>"\x00-\x20\x7F\s]+)
- /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.((?i)gif|png|jpg|jpeg)$""",
- re.X | re.S | re.U)
+ /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.(gif|png|jpg|jpeg)$""",
+ re.X | re.S | re.U | re.I)
def replaceExternalLinks(text):
|