File: extract-Use-flags.patch

package info (click to toggle)
wikiextractor 3.0.7-1
  • links: PTS
  • area: main
  • in suites: sid
  • size: 380 kB
  • sloc: python: 1,602; sh: 17; makefile: 5
file content (35 lines) | stat: -rw-r--r-- 1,344 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
From: =?utf-8?q?Guido_G=C3=BCnther?= <agx@sigxcpu.org>
Date: Sun, 22 Sep 2024 14:47:03 +0200
Subject: extract: Use flags

(?i) is a global flag, so pass re.I as a compilation flag instead of
embedding it in the middle of the pattern. Otherwise compilation fails with

  re.error: global flags not at the start of the expression at position 95 (line 2, column 50)

on Python 3.12 (misplaced global inline flags are an error since Python 3.11)
---
 wikiextractor/extract.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/wikiextractor/extract.py b/wikiextractor/extract.py
index a00e23d..9997235 100644
--- a/wikiextractor/extract.py
+++ b/wikiextractor/extract.py
@@ -380,12 +380,12 @@ wgUrlProtocols = [
 # as well as U+3000 is IDEOGRAPHIC SPACE for bug 19052
 EXT_LINK_URL_CLASS = r'[^][<>"\x00-\x20\x7F\s]'
 ExtLinkBracketedRegex = re.compile(
-    '\[(((?i)' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]',
-    re.S | re.U)
+    '\[((' + '|'.join(wgUrlProtocols) + ')' + EXT_LINK_URL_CLASS + r'+)\s*([^\]\x00-\x08\x0a-\x1F]*?)\]',
+    re.S | re.U | re.I)
 EXT_IMAGE_REGEX = re.compile(
     r"""^(http://|https://)([^][<>"\x00-\x20\x7F\s]+)
-    /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.((?i)gif|png|jpg|jpeg)$""",
-    re.X | re.S | re.U)
+    /([A-Za-z0-9_.,~%\-+&;#*?!=()@\x80-\xFF]+)\.(gif|png|jpg|jpeg)$""",
+    re.X | re.S | re.U | re.I)
 
 
 def replaceExternalLinks(text):