1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
|
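Description: Fix SyntaxWarnings about invalid escape sequences
 Use raw strings for the regular-expression literals so that backslash
 sequences such as \. and \s no longer trigger "invalid escape sequence"
 SyntaxWarnings on Python 3.12 and later.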
Bug-Debian: https://bugs.debian.org/1085926
Author: Arian Ott <arian.ott@ieee.org>
Last-Update: 2026-01-28
Index: python-jieba/jieba/__init__.py
===================================================================
--- python-jieba.orig/jieba/__init__.py
+++ python-jieba/jieba/__init__.py
@@ -33,15 +33,15 @@ DICT_WRITING = {}
pool = None
-re_userdict = re.compile('^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
+re_userdict = re.compile(r'^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
-re_eng = re.compile('[a-zA-Z0-9]', re.U)
+re_eng = re.compile(r'[a-zA-Z0-9]', re.U)
# \u4E00-\u9FD5a-zA-Z0-9+#&\._ : All non-space characters. Will be handled with re_han
# \r\n|\s : whitespace characters. Will not be handled.
-# re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%]+)", re.U)
+# re_han_default = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._%]+)", re.U)
# Adding "-" symbol in re_han_default
-re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
+re_han_default = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
-re_skip_default = re.compile("(\r\n|\s)", re.U)
+re_skip_default = re.compile(r"(\r\n|\s)", re.U)
Index: python-jieba/jieba/finalseg/__init__.py
===================================================================
--- python-jieba.orig/jieba/finalseg/__init__.py
+++ python-jieba/jieba/finalseg/__init__.py
@@ -74,8 +74,8 @@ def __cut(sentence):
if nexti < len(sentence):
yield sentence[nexti:]
-re_han = re.compile("([\u4E00-\u9FD5]+)")
-re_skip = re.compile("([a-zA-Z0-9]+(?:\.\d+)?%?)")
+re_han = re.compile(r"([\u4E00-\u9FD5]+)")
+re_skip = re.compile(r"([a-zA-Z0-9]+(?:\.\d+)?%?)")
def add_force_split(word):
Index: python-jieba/jieba/posseg/__init__.py
===================================================================
--- python-jieba.orig/jieba/posseg/__init__.py
+++ python-jieba/jieba/posseg/__init__.py
@@ -12,15 +12,15 @@ PROB_TRANS_P = "prob_trans.p"
PROB_EMIT_P = "prob_emit.p"
CHAR_STATE_TAB_P = "char_state_tab.p"
-re_han_detail = re.compile("([\u4E00-\u9FD5]+)")
-re_skip_detail = re.compile("([\.0-9]+|[a-zA-Z0-9]+)")
-re_han_internal = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._]+)")
-re_skip_internal = re.compile("(\r\n|\s)")
+re_han_detail = re.compile(r"([\u4E00-\u9FD5]+)")
+re_skip_detail = re.compile(r"([\.0-9]+|[a-zA-Z0-9]+)")
+re_han_internal = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._]+)")
+re_skip_internal = re.compile(r"(\r\n|\s)")
-re_eng = re.compile("[a-zA-Z0-9]+")
-re_num = re.compile("[\.0-9]+")
+re_eng = re.compile(r"[a-zA-Z0-9]+")
+re_num = re.compile(r"[\.0-9]+")
-re_eng1 = re.compile('^[a-zA-Z0-9]$', re.U)
+re_eng1 = re.compile(r'^[a-zA-Z0-9]$', re.U)
def load_model():
|