File: 0002-fix-syntax-warnings.patch

package info (click to toggle)
python-jieba 0.42.1-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 51,864 kB
  • sloc: python: 194,937; makefile: 16; sh: 3
file content (71 lines) | stat: -rw-r--r-- 2,800 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
Description: Fix SyntaxWarnings regarding invalid escape sequences
 Use raw string literals for the regular expressions so that Python 3.12 and
 later no longer emit SyntaxWarning for invalid escape sequences such as
 "\." and "\s".
Bug-Debian: https://bugs.debian.org/1085926
Author: Arian Ott <arian.ott@ieee.org>
Last-Update: 2026-01-28
Index: python-jieba/jieba/__init__.py
===================================================================
--- python-jieba.orig/jieba/__init__.py
+++ python-jieba/jieba/__init__.py
@@ -35,13 +35,13 @@ DICT_WRITING = {}
 
-re_userdict = re.compile('^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
+re_userdict = re.compile(r'^(.+?)( [0-9]+)?( [a-z]+)?$', re.U)
 
-re_eng = re.compile('[a-zA-Z0-9]', re.U)
+re_eng = re.compile(r'[a-zA-Z0-9]', re.U)
 
 # \u4E00-\u9FD5a-zA-Z0-9+#&\._ : All non-space characters. Will be handled with re_han
 # \r\n|\s : whitespace characters. Will not be handled.
-# re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%]+)", re.U)
+# re_han_default = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._%]+)", re.U)
 # Adding "-" symbol in re_han_default
-re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
+re_han_default = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)
 
-re_skip_default = re.compile("(\r\n|\s)", re.U)
+re_skip_default = re.compile(r"(\r\n|\s)", re.U)
 
Index: python-jieba/jieba/finalseg/__init__.py
===================================================================
--- python-jieba.orig/jieba/finalseg/__init__.py
+++ python-jieba/jieba/finalseg/__init__.py
@@ -74,8 +74,8 @@ def __cut(sentence):
     if nexti < len(sentence):
         yield sentence[nexti:]
 
-re_han = re.compile("([\u4E00-\u9FD5]+)")
-re_skip = re.compile("([a-zA-Z0-9]+(?:\.\d+)?%?)")
+re_han = re.compile(r"([\u4E00-\u9FD5]+)")
+re_skip = re.compile(r"([a-zA-Z0-9]+(?:\.\d+)?%?)")
 
 
 def add_force_split(word):
Index: python-jieba/jieba/posseg/__init__.py
===================================================================
--- python-jieba.orig/jieba/posseg/__init__.py
+++ python-jieba/jieba/posseg/__init__.py
@@ -12,15 +12,15 @@ PROB_TRANS_P = "prob_trans.p"
 PROB_EMIT_P = "prob_emit.p"
 CHAR_STATE_TAB_P = "char_state_tab.p"
 
-re_han_detail = re.compile("([\u4E00-\u9FD5]+)")
-re_skip_detail = re.compile("([\.0-9]+|[a-zA-Z0-9]+)")
-re_han_internal = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._]+)")
-re_skip_internal = re.compile("(\r\n|\s)")
+re_han_detail = re.compile(r"([\u4E00-\u9FD5]+)")
+re_skip_detail = re.compile(r"([\.0-9]+|[a-zA-Z0-9]+)")
+re_han_internal = re.compile(r"([\u4E00-\u9FD5a-zA-Z0-9+#&\._]+)")
+re_skip_internal = re.compile(r"(\r\n|\s)")
 
-re_eng = re.compile("[a-zA-Z0-9]+")
-re_num = re.compile("[\.0-9]+")
+re_eng = re.compile(r"[a-zA-Z0-9]+")
+re_num = re.compile(r"[\.0-9]+")
 
-re_eng1 = re.compile('^[a-zA-Z0-9]$', re.U)
+re_eng1 = re.compile(r'^[a-zA-Z0-9]$', re.U)
 
 
 def load_model():