File: 8e239e278103c7ce0c1bcba1fc9939533b865241.patch

package info (click to toggle)
python-pattern 2.6+git20180818-4
  • links: PTS
  • area: main
  • in suites: bookworm
  • size: 95,148 kB
  • sloc: python: 28,136; xml: 15,085; javascript: 5,810; makefile: 194
file content (201 lines) | stat: -rw-r--r-- 7,912 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
From 8e239e278103c7ce0c1bcba1fc9939533b865241 Mon Sep 17 00:00:00 2001
From: James Powell <james@dutc.io>
Date: Sun, 3 Jul 2022 02:42:33 -0400
Subject: [PATCH] ensure unit tests succeed; small clean-up (incl. context
 managers to reduce test noise)

---
 pattern/text/__init__.py             | 38 +++++++++++++++-----------
 pattern/text/en/wordlist/__init__.py |  3 +-
 pattern/text/en/wordnet/__init__.py  |  3 +-
 pattern/text/ru/wordlist/__init__.py |  3 +-
 pattern/text/search.py               | 41 ++++++----------------------
 5 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/pattern/text/__init__.py b/pattern/text/__init__.py
index aa77af0b..aaeda064 100644
--- a/pattern/text/__init__.py
+++ b/pattern/text/__init__.py
@@ -589,24 +589,28 @@ def _read(path, encoding="utf-8", comment=";;;"):
     """ Returns an iterator over the lines in the file at the given path,
         strippping comments and decoding each line to Unicode.
     """
+    def process(lines):
+        for i, line in enumerate(lines):
+            line = line.strip(BOM_UTF8) if i == 0 and isinstance(line, str) else line
+            line = line.strip()
+            line = decode_utf8(line, encoding)
+            if not line or (comment and line.startswith(comment)):
+                continue
+            yield line
     if path:
         if isinstance(path, str) and os.path.exists(path):
             # From file path.
-            f = open(path, "r", encoding="utf-8")
+            with open(path, "r", encoding="utf-8") as f:
+                yield from process(f)
         elif isinstance(path, str):
             # From string.
             f = path.splitlines()
+            yield from process(f)
         else:
             # From file or buffer.
             f = path
-        for i, line in enumerate(f):
-            line = line.strip(BOM_UTF8) if i == 0 and isinstance(line, str) else line
-            line = line.strip()
-            line = decode_utf8(line, encoding)
-            if not line or (comment and line.startswith(comment)):
-                continue
-            yield line
-    raise StopIteration
+            yield from process(f)
+    # raise StopIteration
 
 
 class Lexicon(lazydict):
@@ -1851,7 +1855,11 @@ def commandline(parse=Parser().parse):
         print(__version__)
         sys.path.pop(0)
     # Either a text file (-f) or a text string (-s) must be supplied.
-    s = o.file and codecs.open(o.file, "r", o.encoding).read() or o.string
+    if o.file:
+        with codecs.open(o.file, "r", o.encoding) as f:
+            s = f.read()
+    else:
+        s = o.string
     # The given text can be parsed in two modes:
     # - implicit: parse everything (tokenize, tag/chunk, find relations, lemmatize).
     # - explicit: define what to parse manually.
@@ -2594,9 +2602,8 @@ def save(self, path):
                              "label=\"%s\""   % self.labeler.get(w, "")
                     ))
         a.append("</sentiment>")
-        f = open(path, "w", encoding="utf-8")
-        f.write(BOM_UTF8 + encode_utf8("\n".join(a)))
-        f.close()
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(BOM_UTF8 + encode_utf8("\n".join(a)))
 
 #### SPELLING CORRECTION ###########################################################################
 # Based on: Peter Norvig, "How to Write a Spelling Corrector", http://norvig.com/spell-correct.html
@@ -2640,9 +2647,8 @@ def train(self, s, path="spelling.txt"):
             model[w] = w in model and model[w] + 1 or 1
         model = ("%s %s" % (k, v) for k, v in sorted(model.items()))
         model = "\n".join(model)
-        f = open(path, "w", encoding="utf-8")
-        f.write(model)
-        f.close()
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(model)
 
     def _edit1(self, w):
         """ Returns a set of words with edit distance 1 from the given word.
diff --git a/pattern/text/en/wordlist/__init__.py b/pattern/text/en/wordlist/__init__.py
index 047a805f..8f859b34 100644
--- a/pattern/text/en/wordlist/__init__.py
+++ b/pattern/text/en/wordlist/__init__.py
@@ -33,7 +33,8 @@ def __init__(self, name, data=[]):
 
     def _load(self):
         if not self._data:
-            self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split(", ")
+            with open(os.path.join(MODULE, self._name + ".txt")) as f:
+                self._data = f.read().split(", ")
 
     def __repr__(self):
         self._load()
diff --git a/pattern/text/en/wordnet/__init__.py b/pattern/text/en/wordnet/__init__.py
index 022285fd..5345d32d 100644
--- a/pattern/text/en/wordnet/__init__.py
+++ b/pattern/text/en/wordnet/__init__.py
@@ -411,7 +411,8 @@ def map32(id, pos=NOUN):
     """
     global _map32_cache
     if not _map32_cache:
-        _map32_cache = open(os.path.join(MODULE, "dict", "index.32"), encoding="latin-1").readlines()
+        with open(os.path.join(MODULE, "dict", "index.32"), encoding="latin-1") as f:
+            _map32_cache = f.readlines()
         _map32_cache = (x for x in _map32_cache if x[0] != ";") # comments
         _map32_cache = dict(x.strip().split(" ") for x in _map32_cache)
     k = pos in _map32_pos2 and pos or _map32_pos1.get(pos, "x")
diff --git a/pattern/text/ru/wordlist/__init__.py b/pattern/text/ru/wordlist/__init__.py
index 01e5b6dc..a5cc4d42 100644
--- a/pattern/text/ru/wordlist/__init__.py
+++ b/pattern/text/ru/wordlist/__init__.py
@@ -33,7 +33,8 @@ def __init__(self, name, data=[]):
 
     def _load(self):
         if not self._data:
-            self._data = open(os.path.join(MODULE, self._name + ".txt")).read().split("\n")
+            with open(os.path.join(MODULE, self._name + ".txt")) as f:
+                self._data = f.read().split("\n")
 
     def __repr__(self):
         self._load()
diff --git a/pattern/text/search.py b/pattern/text/search.py
index 6efd5199..e7291e0e 100644
--- a/pattern/text/search.py
+++ b/pattern/text/search.py
@@ -19,8 +19,7 @@
 
 import re
 import itertools
-
-from functools import cmp_to_key
+from itertools import product, compress, combinations
 
 #--- TEXT, SENTENCE AND WORD -----------------------------------------------------------------------
 # The search() and match() functions work on Text, Sentence and Word objects (see pattern.text.tree),
@@ -150,35 +149,6 @@ def find(function, iterable):
             return x
 
 
-def combinations(iterable, n):
-    # Backwards compatibility.
-    return product(iterable, repeat=n)
-
-
-def product(*args, **kwargs):
-    """ Yields all permutations with replacement:
-        list(product("cat", repeat=2)) => 
-        [("c", "c"), 
-         ("c", "a"), 
-         ("c", "t"), 
-         ("a", "c"), 
-         ("a", "a"), 
-         ("a", "t"), 
-         ("t", "c"), 
-         ("t", "a"), 
-         ("t", "t")]
-    """
-    p = [[]]
-    for iterable in map(tuple, args) * kwargs.get("repeat", 1):
-        p = [x + [y] for x in p for y in iterable]
-    for p in p:
-        yield tuple(p)
-
-try:
-    from itertools import product
-except:
-    pass
-
 
 def variations(iterable, optional=lambda x: False):
     """ Returns all possible variations of a sequence with optional items.
@@ -200,8 +170,13 @@ def variations(iterable, optional=lambda x: False):
         v = tuple(iterable[i] for i in range(len(v)) if not v[i])
         a.add(v)
     # Longest-first.
-    f = lambda x, y: len(y) - len(x)
-    return sorted(a, key=cmp_to_key(f))
+    return sorted(a, key=len, reverse=True)
+
+def variations(iterable, optional=lambda x: False):
+    optional = [*map(optional, iterable)]
+    candidates = [*product(*([False, True] if opt else [True] for opt in optional))]
+    candidates.sort(key=sum, reverse=True)
+    return [(*compress(iterable, cnd),) for cnd in candidates]
 
 #### TAXONOMY ######################################################################################