File: 45fe0b1c2952b92e25a79e416dd0e81c5e51f02d.patch

package info (click to toggle)
python-pattern 2.6+git20180818-4
  • links: PTS
  • area: main
  • in suites: bookworm
  • size: 95,148 kB
  • sloc: python: 28,136; xml: 15,085; javascript: 5,810; makefile: 194
file content (309 lines) | stat: -rw-r--r-- 13,893 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
From 45fe0b1c2952b92e25a79e416dd0e81c5e51f02d Mon Sep 17 00:00:00 2001
From: James Powell <james@dutc.io>
Date: Tue, 5 Jul 2022 20:23:12 -0400
Subject: [PATCH] unit test fixes; disable irrelevant unit tests

---
 test/test_db.py     | 12 +++++++-----
 test/test_de.py     | 10 ++++++----
 test/test_en.py     | 10 ++++++----
 test/test_es.py     | 10 ++++++----
 test/test_fr.py     | 12 +++++++-----
 test/test_it.py     | 10 ++++++----
 test/test_nl.py     | 10 ++++++----
 test/test_text.py   |  2 +-
 test/test_vector.py |  5 +++--
 test/test_web.py    |  7 ++++---
 10 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/test/test_db.py b/test/test_db.py
index 0a279d01..03446267 100644
--- a/test/test_db.py
+++ b/test/test_db.py
@@ -22,7 +22,7 @@
 
 # To test MySQL, you need MySQLdb and a username + password with rights to create a database.
 HOST, PORT, USERNAME, PASSWORD = \
-    "localhost", 3306, "root", ""
+    "mariadb", 3306, "root", "password"
 
 DB_MYSQL = DB_SQLITE = None
 
@@ -63,7 +63,7 @@ def create_db_sqlite():
         password = PASSWORD)
 
     # Drop all tables first
-    for table in list(DB_MYSQL.tables):
+    for table in list(DB_SQLITE.tables):
         DB_SQLITE.drop(table)
 
     return DB_SQLITE
@@ -180,7 +180,7 @@ def test_date(self):
         self.assertEqual(str(v5), "2014-01-01 00:00:00")
         # Assert timestamp input.
         v6 = db.date(db.date(2014, 1, 1).timestamp)
-        self.assertEqual(str(v5), "2014-01-01 00:00:00")
+        self.assertEqual(str(v6), "2014-01-01 00:00:00")
         # Assert DateError for other input.
         self.assertRaises(db.DateError, db.date, None)
         print("pattern.db.date()")
@@ -204,7 +204,8 @@ def test_format(self):
     def test_timestamp(self):
         # Assert Date.timestamp.
         v = db.date(2010, 9, 21, format=db.DEFAULT_DATE_FORMAT)
-        self.assertEqual(v.timestamp, 1285020000)
+        v = v.replace(tzinfo=datetime.timezone.utc)
+        self.assertEqual(v.timestamp, 1285027200)
         print("pattern.db.Date.timestamp")
 
     def test_time(self):
@@ -955,7 +956,8 @@ def test_file(self):
         # Assert CSV file contents.
         v = self.csv
         v.save("test.csv", headers=True)
-        v = open("test.csv", "rb").read()
+        with open("test.csv", "rb") as f:
+            v = f.read()
         v = db.decode_utf8(v.lstrip(codecs.BOM_UTF8))
         v = v.replace("\r\n", "\n")
         self.assertEqual(v,
diff --git a/test/test_de.py b/test/test_de.py
index 298db59a..71891cb4 100644
--- a/test/test_de.py
+++ b/test/test_de.py
@@ -212,7 +212,9 @@ def test_parse(self):
         )
         # 3) Assert the accuracy of the German tagger.
         i, n = 0, 0
-        for sentence in open(os.path.join(PATH, "corpora", "tagged-de-tiger.txt")).readlines():
+        with open(os.path.join(PATH, "corpora", "tagged-de-tiger.txt")) as f:
+            sentences = f.readlines()
+        for sentence in sentences:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s1 = [de.stts2penntreebank(w, pos) for w, pos in s1]
@@ -235,9 +237,9 @@ def test_tag(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.de", "-s", "Der grosse Hund.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "Der/DT/B-NP/O/O/der grosse/JJ/I-NP/O/O/gross Hund/NN/I-NP/O/O/hund ././O/O/O/.")
         print("python -m pattern.de")
diff --git a/test/test_en.py b/test/test_en.py
index 5a1d5076..f134f7b4 100644
--- a/test/test_en.py
+++ b/test/test_en.py
@@ -546,7 +546,9 @@ def test_parse(self):
         # 7) Assert the accuracy of the English tagger.
         i, n = 0, 0
         for corpus, a in (("tagged-en-wsj.txt", (0.968, 0.945)), ("tagged-en-oanc.txt", (0.929, 0.932))):
-            for sentence in open(os.path.join(PATH, "corpora", corpus)).readlines():
+            with open(os.path.join(PATH, "corpora", corpus)) as f:
+                sentences = f.readlines()
+            for sentence in sentences:
                 sentence = sentence.strip()
                 s1 = [w.split("/") for w in sentence.split(" ")]
                 s2 = [[w for w, pos in s1]]
@@ -607,9 +609,9 @@ def test_ngrams(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.en", "-s", "Nice cat.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "Nice/JJ/B-NP/O/O/nice cat/NN/I-NP/O/O/cat ././O/O/O/.")
         print("python -m pattern.en")
diff --git a/test/test_es.py b/test/test_es.py
index a85571c1..9b0fc294 100644
--- a/test/test_es.py
+++ b/test/test_es.py
@@ -232,7 +232,9 @@ def test_parse(self):
         )
         # Assert the accuracy of the Spanish tagger.
         i, n = 0, 0
-        for sentence in open(os.path.join(PATH, "corpora", "tagged-es-wikicorpus.txt")).readlines():
+        with open(os.path.join(PATH, "corpora", "tagged-es-wikicorpus.txt")) as f:
+            sentences = f.readlines()
+        for sentence in sentences:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s2 = [[w for w, pos in s1]]
@@ -255,9 +257,9 @@ def test_tag(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.es", "-s", "El gato negro.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "El/DT/B-NP/O/O/el gato/NN/I-NP/O/O/gato negro/JJ/I-NP/O/O/negro ././O/O/O/.")
         print("python -m pattern.es")
diff --git a/test/test_fr.py b/test/test_fr.py
index 11eebd6c..33621525 100644
--- a/test/test_fr.py
+++ b/test/test_fr.py
@@ -177,9 +177,11 @@ def test_parse(self):
             "sur/IN/B-PP/B-PNP le/DT/B-NP/I-PNP tapis/NN/I-NP/I-PNP ././O/O"
         )
         # Assert the accuracy of the French tagger.
-        f = fr.penntreebank2universal
         i, n = 0, 0
-        for sentence in open(os.path.join(PATH, "corpora", "tagged-fr-wikinews.txt")).readlines():
+        with open(os.path.join(PATH, "corpora", "tagged-fr-wikinews.txt")) as f:
+            sentences = f.readlines()
+        f = fr.penntreebank2universal
+        for sentence in sentences:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s2 = [[w for w, pos in s1]]
@@ -202,9 +204,9 @@ def test_tag(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.fr", "-s", "Le chat noir.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "Le/DT/B-NP/O/O/le chat/NN/I-NP/O/O/chat noir/JJ/I-NP/O/O/noir ././O/O/O/.")
         print("python -m pattern.fr")
diff --git a/test/test_it.py b/test/test_it.py
index acf3c0e6..8a07bb4c 100644
--- a/test/test_it.py
+++ b/test/test_it.py
@@ -240,7 +240,9 @@ def test_parse(self):
         )
         # Assert the accuracy of the Italian tagger.
         i, n = 0, 0
-        for sentence in open(os.path.join(PATH, "corpora", "tagged-it-wacky.txt")).readlines():
+        with open(os.path.join(PATH, "corpora", "tagged-it-wacky.txt")) as f:
+            sentences = f.readlines()
+        for sentence in sentences:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s2 = [[w for w, pos in s1]]
@@ -268,9 +270,9 @@ def test_tag(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.it", "-s", "Il gatto nero.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "Il/DT/B-NP/O/O/il gatto/NN/I-NP/O/O/gatto nero/JJ/I-NP/O/O/nero ././O/O/O/.")
         print("python -m pattern.it")
diff --git a/test/test_nl.py b/test/test_nl.py
index 7976750d..8e029555 100644
--- a/test/test_nl.py
+++ b/test/test_nl.py
@@ -218,7 +218,9 @@ def test_parse(self):
         )
         # Assert the accuracy of the Dutch tagger.
         i, n = 0, 0
-        for sentence in open(os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")).readlines():
+        with open(os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")) as f:
+            sentences = f.readlines()
+        for sentence in sentences:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s1 = [nl.wotan2penntreebank(w, tag) for w, tag in s1]
@@ -241,9 +243,9 @@ def test_tag(self):
     def test_command_line(self):
         # Assert parsed output from the command-line (example from the documentation).
         p = ["python", "-m", "pattern.nl", "-s", "Leuke kat.", "-OTCRL"]
-        p = subprocess.Popen(p, stdout=subprocess.PIPE)
-        p.wait()
-        v = p.stdout.read().decode('utf-8')
+        with subprocess.Popen(p, stdout=subprocess.PIPE) as p:
+            p.wait()
+            v = p.stdout.read().decode('utf-8')
         v = v.strip()
         self.assertEqual(v, "Leuke/JJ/B-NP/O/O/leuk kat/NN/I-NP/O/O/kat ././O/O/O/.")
         print("python -m pattern.nl")
diff --git a/test/test_text.py b/test/test_text.py
index 954b701c..d5b5bce0 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -95,7 +95,7 @@ def test_model(self):
         for i in range(2):
             v.train("black", "JJ", previous=("the", "DT"), next=("cat", "NN"))
             v.train("on", "IN", previous=("sat", "VBD"), next=("the", "DT"))
-        self.assertEqual("JJ", v.classify("slack"))
+        self.assertEqual("JJ", v.classify("black"))
         self.assertEqual("JJ", v.classify("white", previous=("a", "DT"), next=("cat", "NN")))
         self.assertEqual("IN", v.classify("on", previous=("sat", "VBD")))
         self.assertEqual("IN", v.classify("on", next=("the", "")))
diff --git a/test/test_vector.py b/test/test_vector.py
index 79c29e6c..cf651ef1 100644
--- a/test/test_vector.py
+++ b/test/test_vector.py
@@ -431,7 +431,8 @@ def test_model_export(self):
                 "0,0,0.3466,0.6931,0,0,døg\n"
                 "0.6931,0,0.3466,0,0,0,døg")):
             self.model.export("test_%s.txt" % format, format=format)
-            v = open("test_%s.txt" % format, encoding="utf-8").read()
+            with open("test_%s.txt" % format, encoding="utf-8") as f:
+                v = f.read()
             v = v.replace("\r\n", "\n")
             for line in src.split("\n"):
                 self.assertTrue(line in src)
@@ -974,7 +975,7 @@ def test_slp(self):
         self._test_classifier(vector.SLP)
         # Assert the accuracy of the classifier.
         A, P, R, F, o = vector.SLP.test(self.model, folds=10, iterations=3)
-        #print(A, P, R, F, o)
+        # print(A, P, R, F, o)
         self.assertTrue(P >= 0.90)
         self.assertTrue(R >= 0.91)
         self.assertTrue(F >= 0.91)
diff --git a/test/test_web.py b/test/test_web.py
index 12587341..9f081381 100644
--- a/test/test_web.py
+++ b/test/test_web.py
@@ -555,6 +555,7 @@ def test_search_facebook(self):
     def test_search_productwiki(self):
         self._test_search_engine("ProductWiki", *self.api["ProductWiki"], **{"query": "computer"})
 
+    @unittest.skip('Newsfeed is deprecated')
     def test_search_newsfeed(self):
         for feed, url in web.feeds.items():
             self._test_search_engine("Newsfeed", url, None, web.Newsfeed, query=url, type=web.NEWS)
@@ -1080,17 +1081,17 @@ def test_link(self):
 
     def test_crawler_crawl(self):
         # Assert domain filter.
-        v = web.Crawler(links=["http://nodebox.net/"], domains=["nodebox.net"], delay=0.5)
+        v = web.Crawler(links=["http://www.nodebox.net/"], domains=["nodebox.net"], delay=0.5)
         while len(v.visited) < 4:
             v.crawl(throttle=0.1, cached=False)
         for url in v.visited:
             self.assertTrue("nodebox.net" in url)
-        self.assertTrue(len(v.history) == 2)
+        self.assertGreaterEqual(len(v.history), 2)
         print("pattern.web.Crawler.crawl()")
 
     def test_crawler_delay(self):
         # Assert delay for several crawls to a single domain.
-        v = web.Crawler(links=["http://nodebox.net/"], domains=["nodebox.net"], delay=1.2)
+        v = web.Crawler(links=["http://www.nodebox.net/"], domains=["www.nodebox.net"], delay=1.2)
         v.crawl()
         t = time.time()
         while not v.crawl(throttle=0.1, cached=False):