From 54c3694f81267e9e97dc9526f1893db1f7ff25e7 Mon Sep 17 00:00:00 2001
From: James Powell <james@dutc.io>
Date: Mon, 4 Jul 2022 02:00:16 -0400
Subject: [PATCH] ensure unit tests succeed; close files with context managers, use raw strings for library paths

---
 pattern/vector/__init__.py          | 32 +++++++++----------
 pattern/vector/svm/liblinear.py     |  4 +--
 pattern/vector/svm/liblinearutil.py | 49 +++++++++++++++--------------
 pattern/vector/svm/libsvm.py        |  4 +--
 pattern/vector/svm/libsvmutil.py    | 25 ++++++++-------
 5 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/pattern/vector/__init__.py b/pattern/vector/__init__.py
index 76a368ab..e0f8a1d6 100644
--- a/pattern/vector/__init__.py
+++ b/pattern/vector/__init__.py
@@ -280,9 +280,9 @@ def pop(self, i):
 stopwords = _stopwords = {}
 for f in glob.glob(os.path.join(MODULE, "stopwords-*.txt")):
     language = os.path.basename(f)[-6:-4] # stopwords-[en].txt
-    w = codecs.open(f, encoding="utf-8")
-    w = (w.strip() for w in w.read().split(","))
-    stopwords[language] = dict.fromkeys(w, True)
+    with codecs.open(f, encoding="utf-8") as w:
+        w = (w.strip() for w in w.read().split(","))
+        stopwords[language] = dict.fromkeys(w, True)
 
 # The following English words could also be meaningful nouns:
 
@@ -514,7 +514,8 @@ def load(cls, path):
             The given text file must be generated with Document.save().
         """
         # Open unicode file.
-        s = open(path, "rb").read()
+        with open(path, "rb") as f:
+            s = f.read()
         s = s.lstrip(codecs.BOM_UTF8)
         s = decode_utf8(s)
         a = {}
@@ -571,10 +572,9 @@ def save(self, path):
         s = "\n".join(s)
         s = encode_utf8(s)
         # Save unicode file.
-        f = open(path, "wb")
-        f.write(codecs.BOM_UTF8)
-        f.write(s)
-        f.close()
+        with open(path, "wb") as f:
+            f.write(codecs.BOM_UTF8)
+            f.write(s)
 
     def _get_model(self):
         return self._model
@@ -1068,9 +1068,8 @@ def load(cls, path):
         # Deserialize Model.classifier.
         if model.classifier:
             p = path + ".tmp"
-            f = open(p, "wb")
-            f.write(model.classifier)
-            f.close()
+            with open(p, "wb") as f:
+                f.write(model.classifier)
             model._classifier = Classifier.load(p)
             os.remove(p)
         return model
@@ -1095,7 +1094,8 @@ def save(self, path, update=False, final=False):
         if self._classifier:
             p = path + ".tmp"
             self._classifier.save(p, final)
-            self._classifier = open(p, "rb").read()
+            with open(p, "rb") as f:
+                self._classifier = f.read()
             os.remove(p)
         f = gzip.GzipFile(path, "wb")
         f.write(pickle.dumps(self, 1))  # 1 = binary
@@ -1130,9 +1130,8 @@ def export(self, path, format=ORANGE, **kwargs):
                 v = "%s,%s" % (v, document.type or "")
                 s.append(v)
         s = "\n".join(s)
-        f = open(path, "w", encoding="utf-8")
-        f.write(decode_utf8(s))
-        f.close()
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(decode_utf8(s))
 
     def _update(self):
         # Ensures that all document vectors are recalculated
@@ -3633,7 +3632,8 @@ def save(self, path, final=False):
         # Unlink LIBSVM/LIBLINEAR binaries for cPickle.
         svm, model = self._svm, self._model
         self._svm = None
-        self._model = (open(path, "rb").read(),) + model[1:]
+        with open(path, "rb") as f:
+            self._model = (f.read(),) + model[1:]
         Classifier.save(self, path, final)
         self._svm = svm
         self._model = model
diff --git a/pattern/vector/svm/liblinear.py b/pattern/vector/svm/liblinear.py
index 00ee4d24..e9b0235c 100644
--- a/pattern/vector/svm/liblinear.py
+++ b/pattern/vector/svm/liblinear.py
@@ -29,9 +29,9 @@
 try:
 	dirname = path.dirname(path.abspath(__file__))
 	if sys.platform == 'win32':
-		liblinear = CDLL(path.join(dirname, 'windows\liblinear-2.20\liblinear.dll'))
+		liblinear = CDLL(path.join(dirname, r'windows\liblinear-2.20\liblinear.dll'))
 	else:
-		liblinear = CDLL(path.join(dirname, 'macos/liblinear-2.20/liblinear.so.3'))
+		liblinear = CDLL(path.join(dirname, r'macos/liblinear-2.20/liblinear.so.3'))
 except:
 # For unix the prefix 'lib' is not considered.
 	if find_library('linear'):
diff --git a/pattern/vector/svm/liblinearutil.py b/pattern/vector/svm/liblinearutil.py
index e705460b..0d5c2c40 100644
--- a/pattern/vector/svm/liblinearutil.py
+++ b/pattern/vector/svm/liblinearutil.py
@@ -33,30 +33,31 @@ def svm_read_problem(data_file_name, return_scipy=False):
     prob_x = []
     row_ptr = [0]
     col_idx = []
-    for i, line in enumerate(open(data_file_name)):
-        line = line.split(None, 1)
-        # In case an instance with all zero features
-        if len(line) == 1:
-            line += ['']
-        label, features = line
-        prob_y += [float(label)]
-        if scipy is not None and return_scipy:
-            nz = 0
-            for e in features.split():
-                ind, val = e.split(":")
-                val = float(val)
-                if val != 0:
-                    col_idx += [int(ind) - 1]
-                    prob_x += [val]
-                    nz += 1
-            row_ptr += [row_ptr[-1] + nz]
-        else:
-            xi = {}
-            for e in features.split():
-                ind, val = e.split(":")
-                if val != 0:
-                    xi[int(ind)] = float(val)
-            prob_x += [xi]
+    with open(data_file_name) as f:
+        for i, line in enumerate(f):
+            line = line.split(None, 1)
+            # In case an instance with all zero features
+            if len(line) == 1:
+                line += ['']
+            label, features = line
+            prob_y += [float(label)]
+            if scipy is not None and return_scipy:
+                nz = 0
+                for e in features.split():
+                    ind, val = e.split(":")
+                    val = float(val)
+                    if val != 0:
+                        col_idx += [int(ind) - 1]
+                        prob_x += [val]
+                        nz += 1
+                row_ptr += [row_ptr[-1] + nz]
+            else:
+                xi = {}
+                for e in features.split():
+                    ind, val = e.split(":")
+                    if val != 0:
+                        xi[int(ind)] = float(val)
+                prob_x += [xi]
     if scipy is not None and return_scipy:
         prob_y = scipy.array(prob_y)
         prob_x = scipy.array(prob_x)
diff --git a/pattern/vector/svm/libsvm.py b/pattern/vector/svm/libsvm.py
index ac992d2b..50678aa5 100644
--- a/pattern/vector/svm/libsvm.py
+++ b/pattern/vector/svm/libsvm.py
@@ -22,9 +22,9 @@
 try:
 	dirname = path.dirname(path.abspath(__file__))
 	if sys.platform == 'win32':
-		libsvm = CDLL(path.join(dirname, 'windows\libsvm-3.22\libsvm.dll'))
+		libsvm = CDLL(path.join(dirname, r'windows\libsvm-3.22\libsvm.dll'))
 	else:
-		libsvm = CDLL(path.join(dirname, 'macos/libsvm-3.22/libsvm.so.2'))
+		libsvm = CDLL(path.join(dirname, r'macos/libsvm-3.22/libsvm.so.2'))
 
 except:
 # For unix the prefix 'lib' is not considered.
diff --git a/pattern/vector/svm/libsvmutil.py b/pattern/vector/svm/libsvmutil.py
index 29110b00..1e3a3a5a 100644
--- a/pattern/vector/svm/libsvmutil.py
+++ b/pattern/vector/svm/libsvmutil.py
@@ -29,18 +29,19 @@ def svm_read_problem(data_file_name):
     """
     prob_y = []
     prob_x = []
-    for line in open(data_file_name):
-        line = line.split(None, 1)
-        # In case an instance with all zero features
-        if len(line) == 1:
-            line += ['']
-        label, features = line
-        xi = {}
-        for e in features.split():
-            ind, val = e.split(":")
-            xi[int(ind)] = float(val)
-        prob_y += [float(label)]
-        prob_x += [xi]
+    with open(data_file_name) as f:
+        for line in f:
+            line = line.split(None, 1)
+            # In case an instance with all zero features
+            if len(line) == 1:
+                line += ['']
+            label, features = line
+            xi = {}
+            for e in features.split():
+                ind, val = e.split(":")
+                xi[int(ind)] = float(val)
+            prob_y += [float(label)]
+            prob_x += [xi]
     return (prob_y, prob_x)