--- a/loompy/loom_validator.py
+++ b/loompy/loom_validator.py
@@ -232,7 +232,7 @@
if self.version == "3.0.0":
expected_dtype = np.object_
else:
- expected_dtype = np.string_
+ expected_dtype = np.bytes_
delay_print("Row attributes:")
if self._check("row_attrs" in file, "'row_attrs' group is missing"):
for ra in file["row_attrs"]:
--- a/loompy/normalize.py
+++ b/loompy/normalize.py
@@ -7,20 +7,19 @@

def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
"""
- Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
+ Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects
"""
if np.issubdtype(a.dtype, np.object_):
- # if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
- if np.all([(type(x) is str or type(x) is np.str_ or type(x) is np.unicode_) for x in a]):
+ if np.all([(type(x) is str or type(x) is np.str_) for x in a]):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
- elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
- return a.astype("string_")
+ elif np.all([type(x) is np.bytes_ for x in a]):
+ return a.astype("bytes_")
else:
logging.debug(f"Attribute contains mixed object types ({np.unique([str(type(x)) for x in a])}); casting all to string")
- return np.array([str(x) for x in a], dtype="string_")
- elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
+ return np.array([str(x) for x in a], dtype="bytes_")
+ elif np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
return a
- elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
+ elif np.issubdtype(a.dtype, np.str_):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
else:
raise ValueError("String values must be object, ascii or unicode.")
@@ -88,7 +87,7 @@
scalar = True
a = np.array([a])
result: np.ndarray = None # This second clause takes care of attributes stored as variable-length ascii, which can be generated by loomR or Seurat
- if np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
+ if np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
# First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
if hasattr(a, "decode"): # This takes care of Loom files that store strings as UTF8, which comes in as str and doesn't have a decode method
temp = np.array([x.decode('ascii', 'ignore') for x in a])
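
Again for illustration (not loompy code): the np.bytes_/np.object_ check above also catches variable-length ASCII written by loomR or Seurat, which h5py can hand back as an object array of bytes; those elements are decoded one by one as in the branch above:

    import numpy as np

    # Hypothetical loomR/Seurat-style attribute: variable-length ASCII,
    # returned as an object array of bytes objects.
    a = np.array([b"CellA", b"CellB"], dtype=object)

    if np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
        # Ignore anything outside the 7-bit range, mirroring the decode step above.
        decoded = np.array([x.decode("ascii", "ignore") for x in a])
        print(decoded)   # ['CellA' 'CellB']
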
@@ -100,7 +99,7 @@
except: # Dirty hack to handle UTF-8 non-break-space in scalar strings. TODO: Rewrite this whole method completely!
if type(a[0]) == np.bytes_:
result = [ a[0].replace(b'\xc2\xa0', b'') ]
- elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
+ elif np.issubdtype(a.dtype, np.str_):
result = np.array(a.astype(str), dtype=object)
else:
result = a
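
Finally, a sketch (under NumPy 2.x, not from the loompy sources) of the remaining unicode branch: np.str_ already covers everything the removed np.unicode_ alias matched, so '<U' arrays take the same path and come out as object arrays of str:

    import numpy as np

    a = np.array(["TP53", "Sox2"])             # dtype '<U4'
    assert np.issubdtype(a.dtype, np.str_)     # np.unicode_ was just another name for np.str_

    result = np.array(a.astype(str), dtype=object)
    print(result.dtype)                        # object
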