--- a/loompy/loom_validator.py
+++ b/loompy/loom_validator.py
@@ -232,7 +232,7 @@
if self.version == "3.0.0":
expected_dtype = np.object_
else:
- expected_dtype = np.string_
+ expected_dtype = np.bytes_
delay_print("Row attributes:")
if self._check("row_attrs" in file, "'row_attrs' group is missing"):
for ra in file["row_attrs"]:
--- a/loompy/normalize.py
+++ b/loompy/normalize.py
@@ -7,20 +7,19 @@

def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
"""
- Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
+ Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects
"""
if np.issubdtype(a.dtype, np.object_):
- # if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
- if np.all([(type(x) is str or type(x) is np.str_ or type(x) is np.unicode_) for x in a]):
+ if np.all([(type(x) is str or type(x) is np.str_) for x in a]):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
- elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
- return a.astype("string_")
+ elif np.all([type(x) is np.bytes_ for x in a]):
+ return a.astype("bytes_")
else:
logging.debug(f"Attribute contains mixed object types ({np.unique([str(type(x)) for x in a])}); casting all to string")
- return np.array([str(x) for x in a], dtype="string_")
- elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
+ return np.array([str(x) for x in a], dtype="bytes_")
+ elif np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
return a
- elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
+ elif np.issubdtype(a.dtype, np.str_):
return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
else:
raise ValueError("String values must be object, ascii or unicode.")
@@ -88,7 +87,7 @@
scalar = True
a = np.array([a])
result: np.ndarray = None # This second clause takes care of attributes stored as variable-length ascii, which can be generated by loomR or Seurat
- if np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
+ if np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
# First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
if hasattr(a, "decode"): # This takes care of Loom files that store strings as UTF8, which comes in as str and doesn't have a decode method
temp = np.array([x.decode('ascii', 'ignore') for x in a])
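
Again for illustration (not loompy code): the np.bytes_/np.object_ check above also catches variable-length ASCII written by loomR or Seurat, which h5py can hand back as an object array of bytes; those elements are decoded one by one as in the branch above:

    import numpy as np

    # Hypothetical loomR/Seurat-style attribute: variable-length ASCII,
    # returned as an object array of bytes objects.
    a = np.array([b"CellA", b"CellB"], dtype=object)

    if np.issubdtype(a.dtype, np.bytes_) or np.issubdtype(a.dtype, np.object_):
        # Ignore anything outside the 7-bit range, mirroring the decode step above.
        decoded = np.array([x.decode("ascii", "ignore") for x in a])
        print(decoded)   # ['CellA' 'CellB']
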
@@ -100,7 +99,7 @@
except: # Dirty hack to handle UTF-8 non-break-space in scalar strings. TODO: Rewrite this whole method completely!
if type(a[0]) == np.bytes_:
result = [ a[0].replace(b'\xc2\xa0', b'') ]
- elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
+ elif np.issubdtype(a.dtype, np.str_):
result = np.array(a.astype(str), dtype=object)
else:
result = a
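
Finally, a sketch (under NumPy 2.x, not from the loompy sources) of the remaining unicode branch: np.str_ already covers everything the removed np.unicode_ alias matched, so '<U' arrays take the same path and come out as object arrays of str:

    import numpy as np

    a = np.array(["TP53", "Sox2"])             # dtype '<U4'
    assert np.issubdtype(a.dtype, np.str_)     # np.unicode_ was just another name for np.str_

    result = np.array(a.astype(str), dtype=object)
    print(result.dtype)                        # object
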