1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2023, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import unittest
import pandas as pd
import pandas.testing as pdt
import numpy as np
from skbio.metadata.missing import series_encode_missing, series_extract_missing
class RoundTripMixin:
    """Round-trip checks shared by the missing-value scheme test cases.

    The mixin reads two attributes from the concrete test case:
    ``self.enum`` (the scheme name handed to ``series_encode_missing``) and
    ``self.missing_terms`` (a list of values appended to each test series).
    """

    def check_roundtrip(self, real_value, dtype):
        """Encode a series, extract its missing values, and compare.

        Parameters
        ----------
        real_value : object
            The single non-missing value placed at the front of the series.
        dtype : type or dtype
            The dtype the encoded series is expected to have.
        """
        present = [real_value]
        original = pd.Series(present + self.missing_terms)

        encoded = series_encode_missing(original, self.enum)
        recovered = series_extract_missing(encoded)

        self.assertEqual(encoded.dtype, dtype)

        # the non-null side of the encoded series must be the real value
        non_null = encoded[encoded.notna()]
        self.assertEqual(list(non_null), present)

        # the null side, expressed in the original vocabulary, must match
        # everything after the first (real) element of the input
        expected_missing = original[1:].astype(object)
        pdt.assert_series_equal(recovered, expected_missing)

    def test_roundtrip_float(self):
        """A float value is expected to encode to a float series."""
        self.check_roundtrip(0.05, float)

    def test_roundtrip_string(self):
        """A string value is expected to encode to an object series."""
        self.check_roundtrip('hello', object)

    def test_roundtrip_int(self):
        """An int value is expected to encode to a float series."""
        self.check_roundtrip(42, float)

    def test_roundtrip_bool(self):
        """A bool value is expected to encode to an object series."""
        self.check_roundtrip(True, object)

    def test_roundtrip_all_missing_object(self):
        """Round-trip an object series made up only of missing entries."""
        values = [None, float('nan')] + self.missing_terms
        original = pd.Series(values, dtype=object)

        encoded = series_encode_missing(original, self.enum)
        recovered = series_extract_missing(encoded)

        self.assertEqual(encoded.dtype, object)
        pdt.assert_series_equal(recovered, original.astype(object))
class TestISNDC(RoundTripMixin, unittest.TestCase):
    """Run the shared round-trip checks against the 'INSDC:missing' scheme.

    NOTE(review): the class name spells "ISNDC" while the scheme string is
    'INSDC:missing'; the name is kept as-is so existing test IDs do not change.
    """

    def setUp(self):
        """Select the scheme and the terms used as missing values."""
        self.enum = 'INSDC:missing'
        self.missing_terms = [
            'not applicable',
            'missing',
            'not collected',
            'not provided',
            'restricted access',
        ]
class TestOmitted(RoundTripMixin, unittest.TestCase):
    """Run the shared round-trip checks against the 'blank' scheme."""

    def setUp(self):
        """Select the scheme; here the missing terms are None and NaN."""
        self.enum = 'blank'
        self.missing_terms = [None, float('nan')]

    # test_roundtrip_all_missing_float is not possible with other schemes
    def test_roundtrip_all_missing_float(self):
        """Round-trip a float series made up only of missing entries."""
        values = [None, float('nan')] + self.missing_terms
        original = pd.Series(values, dtype=float)

        encoded = series_encode_missing(original, self.enum)
        recovered = series_extract_missing(encoded)

        self.assertEqual(encoded.dtype, float)
        pdt.assert_series_equal(recovered, original.astype(object))
class TestError(RoundTripMixin, unittest.TestCase):
    """Run the shared round-trip checks against the 'no-missing' scheme."""

    def setUp(self):
        """Select the scheme; no missing terms are appended to the series."""
        self.enum = 'no-missing'
        self.missing_terms = []

    # no missing values, so bool and int are not object and float
    def test_roundtrip_bool(self):
        """With nothing missing, a bool value is expected to stay bool."""
        self.check_roundtrip(True, bool)

    def test_roundtrip_int(self):
        """With nothing missing, an int value is expected to stay int64."""
        self.check_roundtrip(42, np.int64)

    def test_roundtrip_all_missing_object(self):
        """An all-missing series is expected to raise ValueError here."""
        expected_message = 'Missing values.*name=None'
        with self.assertRaisesRegex(ValueError, expected_message):
            super().test_roundtrip_all_missing_object()
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|