1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
|
"""Test utilities."""
import unittest
from bioregistry.utils import backfill, deduplicate
class TestDeduplicate(unittest.TestCase):
    """Test deduplication workflow.

    Both tests feed small lists of dictionary records that share
    identifier values (``pubmed``, ``doi``, ``pmc``, ``arxiv``) into the
    utilities under test and compare the returned list to a literal
    expectation.
    """

    def test_backfill(self):
        """Test record backfill.

        Four records are linked through a shared ``doi`` / ``pubmed``
        value. After backfilling, every record is expected to carry all
        three identifiers seen in the group, while the non-identifier
        ``title`` field stays only on the record that had it originally.
        """
        identifier_keys = ["pubmed", "doi", "pmc", "arxiv"]
        sparse_records = [
            {"pubmed": "pmid_1"},
            {"arxiv": "arxiv_1", "doi": "doi_1"},
            {"doi": "doi_1", "pubmed": "pmid_1", "title": "yup"},
            {"pubmed": "pmid_1"},
        ]

        # Identifiers every record should end up with after backfilling.
        filled = {"arxiv": "arxiv_1", "doi": "doi_1", "pubmed": "pmid_1"}
        expected = [
            dict(filled),
            dict(filled),
            # Only the third input carried a title, so only it keeps one.
            {**filled, "title": "yup"},
            dict(filled),
        ]

        actual = backfill(sparse_records, keys=identifier_keys)

        self.assertEqual(expected, actual)

    def test_deduplicate(self):
        """Test record deduplication.

        Four records that are chained together by overlapping ``doi`` and
        ``pubmed`` values are expected to collapse into one record holding
        the union of their fields.
        """
        identifier_keys = ["pubmed", "doi", "pmc", "arxiv"]
        overlapping_records = [
            {"arxiv": "arxiv_1", "doi": "doi_1"},
            {"doi": "doi_1", "pubmed": "pmid_1", "title": "yup"},
            {"pubmed": "pmid_1", "pmc": "pmc_1"},
            {"pubmed": "pmid_1"},
        ]

        merged_record = {
            "arxiv": "arxiv_1",
            "doi": "doi_1",
            "pubmed": "pmid_1",
            "title": "yup",
            "pmc": "pmc_1",
        }

        actual = deduplicate(overlapping_records, keys=identifier_keys)

        self.assertEqual([merged_record], actual)
|