File: test_identifiers_org.py

package info (click to toggle)
python-bioregistry 0.11.12-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 23,244 kB
  • sloc: python: 16,082; makefile: 10
file content (168 lines) | stat: -rw-r--r-- 6,900 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- coding: utf-8 -*-

"""Tests for identifiers.org."""

import unittest
from typing import Mapping

import requests

import bioregistry
from bioregistry import (
    Resource,
    get_identifiers_org_curie,
    get_identifiers_org_iri,
    get_resource,
    manager,
)
from bioregistry.constants import IDOT_BROKEN, MIRIAM_BLACKLIST
from bioregistry.version import VERSION


class TestIdentifiersOrg(unittest.TestCase):
    """Tests for identifiers.org."""

    #: Seconds to wait on each HTTP request before giving up, so that an
    #: unresponsive server fails the test instead of hanging the whole run.
    REQUEST_TIMEOUT = 30

    def setUp(self) -> None:
        """Prepare a session that has a user agent and collect the entries to test."""
        self.session = requests.Session()
        # Release the session's pooled connections once each test is done.
        self.addCleanup(self.session.close)
        # Update in place instead of rebinding ``headers`` so the session keeps
        # its own header mapping (and defaults) and only the user agent changes.
        self.session.headers.update({"User-Agent": f"bioregistry/{VERSION}"})
        # Only keep the resources for which a MIRIAM prefix is available.
        self.entries: Mapping[str, Resource] = {
            prefix: entry
            for prefix, entry in bioregistry.read_registry().items()
            if entry.get_miriam_prefix()
        }

    def test_get_prefix(self) -> None:
        """Test getting identifiers.org prefixes."""
        for prefix, miriam_prefix in [
            ("ncbitaxon", "taxonomy"),
            ("eccode", "ec-code"),
        ]:
            with self.subTest(prefix=prefix):
                self.assertEqual(miriam_prefix, bioregistry.get_identifiers_org_prefix(prefix))

        for prefix in ["MONDO"]:
            with self.subTest(prefix=prefix):
                self.assertIsNone(bioregistry.get_identifiers_org_prefix(prefix))

    def test_standardize_identifier(self) -> None:
        """Test that standardization makes patterns valid."""
        for prefix, entry in self.entries.items():
            if prefix in MIRIAM_BLACKLIST:
                continue
            example = entry.get_example()
            pattern = entry.miriam.get("pattern")
            # Every check lives inside the sub-test so that one incomplete entry
            # is reported on its own and does not hide the remaining prefixes.
            with self.subTest(prefix=prefix, example=example, pattern=pattern):
                self.assertIsNotNone(example)
                self.assertIsNotNone(pattern)
                standardized_example = entry.miriam_standardize_identifier(example)
                self.assertIsNotNone(standardized_example)
                self.assertRegex(standardized_example, pattern)

    def test_curie(self) -> None:
        """Test CURIEs explicitly."""
        for prefix, identifier, expected in [
            # Standard
            ("pdb", "2gc4", "pdb:2gc4"),
            # Has namespace embedded in lui for pattern
            ("go", "0000001", "GO:0000001"),
            ("ark", "/12345/fk1234", "ark:/12345/fk1234"),
            # Require banana peels
            ("cellosaurus", "0001", "cellosaurus:CVCL_0001"),
            ("biomodels.kisao", "0000057", "biomodels.kisao:KISAO_0000057"),
            ("geogeo", "000000001", "geogeo:GEO_000000001"),
            ("gramene.taxonomy", "013681", "gramene.taxonomy:GR_tax:013681"),
        ]:
            with self.subTest(prefix=prefix, identifier=identifier):
                self.assertEqual(expected, manager.get_miriam_curie(prefix, identifier))

    def test_url_banana(self) -> None:
        """Test that entries curated with a new banana are resolved properly."""
        for prefix, entry in self.entries.items():
            banana = entry.get_banana()
            if banana is None:
                continue
            if prefix in IDOT_BROKEN:
                continue  # identifiers.org is broken for these prefixes
            example = bioregistry.get_example(prefix)
            with self.subTest(prefix=prefix, banana=banana, peel=entry.get_banana_peel()):
                self.assertIsNotNone(example)
                self.assert_url(prefix, example)

    def assert_url(self, prefix: str, identifier: str) -> None:
        """Assert the URL resolves.

        :param prefix: The prefix to build the identifiers.org IRI from
        :param identifier: The local identifier to build the identifiers.org IRI from

        The request is sent without following redirects and the check passes
        only when the response status code is 302.
        """
        url = bioregistry.get_identifiers_org_iri(prefix, identifier)
        self.assertIsNotNone(url)
        # Bound the wait: without a timeout a stalled connection blocks forever.
        res = self.session.get(url, allow_redirects=False, timeout=self.REQUEST_TIMEOUT)
        self.assertEqual(302, res.status_code, msg=f"failed with URL: {url}")

    @unittest.skip("makes one web request per prefix and takes up to 5 minutes")
    def test_url_auto(self) -> None:
        """Test generating and resolving Identifiers.org URIs.

        .. warning::

            This test takes up to 5 minutes since it makes a lot of web requests, and
            is therefore skipped by default.
        """
        for prefix, entry in self.entries.items():
            miriam_prefix = entry.get_identifiers_org_prefix()
            if miriam_prefix is None or prefix in IDOT_BROKEN:
                continue
            identifier = entry.get_example()
            with self.subTest(prefix=prefix, identifier=identifier):
                self.assertIsNotNone(identifier)
                self.assert_url(prefix, identifier)

    def test_url(self) -> None:
        """Test formatting URLs."""
        for prefix, identifier, expected, reason in [
            ("efo", "0000400", "efo:0000400", "test simple concatenation"),
            ("chebi", "CHEBI:1234", "CHEBI:1234", "test redundant namespace (standard)"),
            ("chebi", "1234", "CHEBI:1234", "test exclusion of redundant namespace (standard)"),
            (
                "mzspec",
                "PXD002255:ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2",
                "mzspec:PXD002255:ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2",
                "test simple concatenation with false banana",
            ),
            (
                "mzspec",
                "mzspec:PXD002255:ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2",
                "mzspec:PXD002255:ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2",
                "test simple concatenation (redundant) with false banana",
            ),
        ]:
            # The reason is part of the sub-test label so a failure names its case.
            with self.subTest(p=prefix, i=identifier, reason=reason):
                curie = get_identifiers_org_curie(prefix, identifier)
                self.assertEqual(expected, curie, msg="wrong CURIE")

                url = get_identifiers_org_iri(prefix, identifier)
                self.assertEqual(f"https://identifiers.org/{curie}", url, msg="wrong URL")

                self.assert_url(prefix, identifier)

    def test_miriam_uri(self) -> None:
        """Test URI generation."""
        for expected, prefix, kwargs in [
            ("https://identifiers.org/taxonomy:", "ncbitaxon", {}),
            ("https://identifiers.org/GO:", "go", {}),
            ("https://identifiers.org/doid/DOID:", "doid", {"legacy_banana": True}),
            ("https://identifiers.org/vario/VariO:", "vario", {"legacy_banana": True}),
            ("https://identifiers.org/cellosaurus/CVCL_", "cellosaurus", {"legacy_banana": True}),
            ("https://identifiers.org/DOID/", "doid", {"legacy_delimiter": True}),
        ]:
            # One sub-test per case so a single mismatch does not mask the others.
            with self.subTest(prefix=prefix, **kwargs):
                self.assertEqual(expected, get_resource(prefix).get_miriam_uri_prefix(**kwargs))

        with self.subTest(prefix="sty"):
            self.assertIsNone(get_resource("sty").get_miriam_uri_prefix())