File: test_parse_control.py

package info (click to toggle)
pybel 0.15.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 16,492 kB
  • sloc: python: 29,392; javascript: 246; makefile: 226; sh: 20
file content (358 lines) | stat: -rw-r--r-- 12,939 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# -*- coding: utf-8 -*-

import logging
import re
import unittest
from random import randint

from pybel.constants import (
    ANNOTATIONS,
    CITATION,
    CITATION_TYPE_PUBMED,
    EVIDENCE,
    IDENTIFIER,
    NAMESPACE,
)
from pybel.exceptions import (
    CitationTooLongException,
    CitationTooShortException,
    IllegalAnnotationValueWarning,
    InvalidCitationType,
    InvalidPubMedIdentifierWarning,
    MissingAnnotationKeyWarning,
    MissingAnnotationRegexWarning,
    UndefinedAnnotationWarning,
)
from pybel.language import Entity
from pybel.parser import ControlParser
from pybel.parser.parse_control import set_citation_stub
from pybel.testing.utils import n
from tests.constants import SET_CITATION_TEST, test_citation_dict

logging.getLogger("requests").setLevel(logging.WARNING)


class TestParseControl(unittest.TestCase):
    """Shared fixture: every test gets its own freshly built ``ControlParser``."""

    def setUp(self):
        """Create the annotation definitions and the parser used by the tests."""
        # Enumerated annotations: keyword -> set of values the tests treat as valid.
        self.annotation_to_term = dict(
            Custom1={"Custom1_A", "Custom1_B"},
            Custom2={"Custom2_A", "Custom2_B"},
        )

        # Pattern-based annotation: keyword -> compiled regular expression.
        self.annotation_to_pattern = dict(CustomRegex=re.compile("[0-9]+"))

        self.parser = ControlParser(
            annotation_to_pattern=self.annotation_to_pattern,
            annotation_to_term=self.annotation_to_term,
        )


class TestParseControlUnsetStatementErrors(TestParseControl):
    """Exercise ``UNSET`` statements, mostly ones the parser is expected to reject."""

    def _set_both_custom_annotations(self):
        """Set a citation plus ``Custom1``/``Custom2`` and check both got registered."""
        self.parser.parse_lines(
            [
                SET_CITATION_TEST,
                'SET Custom1 = "Custom1_A"',
                'SET Custom2 = "Custom2_A"',
            ]
        )
        for keyword in ("Custom1", "Custom2"):
            self.assertIn(keyword, self.parser.annotations)

    def test_unset_missing_evidence(self):
        """Unsetting the evidence on a fresh parser raises."""
        self.assertRaises(MissingAnnotationKeyWarning, self.parser.parseString, "UNSET Evidence")

    def test_unset_missing_citation(self):
        """Unsetting the citation on a fresh parser raises."""
        self.assertRaises(MissingAnnotationKeyWarning, self.parser.parseString, "UNSET Citation")

    def test_unset_missing_evidence_with_citation(self):
        """Unsetting an evidence that was never set raises, even with a citation set."""
        lines = [SET_CITATION_TEST, "UNSET Evidence"]
        self.assertRaises(MissingAnnotationKeyWarning, self.parser.parse_lines, lines)

    def test_unset_missing_statement_group(self):
        """Unsetting the statement group on a fresh parser raises."""
        self.assertRaises(MissingAnnotationKeyWarning, self.parser.parseString, "UNSET STATEMENT_GROUP")

    def test_unset_missing_command(self):
        """Unsetting a defined annotation that has not been set raises."""
        lines = [SET_CITATION_TEST, "UNSET Custom1"]
        self.assertRaises(MissingAnnotationKeyWarning, self.parser.parse_lines, lines)

    def test_unset_invalid_command(self):
        """Unsetting a keyword that is not a defined annotation raises."""
        lines = [SET_CITATION_TEST, "UNSET MISSING"]
        self.assertRaises(UndefinedAnnotationWarning, self.parser.parse_lines, lines)

    def test_unset_list_compact(self):
        """An annotation list without spaces unsets every listed annotation."""
        self._set_both_custom_annotations()
        self.parser.parseString("UNSET {Custom1,Custom2}")
        self.assertFalse(self.parser.annotations)

    def test_unset_list_spaced(self):
        """An annotation list with a space after the comma unsets every listed annotation."""
        self._set_both_custom_annotations()
        self.parser.parseString("UNSET {Custom1, Custom2}")
        self.assertFalse(self.parser.annotations)


class TestSetCitation(unittest.TestCase):
    """Smoke tests: ``set_citation_stub`` must parse these citation lists without raising."""

    def test_parser_double(self):
        """Two entries, no whitespace after the comma."""
        statement = 'Citation = {"PubMed","12928037"}'
        set_citation_stub.parseString(statement)

    def test_parser_double_spaced(self):
        """Two entries, with a space after the comma."""
        statement = 'Citation = {"PubMed", "12928037"}'
        set_citation_stub.parseString(statement)

    def test_parser_triple(self):
        """Three entries, no whitespace after the commas."""
        statement = 'Citation = {"PubMed Central","Trends in molecular medicine","12928037"}'
        set_citation_stub.parseString(statement)

    def test_parser_triple_spaced(self):
        """Three entries, with a space after each comma."""
        statement = 'Citation = {"PubMed Central", "Trends in molecular medicine", "12928037"}'
        set_citation_stub.parseString(statement)


class TestParseControlSetStatementErrors(TestParseControl):
    """Exercise ``SET`` statements that the parser is expected to reject."""

    def test_invalid_citation_type(self):
        """A citation whose first entry is ``PubMedCentral`` is rejected as an invalid type."""
        statement = 'SET Citation = {"PubMedCentral","Trends in molecular medicine","12928037"}'
        self.assertRaises(InvalidCitationType, self.parser.parseString, statement)

    def test_invalid_pmid(self):
        """A three-entry PubMed citation with a non-numeric identifier is rejected."""
        statement = 'SET Citation = {"PubMed","Trends in molecular medicine","NOT VALID NUMBER"}'
        self.assertRaises(InvalidPubMedIdentifierWarning, self.parser.parseString, statement)

    def test_invalid_pmid_short(self):
        """A two-entry PubMed citation with a non-numeric identifier is rejected."""
        statement = 'SET Citation = {"PubMed","NOT VALID NUMBER"}'
        self.assertRaises(InvalidPubMedIdentifierWarning, self.parser.parseString, statement)

    def test_set_missing_statement(self):
        """Setting a keyword that is not a defined annotation is rejected."""
        lines = [SET_CITATION_TEST, 'SET MissingKey = "lol"']
        self.assertRaises(UndefinedAnnotationWarning, self.parser.parse_lines, lines)

    def test_custom_annotation_list_withInvalid(self):
        """One bad value inside an otherwise valid annotation list is rejected."""
        lines = [
            SET_CITATION_TEST,
            'SET Custom1 = {"Custom1_A","Custom1_B","Evil invalid!!!"}',
        ]
        self.assertRaises(IllegalAnnotationValueWarning, self.parser.parse_lines, lines)

    def test_custom_value_failure(self):
        """A defined annotation keyword with a value outside its term set is rejected."""
        lines = [SET_CITATION_TEST, 'SET Custom1 = "Custom1_C"']
        self.assertRaises(IllegalAnnotationValueWarning, self.parser.parse_lines, lines)

    def test_regex_failure(self):
        """A value that does not satisfy the ``CustomRegex`` pattern is rejected."""
        lines = [SET_CITATION_TEST, 'SET CustomRegex = "abce13"']
        self.assertRaises(MissingAnnotationRegexWarning, self.parser.parse_lines, lines)


class TestParseControl2(TestParseControl):
    """Exercise ``SET``/``UNSET`` statements that the parser is expected to accept."""

    def test_set_statement_group(self):
        """Tests a statement group gets set properly"""
        s1 = 'SET STATEMENT_GROUP = "my group"'

        self.assertIsNone(self.parser.statement_group)

        self.parser.parseString(s1)
        self.assertEqual("my group", self.parser.statement_group, msg="problem with integration")

        s2 = "UNSET STATEMENT_GROUP"
        self.parser.parseString(s2)
        self.assertIsNone(self.parser.statement_group, msg="problem with unset")

    def test_citation_short(self):
        """Set the shared test citation, check the parser state, then unset it."""
        self.parser.parseString(SET_CITATION_TEST)
        self.assertEqual(test_citation_dict[IDENTIFIER], self.parser.citation_db_id)
        self.assertEqual(test_citation_dict[NAMESPACE], self.parser.citation_db)

        expected_annotations = {
            EVIDENCE: None,
            ANNOTATIONS: {},
            CITATION: test_citation_dict,
        }
        self.assertEqual(expected_annotations, self.parser.get_annotations())

        self.parser.parseString("UNSET Citation")
        self.assertFalse(self.parser.citation_is_set)

    def test_citation_invalid_date(self):
        """A citation with a badly formatted date still yields its namespace and identifier."""
        s = 'SET Citation = {"PubMed","Trends in molecular medicine","12928037","01-12-1999","de Nigris"}'

        self.parser.parseString(s)
        self.assertEqual(CITATION_TYPE_PUBMED, self.parser.citation_db)
        self.assertEqual("12928037", self.parser.citation_db_id)

        # Only the namespace and identifier are expected in the citation dictionary.
        expected_dict = {
            EVIDENCE: None,
            ANNOTATIONS: {},
            CITATION: {
                NAMESPACE: CITATION_TYPE_PUBMED,
                IDENTIFIER: "12928037",
            },
        }

        self.assertEqual(expected_dict, self.parser.get_annotations())

    def test_citation_with_empty_comment(self):
        """A six-entry citation whose last entry is an empty string is accepted."""
        s = 'SET Citation = {"PubMed","Test Name","12928037","1999-01-01","de Nigris|Lerman A|Ignarro LJ",""}'
        self.parser.parseString(s)

        self.assertEqual(CITATION_TYPE_PUBMED, self.parser.citation_db)
        self.assertEqual("12928037", self.parser.citation_db_id)

        # Only the namespace and identifier are expected in the citation dictionary.
        expected_dict = {
            EVIDENCE: None,
            ANNOTATIONS: {},
            CITATION: {
                NAMESPACE: CITATION_TYPE_PUBMED,
                IDENTIFIER: "12928037",
            },
        }

        self.assertEqual(expected_dict, self.parser.get_annotations())

    def test_double(self):
        """A two-entry citation sets the citation namespace and identifier."""
        s = 'SET Citation = {"PubMed","12928037"}'
        self.parser.parseString(s)
        self.assertEqual(CITATION_TYPE_PUBMED, self.parser.citation_db)
        self.assertEqual("12928037", self.parser.citation_db_id)

    def test_double_with_space(self):
        """Same as test_double, but has a space between the comma and next entry"""
        s = 'SET Citation = {"PubMed", "12928037"}'
        self.parser.parseString(s)
        self.assertEqual(CITATION_TYPE_PUBMED, self.parser.citation_db)
        self.assertEqual("12928037", self.parser.citation_db_id)

    def test_citation_too_short(self):
        """A citation with a single entry raises ``CitationTooShortException``."""
        s = 'SET Citation = {"PubMed"}'
        with self.assertRaises(CitationTooShortException):
            self.parser.parseString(s)

    def test_citation_too_long(self):
        """A citation with seven entries raises ``CitationTooLongException``."""
        s = 'SET Citation = {"PubMed","Name","1234","1999-01-01","Nope|Noper","Nope", "nope nope"}'
        with self.assertRaises(CitationTooLongException):
            self.parser.parseString(s)

    def test_evidence(self):
        """Setting an evidence after a citation is reflected in ``get_annotations()``."""
        self.parser.parseString(SET_CITATION_TEST)
        s = 'SET Evidence = "For instance, during 7-ketocholesterol-induced apoptosis of U937 cells"'
        self.parser.parseString(s)

        self.assertIsNotNone(self.parser.evidence)

        expected_annotation = {
            CITATION: test_citation_dict,
            ANNOTATIONS: {},
            EVIDENCE: "For instance, during 7-ketocholesterol-induced apoptosis of U937 cells",
        }

        self.assertEqual(expected_annotation, self.parser.get_annotations())

    def test_custom_annotation(self):
        """A single annotation value is stored as a one-element list of ``Entity``."""
        s = [SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"']
        self.parser.parse_lines(s)

        expected_annotation = {
            "Custom1": [Entity(namespace="Custom1", identifier="Custom1_A")],
        }

        self.assertEqual(expected_annotation, self.parser.annotations)

    def test_custom_annotation_list(self):
        """An annotation list is stored as a list of ``Entity`` and shows up in ``get_annotations()``."""
        s = [SET_CITATION_TEST, 'SET Custom1 = {"Custom1_A","Custom1_B"}']
        self.parser.parse_lines(s)

        expected_annotation = {
            "Custom1": [
                Entity(namespace="Custom1", identifier="Custom1_A"),
                Entity(namespace="Custom1", identifier="Custom1_B"),
            ],
        }

        self.assertEqual(expected_annotation, self.parser.annotations)

        expected_dict = {
            ANNOTATIONS: expected_annotation,
            CITATION: test_citation_dict,
            EVIDENCE: None,
        }

        self.assertEqual(expected_dict, self.parser.get_annotations())

    def test_overwrite_evidence(self):
        """A second ``SET Evidence`` replaces the first one."""
        s1 = 'SET Evidence = "a"'
        s2 = 'SET Evidence = "b"'

        self.parser.parseString(s1)
        self.parser.parseString(s2)

        self.assertEqual("b", self.parser.evidence)

    def test_unset_evidence(self):
        """Setting and then unsetting the evidence leaves the annotations empty."""
        s1 = 'SET Evidence = "a"'
        s2 = "UNSET Evidence"

        self.parser.parseString(s1)
        self.parser.parseString(s2)

        self.assertEqual({}, self.parser.annotations)

    def test_unset_custom(self):
        """Setting and then unsetting a custom annotation leaves the annotations empty."""
        statements = [SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"', "UNSET Custom1"]

        self.parser.parse_lines(statements)

        self.assertEqual({}, self.parser.annotations)

    def test_reset_citation(self):
        """Re-set the citation and evidence, then unset parts of the state and finally everything."""
        # randint() needs integer bounds: float bounds such as 1e7 are deprecated
        # since Python 3.10 and raise TypeError from Python 3.12.
        s1_identifier = str(randint(0, 10**7))
        s1 = f'SET Citation = {{"PubMed","Test Reference 1","{s1_identifier}"}}'
        s2 = 'SET Evidence = "d"'

        s3_identifier = str(randint(0, 10**7))
        s3 = f'SET Citation = {{"PubMed","Test Reference 2","{s3_identifier}"}}'
        # n() comes from pybel.testing.utils; presumably a random string -- confirm there.
        _test_evidence = n()
        s4 = f'SET Evidence = "{_test_evidence}"'
        s5 = 'SET Custom1 = "Custom1_A"'
        s6 = 'SET Custom2 = "Custom2_A"'

        statements = [s1, s2, s3, s4, s5, s6]

        self.parser.parse_lines(statements)

        # The second citation and second evidence are the ones in effect.
        self.assertEqual(_test_evidence, self.parser.evidence)
        self.assertEqual(CITATION_TYPE_PUBMED, self.parser.citation_db)
        self.assertEqual(s3_identifier, self.parser.citation_db_id)

        # Unsetting a list only clears the listed entries.
        self.parser.parseString("UNSET {Custom1,Evidence}")
        self.assertNotIn("Custom1", self.parser.annotations)
        self.assertIsNone(self.parser.evidence)
        self.assertIn("Custom2", self.parser.annotations)
        self.assertTrue(self.parser.citation_is_set)

        self.parser.parseString("UNSET ALL")
        self.assertEqual(0, len(self.parser.annotations))
        self.assertFalse(self.parser.citation_is_set)

    def test_set_regex(self):
        """A numeric value is accepted for the pattern-based ``CustomRegex`` annotation."""
        # Integer bound: randint() rejects float bounds such as 1e5 on Python 3.12+.
        v = str(randint(0, 10**5))
        s = [SET_CITATION_TEST, f'SET CustomRegex = "{v}"']
        self.parser.parse_lines(s)

        self.assertEqual(
            [
                Entity(namespace="CustomRegex", identifier=v),
            ],
            self.parser.annotations["CustomRegex"],
        )