File: test_grammar.py

package info (click to toggle)
python-hypothesis 6.138.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,272 kB
  • sloc: python: 62,853; ruby: 1,107; sh: 253; makefile: 41; javascript: 6
file content (168 lines) | stat: -rw-r--r-- 5,199 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import json

import pytest
from lark.lark import Lark

from hypothesis import given
from hypothesis.errors import InvalidArgument
from hypothesis.extra.lark import from_lark
from hypothesis.strategies import characters, data, just

from tests.common.debug import check_can_generate_examples, find_any

# JSON grammar in Lark's EBNF dialect, shared by several tests below.
#
# Adapted from the official Lark tutorial, with modifications to ensure
# that the generated JSON is valid: numbers never start with "." (they
# always begin with a nonzero digit), \f is not ignorable whitespace, and
# strings are restricted to lowercase ASCII letters.  Source:
# https://github.com/lark-parser/lark/blob/master/docs/json_tutorial.md
EBNF_GRAMMAR = r"""
    value: dict
         | list
         | STRING
         | NUMBER
         | "true"  -> true
         | "false" -> false
         | "null"  -> null
    list : "[" [value ("," value)*] "]"
    dict : "{" [STRING ":" value ("," STRING ":" value)*] "}"

    STRING : /"[a-z]*"/
    NUMBER : /-?[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?/

    WS : /[ \t\r\n]+/
    %ignore WS
"""

# Minimal grammar for a bracketed, comma-separated list of non-negative
# integers without leading zeros.  Unlike EBNF_GRAMMAR it declares no
# ignored whitespace terminal.
LIST_GRAMMAR = r"""
list : "[" [NUMBER ("," NUMBER)*] "]"
NUMBER: /[0-9]|[1-9][0-9]*/
"""


@given(string=from_lark(Lark(EBNF_GRAMMAR, start="value")))
def test_generates_valid_json(string):
    """Check that text drawn from the JSON grammar is accepted by ``json.loads``."""
    # The parsed value is irrelevant; a decoding error is what fails the test.
    json.loads(string)


@pytest.mark.parametrize(
    "start, type_",
    [
        ("dict", dict),
        ("list", list),
        ("STRING", str),
        ("NUMBER", (int, float)),
        ("TRUE", bool),
        ("FALSE", bool),
        ("NULL", type(None)),
    ],
)
@given(data=data())
def test_can_specify_start_rule(data, start, type_):
    """Check that ``start=`` picks the rule or terminal to generate from.

    The parser is always built with ``start="value"``; the ``start`` argument
    to ``from_lark`` is what varies, and the decoded JSON must be an instance
    of the matching Python type.
    """
    parser = Lark(EBNF_GRAMMAR, start="value")
    text = data.draw(from_lark(parser, start=start))
    decoded = json.loads(text)
    assert isinstance(decoded, type_)


def test_can_generate_ignored_tokens():
    """Check that ``%ignore``d terminals can still appear in generated text."""
    grammar_with_ws = r"""
    list : "[" [STRING ("," STRING)*] "]"
    STRING : /"[a-z]*"/
    WS : /[ \t\r\n]+/
    %ignore WS
    """
    parser = Lark(grammar_with_ws, start="list")
    # In this grammar a tab can only come from the ignored WS terminal, so
    # finding one shows that ignorable whitespace is being generated.
    find_any(from_lark(parser), lambda text: "\t" in text)


def test_generation_without_whitespace():
    """Check that an example without any space can be found for LIST_GRAMMAR."""
    parser = Lark(LIST_GRAMMAR, start="list")
    find_any(from_lark(parser), lambda text: " " not in text)


def test_cannot_convert_EBNF_to_strategy_directly():
    """Check that ``from_lark`` rejects a grammar string and a bad ``explicit``."""
    # A bare grammar string is not a Lark object.
    with pytest.raises(InvalidArgument):
        check_can_generate_examples(from_lark(EBNF_GRAMMAR))

    # Passing start= alongside the grammar string does not help either; this
    # case is expected to surface as a TypeError.
    with pytest.raises(TypeError):
        check_can_generate_examples(from_lark(EBNF_GRAMMAR, start="value"))

    # explicit= given as a list rather than a mapping of terminal names.
    list_parser = Lark(LIST_GRAMMAR, start="list")
    with pytest.raises(InvalidArgument):
        check_can_generate_examples(from_lark(list_parser, explicit=[]))


def test_required_undefined_terminals_require_explicit_strategies():
    """Check that a ``%declare``d terminal needs an entry in ``explicit``."""
    declared_grammar = r"""
    list : "[" ELEMENT ("," ELEMENT)* "]"
    %declare ELEMENT
    """
    # ELEMENT is declared but never defined, so generation without an
    # explicit strategy must fail with a message mentioning %declare.
    with pytest.raises(InvalidArgument, match=r"%declare"):
        unresolved = from_lark(Lark(declared_grammar, start="list"))
        check_can_generate_examples(unresolved)

    # Supplying a strategy for ELEMENT makes the same grammar usable.
    element_strategies = {"ELEMENT": just("200")}
    resolved = from_lark(
        Lark(declared_grammar, start="list"), explicit=element_strategies
    )
    check_can_generate_examples(resolved)


def test_cannot_use_explicit_strategies_for_unknown_terminals():
    """Check that ``explicit`` keys must name terminals of the grammar."""
    parser = Lark(LIST_GRAMMAR, start="list")
    # LIST_GRAMMAR defines no terminal called "unused_name".
    unknown = {"unused_name": just("")}
    with pytest.raises(InvalidArgument):
        check_can_generate_examples(from_lark(parser, explicit=unknown))


def test_non_string_explicit_strategies_are_invalid():
    """Check that an explicit strategy yielding a non-string is rejected."""
    parser = Lark(LIST_GRAMMAR, start="list")
    # just(0) produces an int, not text.
    int_valued = {"NUMBER": just(0)}
    with pytest.raises(InvalidArgument):
        check_can_generate_examples(from_lark(parser, explicit=int_valued))


@given(
    string=from_lark(Lark(LIST_GRAMMAR, start="list"), explicit={"NUMBER": just("0")})
)
def test_can_override_defined_terminal(string):
    """Check that ``explicit`` overrides a terminal the grammar already defines."""
    # NUMBER is forced to "0", so every element of the list is zero.
    numbers = json.loads(string)
    assert sum(numbers) == 0


@given(string=from_lark(Lark(LIST_GRAMMAR, start="list"), alphabet="[0,]"))
def test_can_generate_from_limited_alphabet(string):
    """Check that ``alphabet`` restricts which characters may be generated."""
    # The only digit in the alphabet is "0", so the list can only hold zeros.
    numbers = json.loads(string)
    assert sum(numbers) == 0


@given(string=from_lark(Lark(LIST_GRAMMAR, start="list"), alphabet="[9]"))
def test_can_generate_from_limited_alphabet_no_comma(string):
    """Check that excluding "," from ``alphabet`` rules out multi-item lists."""
    # Without a separator, the list holds at most a single number.
    items = json.loads(string)
    assert len(items) < 2


@given(
    string=from_lark(
        Lark(EBNF_GRAMMAR, start="value"),
        alphabet=characters(codec="ascii", exclude_characters=","),
    )
)
def test_can_generate_from_limited_alphabet_no_comma_json(string):
    """Check that a ``characters()`` alphabet excluding "," is respected."""
    assert string.count(",") == 0


def test_error_if_alphabet_bans_all_start_rules():
    """Check the error raised when ``alphabet`` permits no start rule at all."""
    parser = Lark(LIST_GRAMMAR, start="list")
    expected_message = r"No start rule .+ is allowed by alphabet="
    # None of "a", "b", "c" can begin a list, which must start with "[".
    with pytest.raises(InvalidArgument, match=expected_message):
        check_can_generate_examples(from_lark(parser, alphabet="abc"))