# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
import json
import pytest
from lark.lark import Lark
from hypothesis import given
from hypothesis.errors import InvalidArgument
from hypothesis.extra.lark import from_lark
from hypothesis.strategies import characters, data, just
from tests.common.debug import check_can_generate_examples, find_any
# JSON-like grammar shared by several tests in this module.
#
# Adapted from the official Lark tutorial, with modifications to ensure
# that the generated JSON is valid, i.e. no numbers starting with ".",
# \f is not ignorable whitespace, and restricted strings only. Source:
# https://github.com/lark-parser/lark/blob/master/docs/json_tutorial.md
#
# Notes on the terminals as written here:
#   * STRING only matches a double-quoted run of the letters a-z.
#   * NUMBER must begin with a digit 1-9 (after an optional "-"), so a bare
#     "0" and numbers with a leading zero are never matched.
#   * WS (space, tab, CR, LF) is declared ignorable via %ignore.
EBNF_GRAMMAR = r"""
value: dict
| list
| STRING
| NUMBER
| "true" -> true
| "false" -> false
| "null" -> null
list : "[" [value ("," value)*] "]"
dict : "{" [STRING ":" value ("," STRING ":" value)*] "}"
STRING : /"[a-z]*"/
NUMBER : /-?[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?/
WS : /[ \t\r\n]+/
%ignore WS
"""
# Minimal grammar for a bracketed, comma-separated list of non-negative
# integers. NUMBER is either a single digit or a longer digit run that does
# not start with zero. No ignorable whitespace is declared in this grammar.
LIST_GRAMMAR = r"""
list : "[" [NUMBER ("," NUMBER)*] "]"
NUMBER: /[0-9]|[1-9][0-9]*/
"""
@given(from_lark(Lark(EBNF_GRAMMAR, start="value")))
def test_generates_valid_json(string):
    """Every string drawn from the JSON grammar must be accepted by ``json``."""
    # json.loads raises on malformed input, which is what fails the test;
    # the decoded value itself is not inspected.
    json.loads(string)
@pytest.mark.parametrize(
    "start, type_",
    [
        ("dict", dict),
        ("list", list),
        ("STRING", str),
        ("NUMBER", (int, float)),
        ("TRUE", bool),
        ("FALSE", bool),
        ("NULL", type(None)),
    ],
)
@given(data=data())
def test_can_specify_start_rule(data, start, type_):
    """Overriding ``start`` in ``from_lark`` yields JSON of the matching type.

    The grammar is always compiled with ``start="value"``; the ``start``
    argument given to ``from_lark`` selects what is actually generated.
    ``type_`` is the Python type (or tuple of types) that the decoded JSON
    value must be an instance of.
    """
    # NOTE(review): "TRUE"/"FALSE"/"NULL" are presumably the names Lark gives
    # to the anonymous "true"/"false"/"null" literals - confirm against Lark.
    grammar = Lark(EBNF_GRAMMAR, start="value")
    strategy = from_lark(grammar, start=start)
    drawn_text = data.draw(strategy)
    decoded = json.loads(drawn_text)
    assert isinstance(decoded, type_)
def test_can_generate_ignored_tokens():
    """Terminals marked ``%ignore`` can still appear in generated strings."""
    grammar_with_ignored_ws = r"""
list : "[" [STRING ("," STRING)*] "]"
STRING : /"[a-z]*"/
WS : /[ \t\r\n]+/
%ignore WS
"""
    compiled = Lark(grammar_with_ignored_ws, start="list")
    # STRING cannot contain a tab (it only allows a-z between the quotes), so
    # a tab in the output can only have come from the ignored WS terminal.
    # In other words: a JSON list of strings which does not round-trip to its
    # canonical form must contain ignorable whitespace.
    find_any(from_lark(compiled), lambda text: "\t" in text)
def test_generation_without_whitespace():
    """A grammar with no whitespace terminal can produce a space-free string."""
    list_strategy = from_lark(Lark(LIST_GRAMMAR, start="list"))
    find_any(list_strategy, lambda text: " " not in text)
def test_cannot_convert_EBNF_to_strategy_directly():
    """``from_lark`` rejects raw grammar text and a non-dict ``explicit``.

    Each strategy is built inside its ``pytest.raises`` block, so the check
    holds whether the error surfaces on construction or on generation.
    """
    # Not a Lark object: the raw grammar text is passed instead.
    with pytest.raises(InvalidArgument):
        from_text = from_lark(EBNF_GRAMMAR)
        check_can_generate_examples(from_text)

    # Raw grammar text again, now with an explicit start; TypeError expected.
    # NOTE(review): the earlier comment here read "Not even the right number
    # of arguments", but ``start`` is passed by keyword exactly as in the
    # valid calls elsewhere in this file - confirm what this case is meant
    # to cover.
    with pytest.raises(TypeError):
        from_text_with_start = from_lark(EBNF_GRAMMAR, start="value")
        check_can_generate_examples(from_text_with_start)

    # Wrong type for explicit strategies: a list, where the other tests in
    # this file pass a dict.
    with pytest.raises(InvalidArgument):
        compiled = Lark(LIST_GRAMMAR, start="list")
        check_can_generate_examples(from_lark(compiled, explicit=[]))
def test_required_undefined_terminals_require_explicit_strategies():
    """A ``%declare``-d terminal needs an entry in ``explicit`` to be usable."""
    declared_only_grammar = r"""
list : "[" ELEMENT ("," ELEMENT)* "]"
%declare ELEMENT
"""
    # Without an explicit strategy for ELEMENT, generation must fail with an
    # error message that mentions %declare.
    with pytest.raises(InvalidArgument, match=r"%declare"):
        unresolved = from_lark(Lark(declared_only_grammar, start="list"))
        check_can_generate_examples(unresolved)

    # Supplying a strategy for ELEMENT makes the same grammar usable.
    explicit_strategies = {"ELEMENT": just("200")}
    resolved = from_lark(
        Lark(declared_only_grammar, start="list"), explicit=explicit_strategies
    )
    check_can_generate_examples(resolved)
def test_cannot_use_explicit_strategies_for_unknown_terminals():
    """An ``explicit`` key that names no terminal in the grammar is rejected."""
    with pytest.raises(InvalidArgument):
        # LIST_GRAMMAR defines no terminal called "unused_name".
        unknown_terminal = {"unused_name": just("")}
        strategy = from_lark(
            Lark(LIST_GRAMMAR, start="list"), explicit=unknown_terminal
        )
        check_can_generate_examples(strategy)
def test_non_string_explicit_strategies_are_invalid():
    """An explicit strategy producing a non-string value is rejected."""
    with pytest.raises(InvalidArgument):
        # just(0) yields the integer 0 rather than the text "0".
        integer_valued = {"NUMBER": just(0)}
        strategy = from_lark(Lark(LIST_GRAMMAR, start="list"), explicit=integer_valued)
        check_can_generate_examples(strategy)
@given(
    string=from_lark(Lark(LIST_GRAMMAR, start="list"), explicit={"NUMBER": just("0")})
)
def test_can_override_defined_terminal(string):
    """An explicit strategy replaces a terminal the grammar already defines."""
    # NUMBER is overridden to always be "0", so every list element is zero.
    numbers = json.loads(string)
    assert sum(numbers) == 0
@given(string=from_lark(Lark(LIST_GRAMMAR, start="list"), alphabet="[0,]"))
def test_can_generate_from_limited_alphabet(string):
    """With "0" as the only permitted digit, every list element is zero."""
    numbers = json.loads(string)
    assert sum(numbers) == 0
@given(string=from_lark(Lark(LIST_GRAMMAR, start="list"), alphabet="[9]"))
def test_can_generate_from_limited_alphabet_no_comma(string):
    """Without "," in the alphabet, the list holds at most one element."""
    elements = json.loads(string)
    assert len(elements) <= 1
@given(
    string=from_lark(
        Lark(EBNF_GRAMMAR, start="value"),
        alphabet=characters(codec="ascii", exclude_characters=","),
    )
)
def test_can_generate_from_limited_alphabet_no_comma_json(string):
    """A strategy-valued alphabet that excludes "," keeps commas out of output."""
    assert string.count(",") == 0
def test_error_if_alphabet_bans_all_start_rules():
    """An alphabet that permits no start rule produces a descriptive error."""
    expected_message = r"No start rule .+ is allowed by alphabet="
    with pytest.raises(InvalidArgument, match=expected_message):
        # "abc" has no "[" or "]", both of which the list rule requires.
        impossible = from_lark(Lark(LIST_GRAMMAR, start="list"), alphabet="abc")
        check_can_generate_examples(impossible)
|