1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
import json
import pytest
from lark.lark import Lark
from hypothesis import given
from hypothesis.errors import InvalidArgument
from hypothesis.extra.lark import from_lark
from hypothesis.strategies import data, just
from tests.common.debug import find_any
# JSON grammar adapted from the official Lark tutorial:
# https://github.com/lark-parser/lark/blob/master/docs/json_tutorial.md
#
# It is modified so that every string it generates is valid JSON:
#   * NUMBER must begin (after an optional "-") with a digit 1-9, so no
#     number starts with ".".
#   * WS covers only space, \t, \r and \n, so \f is not ignorable whitespace.
#   * STRING is restricted to double-quoted runs of the letters a-z.
EBNF_GRAMMAR = r"""
value: dict
| list
| STRING
| NUMBER
| "true" -> true
| "false" -> false
| "null" -> null
list : "[" [value ("," value)*] "]"
dict : "{" [STRING ":" value ("," STRING ":" value)*] "}"
STRING : /"[a-z]*"/
NUMBER : /-?[1-9][0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?/
WS : /[ \t\r\n]+/
%ignore WS
"""
# Minimal grammar: a bracketed, comma-separated list of NUMBER tokens, where
# NUMBER is one or more decimal digits. It declares no whitespace terminal
# and no %ignore directive.
LIST_GRAMMAR = r"""
list : "[" [NUMBER ("," NUMBER)*] "]"
NUMBER: /[0-9]+/
"""
@given(from_lark(Lark(EBNF_GRAMMAR, start="value")))
def test_generates_valid_json(string):
    """Check that text drawn from the ``value`` rule parses as JSON."""
    # json.loads raises on malformed input; the parsed result is not needed.
    json.loads(string)
@pytest.mark.parametrize(
    "start, type_",
    [
        ("dict", dict),
        ("list", list),
        ("STRING", str),
        ("NUMBER", (int, float)),
        ("TRUE", bool),
        ("FALSE", bool),
        ("NULL", type(None)),
    ],
)
@given(data=data())
def test_can_specify_start_rule(data, start, type_):
    """Check that an explicit ``start`` symbol controls what is generated.

    ``start`` is the symbol handed to ``from_lark`` and ``type_`` is the
    Python type (or tuple of types) that ``json.loads`` must return for
    text generated from that symbol.
    """
    # NOTE(review): TRUE/FALSE/NULL are presumably the names Lark gives to the
    # anonymous "true"/"false"/"null" literals in EBNF_GRAMMAR - confirm.
    grammar = Lark(EBNF_GRAMMAR, start="value")
    strategy = from_lark(grammar, start=start)
    generated = data.draw(strategy)
    parsed = json.loads(generated)
    assert isinstance(parsed, type_)
def test_can_generate_ignored_tokens():
    """Check that text matching an ``%ignore``-d terminal can be generated."""
    list_grammar = r"""
list : "[" [STRING ("," STRING)*] "]"
STRING : /"[a-z]*"/
WS : /[ \t\r\n]+/
%ignore WS
"""
    parser = Lark(list_grammar, start="list")
    strategy = from_lark(parser)
    # Neither the bracket/comma literals nor STRING (quoted a-z only) can
    # contain a tab, so a tab in the output has to come from the ignored
    # WS terminal.
    find_any(strategy, lambda text: "\t" in text)
def test_generation_without_whitespace():
    """Check that ``LIST_GRAMMAR`` can yield at least one space-free string."""
    strategy = from_lark(Lark(LIST_GRAMMAR, start="list"))
    find_any(strategy, lambda text: " " not in text)
def test_cannot_convert_EBNF_to_strategy_directly():
    """Check that ``from_lark`` rejects raw grammar text and a list ``explicit``."""
    # A grammar string is not a Lark object.
    with pytest.raises(InvalidArgument):
        from_lark(EBNF_GRAMMAR).example()

    # Same raw string plus start=: not even the right number of arguments.
    with pytest.raises(TypeError):
        from_lark(EBNF_GRAMMAR, start="value").example()

    # A list is the wrong type for the explicit strategies argument.
    with pytest.raises(InvalidArgument):
        parser = Lark(LIST_GRAMMAR, start="list")
        from_lark(parser, explicit=[]).example()
def test_undefined_terminals_require_explicit_strategies():
    """Check that a ``%declare``-d terminal needs an ``explicit`` strategy."""
    elem_grammar = r"""
list : "[" [ELEMENT ("," ELEMENT)*] "]"
%declare ELEMENT
"""
    # ELEMENT is declared without a pattern, so generating must fail when no
    # strategy is supplied for it.
    with pytest.raises(InvalidArgument):
        from_lark(Lark(elem_grammar, start="list")).example()

    # Supplying a strategy for ELEMENT makes the same grammar usable.
    element_strategies = {"ELEMENT": just("200")}
    from_lark(
        Lark(elem_grammar, start="list"), explicit=element_strategies
    ).example()
def test_cannot_use_explicit_strategies_for_unknown_terminals():
    """Check that an ``explicit`` key naming no terminal in the grammar fails."""
    # LIST_GRAMMAR has no symbol called "unused_name".
    unknown_terminal = {"unused_name": just("")}
    with pytest.raises(InvalidArgument):
        parser = Lark(LIST_GRAMMAR, start="list")
        from_lark(parser, explicit=unknown_terminal).example()
def test_non_string_explicit_strategies_are_invalid():
    """Check that an explicit strategy producing a non-string is rejected."""
    # just(0) yields an int rather than text for the NUMBER terminal.
    int_valued = {"NUMBER": just(0)}
    with pytest.raises(InvalidArgument):
        parser = Lark(LIST_GRAMMAR, start="list")
        from_lark(parser, explicit=int_valued).example()
@given(
    string=from_lark(Lark(LIST_GRAMMAR, start="list"), explicit={"NUMBER": just("0")})
)
def test_can_override_defined_terminal(string):
    """Check that ``explicit`` can replace a terminal the grammar defines."""
    numbers = json.loads(string)
    # Every NUMBER is forced to "0", so the decoded list must sum to zero
    # (an empty list sums to zero as well).
    assert sum(numbers) == 0
|