File: test_lstar.py

package info (click to toggle)
python-hypothesis 6.138.0-1
links: PTS, VCS
area: main
in suites: sid
size: 15,272 kB
sloc: python: 62,853; ruby: 1,107; sh: 253; makefile: 41; javascript: 6
file content (245 lines) | stat: -rw-r--r-- 6,230 bytes
parent folder | download | duplicates (2)
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import itertools

import pytest

from hypothesis import assume, example, given, strategies as st
from hypothesis.errors import InvalidState
from hypothesis.internal.conjecture.dfa.lstar import IntegerNormalizer, LStar


def test_can_learn_simple_predicate():
    learner = LStar(lambda s: len(s) >= 3)

    learner.learn(bytes(3))

    dfa = learner.dfa
    assert dfa.start == 0
    assert dfa.transition(0, 0) == 1
    assert dfa.transition(1, 0) == 2
    assert dfa.transition(2, 0) == 3
    assert dfa.transition(3, 0) == 3

    assert not dfa.is_accepting(0)
    assert not dfa.is_accepting(1)
    assert not dfa.is_accepting(2)
    assert dfa.is_accepting(3)


def test_relearning_does_not_change_generation():
    learner = LStar(lambda s: len(s) >= 3)

    prev = learner.generation
    learner.learn(bytes(3))
    assert prev != learner.generation

    prev = learner.generation
    learner.learn(bytes(3))
    assert prev == learner.generation


def test_can_learn_dead_nodes():
    learner = LStar(lambda s: len(s) == 4 and max(s) <= 1)

    learner.learn(bytes(4))

    assert learner.dfa.matches(bytes(4))
    assert learner.dfa.matches(bytes([1] * 4))
    assert learner.dfa.matches(bytes([1] * 4))

    learner.learn([2, 0, 0, 0])

    # Need a length 5 string to distinguish this from
    # something that just loops back to zero.
    learner.learn([2, 0, 0, 0, 0])

    dfa = learner.dfa

    assert dfa.is_dead(dfa.transition(dfa.start, 2))
    assert dfa.is_dead(dfa.transition(dfa.start, 3))


def test_iterates_over_learned_strings():
    upper_bound = bytes([1, 2])
    learner = LStar(lambda s: len(s) == 2 and max(s) <= 5 and s <= upper_bound)

    learner.learn(upper_bound)

    prev = -1
    while learner.generation != prev:
        prev = learner.generation
        learner.learn([1, 2, 0])
        learner.learn([6, 1, 2])
        learner.learn([1, 3])
        for i in range(7):
            learner.learn([0, i])
            learner.learn([1, i])
        learner.learn([2, 0])

        learner.learn([2, 0, 0, 0])
        learner.learn([2, 0, 0])
        learner.learn([0, 6, 0, 0])
        learner.learn([1, 3, 0, 0])
        learner.learn([1, 6, 0, 0])
        learner.learn([0, 0, 0, 0, 0])

    dfa = learner.dfa

    n = 9
    matches = list(itertools.islice(dfa.all_matching_strings(), n + 1))
    for m in matches:
        assert learner.member(m), list(m)
    assert len(matches) == n


def test_iteration_with_dead_nodes():
    learner = LStar(lambda s: len(s) == 3 and max(s) <= 1 and s[1] == 0)
    learner.learn([1, 0, 1])
    learner.learn([1, 1, 1])
    learner.learn([0, 1, 1])
    learner.learn([1, 1, 0])
    learner.learn([1, 1, 1, 0, 1])
    learner.learn([0, 0, 4])

    dfa = learner.dfa
    i = dfa.transition(dfa.start, 1)
    assert not dfa.is_dead(i)
    assert dfa.is_dead(dfa.transition(i, 2))

    assert list(learner.dfa.all_matching_strings()) == [
        bytes([0, 0, 0]),
        bytes([0, 0, 1]),
        bytes([1, 0, 0]),
        bytes([1, 0, 1]),
    ]


def test_learning_is_just_checking_when_fully_explored():
    count = 0

    def accept(s):
        nonlocal count
        count += 1
        return len(s) <= 5 and all(c == 0 for c in s)

    learner = LStar(accept)

    for c in [0, 1]:
        for n in range(10):
            learner.learn(bytes([c]) * n)

    assert list(learner.dfa.all_matching_strings()) == [bytes(n) for n in range(6)]

    prev = count

    learner.learn([2] * 11)

    calls = count - prev

    assert calls == 1


def test_canonicalises_values_to_zero_where_appropriate():
    calls = 0

    def member(s):
        nonlocal calls
        calls += 1
        return len(s) == 10

    learner = LStar(member)

    learner.learn(bytes(10))
    learner.learn(bytes(11))

    prev = calls

    assert learner.dfa.matches(bytes([1] * 10))

    assert calls == prev


def test_normalizing_defaults_to_zero():
    normalizer = IntegerNormalizer()

    assert normalizer.normalize(10) == 0

    assert not normalizer.distinguish(10, lambda n: True)

    assert normalizer.normalize(10) == 0


def test_normalizing_can_be_made_to_distinguish_values():
    normalizer = IntegerNormalizer()

    assert normalizer.distinguish(10, lambda n: n >= 5)
    assert normalizer.normalize(10) == 5
    assert normalizer.normalize(4) == 0


def test_learning_large_dfa():
    """Mostly the thing this is testing is actually that this runs in reasonable
    time. A naive breadth first search will run ~forever when trying to find this
    because it will have to explore all strings of length 19 before it finds one
    of length 20."""

    learner = LStar(lambda s: len(s) == 20)

    learner.learn(bytes(20))

    for i, s in enumerate(itertools.islice(learner.dfa.all_matching_strings(), 500)):
        assert len(s) == 20
        assert i == int.from_bytes(s, "big")


def varint_predicate(b):
    if not b:
        return False
    n = b[0] & 15
    if len(b) != n + 1:
        return False
    value = int.from_bytes(b[1:], "big")
    return value >= 10


@st.composite
def varint(draw):
    result = bytearray()
    result.append(draw(st.integers(1, 255)))
    n = result[0] & 15
    assume(n > 0)
    value = draw(st.integers(10, 256**n - 1))
    result.extend(value.to_bytes(n, "big"))
    return bytes(result)


@example([b"\x02\x01\n"])
@given(st.lists(varint(), min_size=1))
def test_can_learn_varint_predicate(varints):
    learner = LStar(varint_predicate)
    prev = -1
    while learner.generation != prev:
        prev = learner.generation

        for s in varints:
            learner.learn(s)

    for s in varints:
        assert learner.dfa.matches(s)


def test_cannot_reuse_dfa():
    x = LStar(lambda x: len(x) == 3)
    dfa = x.dfa
    x.learn(bytes(3))
    with pytest.raises(InvalidState):
        dfa.start