File: fuzz.py

package info (click to toggle)
mdformat 1.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 748 kB
  • sloc: python: 11,287; makefile: 9
file content (55 lines) | stat: -rw-r--r-- 1,522 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import atheris

with atheris.instrument_imports():
    import hashlib
    import sys
    import warnings

    import mdformat
    from mdformat._util import is_md_equal

# Install a blanket "ignore" filter so that no warning of any category is
# reported once this module has been loaded.
warnings.simplefilter(action="ignore")


def test_one_input(input_bytes: bytes) -> None:
    """Format one fuzzer-generated input and check the result is equivalent.

    The raw bytes are turned into a ``str`` through Atheris'
    ``FuzzedDataProvider`` and passed to ``mdformat.text``. The input is
    reported via ``handle_err`` and an exception is raised when formatting
    itself raises, or when ``is_md_equal`` says the formatted text differs
    from the input.

    Args:
        input_bytes: Raw bytes supplied by the fuzzing engine.

    Raises:
        BaseException: Whatever ``mdformat.text`` raised, re-raised unchanged
            after the input has been reported.
        Exception: If the formatted Markdown is not equal to the input.
    """
    # mdformat works on text, so derive a Unicode string from the raw bytes.
    # sys.maxsize is passed as the requested character count.
    provider = atheris.FuzzedDataProvider(input_bytes)
    markdown = provider.ConsumeUnicode(sys.maxsize)

    try:
        formatted = mdformat.text(markdown)
    except BaseException:
        # Record the offending input first, then let the error propagate.
        handle_err(markdown)
        raise

    if is_md_equal(markdown, formatted):
        return

    handle_err(markdown)
    raise Exception("Formatted Markdown not equal!")


def handle_err(data: str) -> None:
    """Report a failing fuzz input on stderr and save it to a crash file.

    The input and its code points are echoed to stderr, and the encoded input
    is written to ``crash-utf8-<sha256 hex digest of the bytes>`` in the
    current working directory.

    A ``str`` can hold lone surrogate code points, which strict UTF-8 cannot
    encode. Such input is still saved: the file is then written with the
    ``surrogatepass`` error handler (and the stderr note says so), so the
    exact string can be recovered with
    ``bytes.decode("utf-8", "surrogatepass")``.

    Args:
        data: The text that triggered the failure.
    """
    codepoints = [hex(ord(x)) for x in data]
    # Escape lone surrogates up front so that a strict stderr stream cannot
    # raise while we are reporting; text without surrogates is unchanged.
    printable = data.encode("utf-8", "backslashreplace").decode("utf-8")
    sys.stderr.write(
        f"Input was {type(data)}:\n{printable}\nCodepoints:\n{codepoints}\n"
    )

    # Atheris already writes crash data to a file, but it seems it is not
    # UTF-8 encoded. Write another file here whose encoding is known.
    try:
        data_bytes = data.encode()
        encoding_note = "UTF-8 encoded"
    except UnicodeEncodeError:
        # Lone surrogates: never lose the reproducer because of the reporter.
        data_bytes = data.encode("utf-8", "surrogatepass")
        encoding_note = "UTF-8 (surrogatepass) encoded"

    filename = "crash-utf8-" + hashlib.sha256(data_bytes).hexdigest()
    with open(filename, "wb") as f:
        f.write(data_bytes)
    sys.stderr.write(f"Wrote {encoding_note} data to {filename}\n")

    sys.stderr.flush()


def main():
    """Configure Atheris from the command line and start fuzzing.

    ``sys.argv`` is passed unchanged to ``atheris.Setup`` as the option list,
    with ``test_one_input`` as the function to fuzz. For possible options, see
    https://llvm.org/docs/LibFuzzer.html#options
    """
    atheris.Setup(sys.argv, test_one_input)
    atheris.Fuzz()


# Start the fuzzer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()