1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
import atheris
with atheris.instrument_imports():
import hashlib
import sys
import warnings
import mdformat
from mdformat._util import is_md_equal
# Suppress all warnings by installing a blanket "ignore" filter as soon as
# this module is executed.
warnings.simplefilter("ignore")
def test_one_input(input_bytes: bytes) -> None:
    """Fuzz entry point: format one input and verify the result.

    The raw fuzzer bytes are turned into a Unicode string, formatted with
    ``mdformat.text`` and then compared against the original text with
    ``is_md_equal``. Whenever formatting raises, or the comparison fails,
    the input is reported through ``handle_err`` before the exception
    propagates to the fuzzer.

    Args:
        input_bytes: Raw bytes supplied by the fuzzer.

    Raises:
        Exception: If the formatted text does not compare equal to the
            input. Anything raised by ``mdformat.text`` is re-raised as is.
    """
    # mdformat takes text, so derive a str from the fuzzer's bytes first.
    markdown = atheris.FuzzedDataProvider(input_bytes).ConsumeUnicode(sys.maxsize)

    try:
        formatted = mdformat.text(markdown)
    except BaseException:
        # Record the offending input, then let the original error surface.
        handle_err(markdown)
        raise

    if is_md_equal(markdown, formatted):
        return

    handle_err(markdown)
    raise Exception("Formatted Markdown not equal!")
def handle_err(data):
    """Report a failing fuzz input on stderr and save it to a crash file.

    The input's type, text and per-character code points are written to
    stderr. The encoded input is then stored in the current working
    directory as ``crash-utf8-<sha256 hex digest of the written bytes>``.

    Args:
        data: The ``str`` input that triggered the failure.
    """
    codepoints = [hex(ord(x)) for x in data]
    sys.stderr.write(f"Input was {type(data)}:\n{data}\nCodepoints:\n{codepoints}\n")

    # Atheris already writes crash data to a file, but it seems it is not
    # UTF-8 encoded. Write another file here that uses UTF-8.
    #
    # The fuzzed string may contain lone surrogates, which a strict UTF-8
    # encode rejects with UnicodeEncodeError. That would abort this handler
    # and hide the failure being reported. "surrogatepass" encodes them
    # anyway and produces the same bytes as a strict encode for every
    # surrogate-free input. Read such a file back with
    # ``bytes.decode("utf-8", errors="surrogatepass")``.
    data_bytes = data.encode("utf-8", errors="surrogatepass")
    filename = "crash-utf8-" + hashlib.sha256(data_bytes).hexdigest()
    with open(filename, "wb") as f:
        f.write(data_bytes)
    sys.stderr.write(f"Wrote UTF-8 encoded data to {filename}\n")
    sys.stderr.flush()
def main():
    """Pass the command-line arguments to Atheris and start fuzzing."""
    # For possible options, see https://llvm.org/docs/LibFuzzer.html#options
    atheris.Setup(sys.argv, test_one_input)
    atheris.Fuzz()
# Start fuzzing only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|