File: build_corpus.py

package info (click to toggle)
oxigraph 0.5.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,848 kB
  • sloc: python: 1,949; cpp: 158; sh: 145; makefile: 42
file content (30 lines) | stat: -rw-r--r-- 917 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import hashlib
import random
from pathlib import Path

# Build fuzzing seed files: for every fuzz target, collect the files under
# `base` that carry the target's extension, insert one 0xFF byte at a random
# offset, and store the result under fuzz/corpus/<target>/ named by the
# SHA-256 of its content.

# Resolve first: `__file__` may be relative (e.g. when the script is launched
# from its own directory on older interpreters), in which case
# `.parent.parent` would collapse to "." instead of the real grandparent.
base = Path(__file__).resolve().parent.parent

# (fuzz target directory name, file extension of its seed inputs)
for target, ext in [
    ("sparql_query", "rq"),
    ("sparql_update", "ru"),
    ("sparql_results_xml", "srx"),
    ("sparql_results_json", "srj"),
    ("sparql_results_tsv", "tsv"),
    ("n3", "n3"),
    ("nquads", "nq"),
    ("trig", "trig"),
    ("rdf_xml", "rdf"),
    ("jsonld", "jsonld"),
]:
    target_dir = base / "fuzz" / "corpus" / target
    for f in base.rglob(f"*.{ext}"):
        # Only look at the part of the path below `base`; otherwise a checkout
        # located under a directory whose name contains "manifest" would make
        # every single file look like a manifest.
        if "manifest" in str(f.relative_to(base)):
            continue  # we skip the manifests
        if not f.is_file():
            continue  # a directory may match the glob too; it has no content
        data = f.read_bytes()
        # randint is inclusive on both ends, so the byte can also be appended
        # after the last byte (or be the only byte of an empty file).
        pos = random.randint(0, len(data))
        # 0xFF never occurs in valid UTF-8; presumably this makes each seed
        # contain an encoding error for the fuzzer to start from -- confirm.
        data = data[:pos] + b"\xff" + data[pos:]
        digest = hashlib.sha256(data).hexdigest()
        # Created lazily so targets without any matching file get no directory.
        target_dir.mkdir(parents=True, exist_ok=True)
        (target_dir / digest).write_bytes(data)