#!/usr/bin/env python3
"""This file generates the lookup table from mime-db/db.json.
You only need to run this when updating mime-db.
"""
import collections
import io
import json
def assert_boring_ascii(text):
    """Check that it's safe to do code generation with this string.

    The string must be lowercase, printable ASCII and must not contain
    quotes, a backslash or a slash.

    Args:
        text: The string to validate.

    Raises:
        AssertionError: If ``text`` fails any of the checks. The error is
            raised explicitly (not through an ``assert`` statement) so the
            checks still run under ``python -O``, and the message names
            the offending string and the check it failed.
    """
    # If there's unicode we'll get incorrect offsets
    # If mime-db ever starts containing unicode (unlikely!), process strings
    # with .encode("utf8") first
    checks = (
        (text.isascii(), "is not ASCII"),
        (text.lower() == text, "is not lowercase"),
        (text.isprintable(), "contains a non-printable character"),
        ('"' not in text, "contains a double quote"),
        ("'" not in text, "contains a single quote"),
        ("\\" not in text, "contains a backslash"),
        ("/" not in text, "contains a slash"),
    )
    for passed, problem in checks:
        if not passed:
            raise AssertionError(
                f"unsafe string for code generation: {text!r} {problem}"
            )
# Load the database. The encoding is given explicitly so the result does not
# depend on the locale's default encoding.
with open("mime-db/db.json", encoding="utf-8") as f:
    db = json.load(f)

# Group as type -> {subtype: first listed extension}. Entries without any
# extensions are left out. Sorting the items fixes the output order.
by_type = collections.defaultdict(dict)
for mime, info in sorted(db.items()):
    if extensions := info.get("extensions"):
        type_, subtype = mime.split("/")
        by_type[type_][subtype] = extensions[0]

# raw_data holds every subtype immediately followed by its extension, with no
# separators; lookup_text holds the table that indexes into it.
raw_data = io.StringIO()
lookup_text = io.StringIO()
lookup_text.write(
    "// This file is generated by build.py\n"
    "// Do not edit manually\n"
    "&[\n"
)

# Position of the next entry inside raw_data. It is counted explicitly
# because TextIOBase.tell() is documented as an opaque number. Everything
# written is checked to be ASCII, so characters and bytes coincide.
offset = 0
for type_, extensions in by_type.items():
    assert_boring_ascii(type_)
    lookup_text.write(f'("{type_}", &[\n')
    for subtype, extension in extensions.items():
        assert_boring_ascii(subtype)
        assert_boring_ascii(extension)
        # Raised explicitly so the check also runs under ``python -O``.
        if "." in extension:
            raise AssertionError(
                f"extension {extension!r} of {type_}/{subtype} contains a dot"
            )
        # Entry(offset into raw_data, subtype length, extension length)
        lookup_text.write(
            f"// {type_}/{subtype}: {extension}\n"
            f"Entry({offset}, {len(subtype)}, {len(extension)}),\n"
        )
        raw_data.write(subtype)
        raw_data.write(extension)
        offset += len(subtype) + len(extension)
    lookup_text.write("]),\n")
lookup_text.write("]\n")

# Fixed encoding and newline keep the generated files byte-identical on every
# platform; "ascii" also fails loudly if non-ASCII text ever slips through.
with open("src/raw_data", "w", encoding="ascii", newline="\n") as f:
    f.write(raw_data.getvalue())
with open("src/lookup", "w", encoding="ascii", newline="\n") as f:
    f.write(lookup_text.getvalue())
|