File: generate_default_media_types.py

package info (click to toggle)
python-whitenoise 6.8.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 472 kB
  • sloc: python: 2,040; makefile: 132; javascript: 10
file content (104 lines) | stat: -rwxr-xr-x 3,026 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
from __future__ import annotations

import argparse
import http.client
import re
from contextlib import closing
from pathlib import Path

# Absolute path of the directory that contains this script.
module_dir = Path(__file__).parent.resolve()
# The module that main() reads and rewrites, addressed relative to this
# script's directory.
media_types_py = module_dir / "../src/whitenoise/media_types.py"


def main() -> int:
    """Regenerate the ``default_types`` function inside media_types.py.

    The freshly generated function source replaces everything from the
    first ``def default_types`` up to the last ``}`` in the file (the
    pattern is greedy and uses ``re.DOTALL``).

    Command-line flags:
        --check: do not write anything; only report whether the file on
            disk differs from what would be generated.

    Returns:
        1 when ``--check`` is given and the file would change, otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true")
    args = parser.parse_args()

    func_str = get_default_types_function()
    text = media_types_py.read_text()
    new_text = re.sub(
        r"def default_types.*\}",
        # A callable replacement is inserted verbatim. Passing func_str as a
        # plain string would make re.sub interpret any backslash in the
        # generated source as an escape sequence or group reference.
        lambda _match: func_str,
        text,
        flags=re.DOTALL,
    )

    if new_text == text:
        # Already up to date: nothing to report or write.
        return 0

    if args.check:
        print("Would write changes")
        return 1

    print(f"Writing {media_types_py}")
    media_types_py.write_text(new_text)
    return 0


# Entries merged on top of the nginx-derived map in get_types_map(); they
# override any nginx entry with the same key. Keys starting with "." are file
# extensions; the remaining keys are bare file names.
EXTRA_MIMETYPES = {
    # nginx file uses application/javascript, but HTML specification recommends
    # text/javascript:
    ".js": "text/javascript",
    ".md": "text/markdown",
    ".mjs": "text/javascript",
    ".woff": "application/font-woff",
    ".woff2": "font/woff2",
    # The registered PKCS #7 media type is spelled "pkcs7" (with an "s").
    "apple-app-site-association": "application/pkcs7-mime",
    # Adobe Products - see:
    # https://www.adobe.com/devnet-docs/acrobatetk/tools/AppSec/xdomain.html#policy-file-host-basics
    "crossdomain.xml": "text/x-cross-domain-policy",
}


# Source text of the generated default_types() function. It is filled in with
# str.format(), so the literal braces of the returned dict are doubled and
# {entries} is the only placeholder.
FUNCTION_TEMPLATE = '''\
def default_types() -> dict[str, str]:
    """
    We use our own set of default media types rather than the system-supplied
    ones. This ensures consistent media type behaviour across varied
    environments.  The defaults are based on those shipped with nginx, with
    some custom additions.

    (Auto-generated by scripts/generate_default_media_types.py)
    """
    return {{
{entries}
    }}'''


def get_default_types_function() -> str:
    """Render the source code of ``default_types`` from the types map.

    Every suffix/media-type pair from ``get_types_map()`` becomes one
    indented dict-literal line; the joined lines are substituted into
    ``FUNCTION_TEMPLATE``.
    """
    entries = "\n".join(
        f'        "{key}": "{value}",'  # noqa: B028
        for key, value in get_types_map().items()
    )
    return FUNCTION_TEMPLATE.format(entries=entries)


def get_types_map() -> dict[str, str]:
    """Build the mapping of file suffix (or file name) to media type.

    The nginx ``mime.types`` text is scanned for ``<type> <ext> ...;``
    entries, each extension is stored with a leading dot, and
    ``EXTRA_MIMETYPES`` is applied last so it wins on conflicts. The
    returned dict is ordered by key.
    """
    collected: dict[str, str] = {}
    entries = re.findall(r"(\w+/.*?)\s+(.*?);", get_nginx_data())
    for media_type, extension_list in entries:
        # application/octet-stream is the default media type anyway, so
        # listing it explicitly would add nothing.
        if media_type != "application/octet-stream":
            collected.update(
                {f".{ext}": media_type for ext in extension_list.split()}
            )
    collected.update(EXTRA_MIMETYPES)
    return {key: collected[key] for key in sorted(collected)}


def get_nginx_data() -> str:
    """Download nginx's ``conf/mime.types`` from GitHub and return its text.

    Returns:
        The response body decoded to ``str``.

    Raises:
        RuntimeError: If the server responds with a status other than 200.
    """
    conn = http.client.HTTPSConnection("raw.githubusercontent.com")
    with closing(conn):
        conn.request("GET", "/nginx/nginx/master/conf/mime.types")
        response = conn.getresponse()
        # Explicit check instead of ``assert``: assertions are removed under
        # ``python -O``, which would let an error page be parsed as data.
        if response.status != 200:
            raise RuntimeError(
                "Failed to fetch nginx mime.types: "
                f"HTTP {response.status} {response.reason}"
            )
        return response.read().decode()


# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())