File: genencodings.py

package info (click to toggle)
html5-parser 0.4.9-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 1,764 kB
  • sloc: ansic: 32,441; python: 2,055; makefile: 13
file content (48 lines) | stat: -rwxr-xr-x 1,197 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

import json
import os

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

# Absolute path of this script; main() changes into its directory so the
# relative output path resolves the same way from any working directory.
self_path = os.path.abspath(__file__)

# Preamble of the generated module: a "do not edit" notice followed by the
# opening of the ``encodings`` dict literal that main() fills in and closes.
HEADER = (
    '# Do not edit\n'
    '# Generated by genencodings.py\n'
    '\n'
    'encodings = {\n'
)


def get_data(url='https://encoding.spec.whatwg.org/encodings.json'):
    """Download and parse the encodings index.

    :param url: Location of the JSON document to fetch. Defaults to the
        ``encodings.json`` file published at encoding.spec.whatwg.org.
    :return: The value produced by ``json.loads`` for the fetched document.
    :raises ValueError: If the payload is not valid UTF-8 or not valid JSON.
        Errors raised by ``urlopen`` itself propagate unchanged.
    """
    # Imported here so this function carries its own dependency.
    from contextlib import closing

    # closing() instead of a bare ``with urlopen(...)``: the Python 2
    # fallback imported at the top of the file returns an object that cannot
    # be used directly as a context manager, but it does have close().
    with closing(urlopen(url)) as response:
        payload = response.read()
    # JSON is exchanged as UTF-8. Decoding as UTF-8 accepts everything the
    # former ASCII decode accepted and no longer fails on non-ASCII text.
    return json.loads(payload.decode('utf-8'))


def get_mapping(data):
    """Yield ``(label, name)`` pairs, both lower-cased, for every label.

    ``data`` is iterated as a sequence of categories, each holding an
    ``'encodings'`` list whose entries provide a ``'name'`` and a list of
    ``'labels'``. Pairs are produced lazily, in document order.
    """
    # Flatten the category -> encoding nesting into one stream of entries.
    entries = (entry for group in data for entry in group['encodings'])
    for entry in entries:
        canonical = entry['name'].lower()
        for alias in entry['labels']:
            yield alias.lower(), canonical


def main():
    """Regenerate ``src/html5_parser/encoding_names.py``.

    Changes the working directory to the one containing this script, fetches
    the encodings index, and writes a module consisting of HEADER followed by
    one ``"label": "name",`` line per label (sorted by label) and a closing
    brace. When the same label occurs more than once, the last occurrence
    wins because the pairs are collected into a dict.
    """
    script_dir = os.path.dirname(self_path)
    os.chdir(script_dir)

    mapping = dict(get_mapping(get_data()))
    # Labels are unique dict keys, so sorting the items orders by label only.
    rows = ['  "%s": "%s",' % pair for pair in sorted(mapping.items())]
    rows.append('}')
    body = '\n'.join(rows)

    with open('src/html5_parser/encoding_names.py', 'wb') as out:
        out.write(HEADER.encode('ascii'))
        out.write(body.encode('ascii'))


# Regenerate the encodings table only when this file is run as a script;
# importing it as a module has no such effect.
if __name__ == '__main__':
    main()