1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
|
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import re
import subprocess
from lxml import html
# Absolute location of this script; main() changes into its directory before
# generating anything, so the relative src/ output paths resolve from there.
self_path = os.path.abspath(__file__)

# Banner placed at the top of every generated header file.
HEADER = (
    '// Do not edit\n'
    '// Generated by genattrs.py\n'
)
def generate_attr_headers(attrs):
    """Write the three attribute header fragments under ``src/``.

    For every name in *attrs* this emits:

    * ``src/attr_strings.h`` -- the name as a quoted C string, one per line;
    * ``src/attr_enum.h``    -- the matching ``HTML_ATTR_*`` enumerator, with
      ``-`` and ``:`` replaced by ``_`` and the name upper-cased;
    * ``src/attr_sizes.h``   -- the length of the name, comma separated on a
      single line.

    Each file starts with the HEADER banner. All output is UTF-8 encoded.
    """
    with open("src/attr_strings.h", "wb") as strings_out, \
            open("src/attr_enum.h", "wb") as enum_out, \
            open("src/attr_sizes.h", "wb") as sizes_out:
        # Every generated file begins with the same banner.
        for out in (strings_out, enum_out, sizes_out):
            out.write(HEADER.encode('utf-8'))

        for name in attrs:
            # C identifiers cannot contain '-' or ':'.
            enum_suffix = name.upper().replace('-', '_').replace(':', '_')
            string_entry = '"%s",\n' % name
            enum_entry = 'HTML_ATTR_%s,\n' % enum_suffix
            size_entry = '%d, ' % len(name)
            strings_out.write(string_entry.encode('utf-8'))
            enum_out.write(enum_entry.encode('utf-8'))
            sizes_out.write(size_entry.encode('utf-8'))

        # The sizes all share one line; terminate it.
        sizes_out.write(b'\n')
def generate_attr_perfect_hash(attrs, repetitions=400):
    """Generate ``src/attr_perf.h``, a gperf perfect hash over *attrs*.

    The attribute names are piped, one per line, to the external ``gperf``
    program. Its output is then post-processed:

    * everything before the ``attr_hash`` function is replaced by the HEADER
      banner and a ``static inline unsigned int`` declaration;
    * everything from the line preceding ``in_word_set`` onwards is dropped,
      after the ``wordlist[]`` array found there has been converted into an
      ``HTML_ATTR_MAP`` array of ``HTML_ATTR_*`` enumerators (empty slots
      become ``HTML_ATTR_LAST``).

    :param attrs: iterable of attribute name strings.
    :param repetitions: value passed to gperf's ``-m`` option.
    :raises SystemExit: with gperf's exit status if gperf fails, or with a
        message if ``in_word_set`` or ``wordlist`` cannot be found in the
        gperf output.
    """
    cmd = [
        'gperf', '-LANSI-C', '-H', 'attr_hash',
        '-m{}'.format(repetitions), '/dev/stdin',
    ]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    stdout = p.communicate('\n'.join(attrs).encode('utf-8'))[0]
    # communicate() waits for the process, so returncode is already set.
    if p.returncode != 0:
        raise SystemExit(p.returncode)

    raw = stdout.decode('utf-8').splitlines()
    for i, line in enumerate(raw):
        if line.startswith('in_word_set'):
            break
    else:
        raise SystemExit('Failed to find in_word_set()')

    # Cut one line before in_word_set (presumably its return-type line).
    # Clamp at 0 so a match on the very first line cannot turn into a
    # negative slice index.
    split_at = max(i - 1, 0)
    lines = raw[:split_at]
    tail = '\n'.join(raw[split_at:])

    # Raw string: '\[' and '\s' are regex escapes, not string escapes.
    wordlist = re.search(r"wordlist\[\]\s+=\s+{(.*?)}", tail, re.DOTALL)
    if wordlist is None:
        raise SystemExit('Failed to find wordlist')
    wordlist = [w.strip().replace('"', '') for w in wordlist.group(1).split(',')]
    # Empty entries are unused hash slots; map them to the LAST sentinel.
    attrlist = ["\tHTML_ATTR_" + (w.upper().replace('-', '_').replace(':', '_') if w else 'LAST')
                for w in wordlist]

    processed = '\n'.join(lines) + '\n\n'
    processed += 'static const HTMLAttr HTML_ATTR_MAP[] = {\n%s\n};' % '\n,'.join(attrlist)
    # Drop the gperf preamble up to the attr_hash definition and substitute
    # our own banner and function qualifiers.
    processed = re.sub(
        r'.+^attr_hash',
        HEADER + 'static inline unsigned int\nattr_hash',
        processed,
        flags=re.DOTALL | re.MULTILINE)
    with open('src/attr_perf.h', 'wb') as f:
        f.write(processed.encode('utf-8'))
        f.write(b'\n')
def _load_html_tree(path):
    """Parse the HTML document stored at *path* and return its root element."""
    # Read inside a with-block so the file handle is closed deterministically.
    with open(path, 'rb') as f:
        return html.fromstring(f.read())


def get_attr_names(html_attrs_path='/t/Attributes', svg_attrs_path='/t/Attribute'):
    """Yield attribute names scraped from locally saved MDN reference pages.

    HTML attribute names are yielded first, then SVG attribute names. Names
    are stripped of surrounding whitespace; duplicates are not removed.

    :param html_attrs_path: saved copy of the MDN HTML attributes page.
    :param svg_attrs_path: saved copy of the MDN SVG attributes page.
    :raises IndexError: if the expected table or heading is missing from a
        page.
    """
    # HTML Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
    root = _load_html_tree(html_attrs_path)
    table = root.xpath('//table[@class="standard-table"]/tbody')[0]
    for tr in table.findall('tr'):
        td = tr.find('td')
        # Compare against None explicitly: lxml elements without children
        # are falsy.
        code = td.find('code') if td is not None else None
        if code is None:
            # Row has no <td>/<code> cell, so there is no name to extract.
            continue
        attr = code.text
        # Names containing '*' are skipped.
        if attr and '*' not in attr:
            yield attr.strip()
    # SVG Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute
    root = _load_html_tree(svg_attrs_path)
    h2 = root.xpath('//h2[@id="SVG_Attributes"]')[0]
    for ul in h2.xpath('following-sibling::div[1]/ul'):
        for attr in ul.xpath('./li/code/a/text()'):
            yield attr.strip()
def main():
    """Regenerate every attribute header from the scraped attribute names.

    Switches to the directory containing this script first, so the relative
    ``src/`` paths used by the generators resolve there.
    """
    script_dir = os.path.dirname(self_path)
    os.chdir(script_dir)

    names = set(get_attr_names())
    # 'data-reactid' is always included alongside the scraped names.
    names.add('data-reactid')
    attrs = sorted(names)

    generate_attr_headers(attrs)
    generate_attr_perfect_hash(attrs)


if __name__ == '__main__':
    main()
|