#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

import os
import re
import subprocess

from lxml import html

# Absolute path of this script; main() changes into its directory so that the
# relative src/ output paths resolve next to the script.
self_path = os.path.abspath(__file__)
# Banner placed at the top of every generated header file.
HEADER = '''\
// Do not edit
// Generated by genattrs.py

'''


def generate_attr_headers(attrs):
    """Write the three generated C header fragments for *attrs*.

    For every attribute name, in iteration order, one entry is appended to
    each of these files (paths are relative to the current directory):

    * ``src/attr_strings.h`` -- the name as a quoted C string literal,
    * ``src/attr_enum.h`` -- an ``HTML_ATTR_<NAME>`` enumerator,
    * ``src/attr_sizes.h`` -- the length of the name; all lengths share a
      single line that is terminated once at the end.

    Every file starts with the HEADER banner and is written as UTF-8.
    """
    def macro_suffix(name):
        # '-' and ':' are not valid in C identifiers, so map them to '_'.
        return name.upper().replace('-', '_').replace(':', '_')

    with open("src/attr_strings.h", "wb") as strings_h, \
            open("src/attr_enum.h", "wb") as enum_h, \
            open("src/attr_sizes.h", "wb") as sizes_h:
        banner = HEADER.encode('utf-8')
        strings_h.write(banner)
        enum_h.write(banner)
        sizes_h.write(banner)
        for name in attrs:
            suffix = macro_suffix(name)
            string_entry = '"%s",\n' % name
            enum_entry = 'HTML_ATTR_%s,\n' % suffix
            size_entry = '%d, ' % len(name)
            strings_h.write(string_entry.encode('utf-8'))
            enum_h.write(enum_entry.encode('utf-8'))
            sizes_h.write(size_entry.encode('utf-8'))
        sizes_h.write(b'\n')


def generate_attr_perfect_hash(attrs, repetitions=400):
    """Generate ``src/attr_perf.h`` from a gperf perfect hash of *attrs*.

    The attribute names are fed to ``gperf`` on stdin, one per line.  From
    gperf's output the part preceding ``in_word_set()`` (the hash function)
    is kept, and an ``HTML_ATTR_MAP`` array is appended that mirrors gperf's
    ``wordlist[]``: every word becomes its ``HTML_ATTR_<NAME>`` enumerator
    and every empty slot becomes ``HTML_ATTR_LAST``.

    :param attrs: iterable of attribute name strings.
    :param repetitions: value passed to gperf's ``-m`` option.
    :raises SystemExit: with gperf's exit status if gperf fails, or with a
        message if ``in_word_set`` or ``wordlist[]`` cannot be found in
        gperf's output.
    """
    p = subprocess.Popen(
        'gperf -LANSI-C -H attr_hash -m{} /dev/stdin'.format(repetitions).split(),
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE)
    stdout = p.communicate('\n'.join(attrs).encode('utf-8'))[0]
    # communicate() already waits for the process, so returncode is set here.
    if p.returncode != 0:
        raise SystemExit(p.returncode)
    raw = stdout.decode('utf-8').splitlines()
    for i, line in enumerate(raw):
        if line.startswith('in_word_set'):
            break
    else:
        raise SystemExit('Failed to find in_word_set()')
    # Split one line above in_word_set so that the preceding line (presumably
    # the function's return type) stays with the lookup code, not the hash.
    lines = raw[:i - 1]
    del raw[:i - 1]
    raw = '\n'.join(raw)
    # Raw string: '\[' and '\s' are regex escapes, not string escapes.
    wordlist = re.search(r"wordlist\[\]\s+=\s+\{(.*?)\}", raw, re.DOTALL)
    if wordlist is None:
        raise SystemExit('Failed to find wordlist')
    wordlist = [w.strip().replace('"', '') for w in wordlist.group(1).split(',')]
    # Empty wordlist slots carry no attribute; map them to the LAST sentinel.
    attrlist = ["\tHTML_ATTR_" + (w.upper().replace('-', '_').replace(':', '_') if w else 'LAST')
                for w in wordlist]
    processed = '\n'.join(lines) + '\n\n'
    processed += 'static const HTMLAttr HTML_ATTR_MAP[] = {\n%s\n};' % '\n,'.join(attrlist)
    # Replace everything before the attr_hash definition line with our own
    # banner and a "static inline" declaration of the hash function.
    processed = re.sub(
        r'.+^attr_hash',
        HEADER + 'static inline unsigned int\nattr_hash',
        processed,
        flags=re.DOTALL | re.MULTILINE)
    with open('src/attr_perf.h', 'wb') as f:
        f.write(processed.encode('utf-8'))
        f.write(b'\n')


def get_attr_names(html_attrs_path='/t/Attributes', svg_attrs_path='/t/Attribute'):
    """Yield attribute names scraped from two locally saved MDN pages.

    HTML attribute names are yielded first, then SVG attribute names.  The
    same name may be yielded more than once; callers that need unique names
    must de-duplicate.

    :param html_attrs_path: path to a saved copy of the MDN "HTML Attributes"
        page.
    :param svg_attrs_path: path to a saved copy of the MDN "SVG Attribute"
        page.
    :raises IOError: (``OSError`` on Python 3) if a page cannot be read.
    :raises IndexError: if the expected table or heading is missing from a
        page.
    """
    # HTML Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
    with open(html_attrs_path, 'rb') as f:
        raw = f.read()
    root = html.fromstring(raw)
    table = root.xpath('//table[@class="standard-table"]/tbody')[0]
    for tr in table.findall('tr'):
        td = tr.find('td')
        code = td.find('code')
        # Strip before testing so whitespace-only text is skipped instead of
        # being yielded as an empty name.
        attr = (code.text or '').strip()
        # Names containing '*' (wildcard entries) are not concrete attributes.
        if attr and '*' not in attr:
            yield attr
    # SVG Attributes from
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute
    with open(svg_attrs_path, 'rb') as f:
        raw = f.read()
    root = html.fromstring(raw)
    h2 = root.xpath('//h2[@id="SVG_Attributes"]')[0]
    for ul in h2.xpath('following-sibling::div[1]/ul'):
        for attr in ul.xpath('./li/code/a/text()'):
            yield attr.strip()


def main():
    """Regenerate all attribute headers from the scraped attribute names."""
    # Output paths are relative, so work from the directory of this script.
    script_dir = os.path.dirname(self_path)
    os.chdir(script_dir)
    names = set(get_attr_names())
    # Always included, whether or not the scraped pages list it.
    names.add('data-reactid')
    attrs = sorted(names)
    generate_attr_headers(attrs)
    generate_attr_perfect_hash(attrs)


# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
