#!/usr/bin/env python

import json
import re
from pathlib import Path
from pprint import pformat
from urllib.request import urlopen

# Download the web-platform-tests selector fixtures as text.  A timeout is
# given so that a stalled connection raises an error instead of blocking the
# script indefinitely (urlopen waits forever by default).
with urlopen('http://wpt.live/dom/nodes/selectors.js', timeout=30) as fd:
    js = fd.read().decode('utf-8')

# Rewrite the downloaded JavaScript so that the array literals it contains
# can be handed to json.loads().  The substitutions are applied one after
# the other, in the order listed.
js = js.replace(r'\_', '_')  # Unescape underscores
for pattern, replacement, flags in (
    (r'/\*.*?\*/', '', re.S),  # Remove /* comments */
    (r'( +|^)//.*$', '', re.M),  # Remove // comments
    # NOTE: the `0-f` range spans ASCII '0' through 'f', which also covers
    # uppercase letters and `_`, not only hexadecimal digits.
    (r',\s+testType:[0-fx\s\|]+\}', '}', 0),  # Remove testType
    (r'(\{|,\s+)(\w+):', r'\1"\2":', 0),  # Use strings for keys
    (r',\s+(\]|\})', r'\1', re.M),  # Remove trailing commas
):
    js = re.sub(pattern, replacement, js, flags=flags)

def _extract_array(name):
    """Return the decoded JSON array assigned to ``var <name>`` in ``js``.

    The array text is taken from the ``var <name> = [...];`` statement found
    in the cleaned-up source held in the module-level ``js`` string.

    Raises:
        ValueError: if no such statement is present, which is reported
            explicitly instead of failing with an AttributeError on None.
        json.JSONDecodeError: if the matched text is not valid JSON.
    """
    match = re.search(rf'var {name} = (\[.*?\]);', js, flags=re.S)
    if match is None:
        raise ValueError(
            f'Could not find "var {name} = [...];" in the downloaded source')
    return json.loads(match.group(1))


invalid_selectors = _extract_array('invalidSelectors')
valid_selectors = _extract_array('validSelectors')

# Render both selector lists as the text of a Python module: a header
# comment followed by two assignments separated by blank lines.
header = '# File generated by make_selectors.py, do not edit\n'
python = '\n'.join((
    header,
    f'invalid_selectors = {pformat(invalid_selectors, indent=4, width=79)}',
    '',
    f'valid_selectors = {pformat(valid_selectors, indent=4, width=79)}',
    '',
))

# Adjust pformat's layout: move the first item after an opening bracket or
# brace onto its own line and shrink the deepest continuation indent.
for found, wanted in (
    ('= [   ', '= [\n    '),
    ('    {   ', '    {\n        '),
    (': [   ', ': [\n            '),
    ('                      ', '            '),
):
    python = python.replace(found, wanted)

# Write the generated module next to this script.  The encoding is given
# explicitly so the output does not depend on the platform's locale: any
# non-ASCII selector text is stored as UTF-8, Python's default source encoding.
Path(__file__).parent.joinpath('w3_selectors.py').write_text(
    python, encoding='utf-8')
