File: update_html5lib_tests.py

package info (click to toggle)
firefox-esr 91.13.0esr-1~deb11u1
links: PTS, VCS
area: main
in suites: bullseye
size: 3,375,652 kB
sloc: cpp: 5,762,054; javascript: 5,481,714; ansic: 3,121,191; python: 851,492; asm: 331,172; xml: 178,949; java: 155,554; sh: 63,704; makefile: 20,127; perl: 12,825; yacc: 4,583; cs: 3,846; objc: 3,026; lex: 1,720; exp: 762; pascal: 635; php: 436; lisp: 260; awk: 231; ruby: 103; sed: 53; sql: 46; csh: 45
file content (154 lines) | stat: -rw-r--r-- 5,397 bytes
parent folder | download | duplicates (4)
from __future__ import print_function

import sys
import os
import hashlib
import urllib
import itertools
import re
import json
import glob
import shutil

try:
    import genshi
    from genshi.template import MarkupTemplate

    from html5lib.tests import support
except ImportError:
    print("""This script requires the Genshi templating library and html5lib source

It is recommended that these are installed in a virtualenv:

virtualenv venv
source venv/bin/activate
pip install genshi
cd venv
git clone git@github.com:html5lib/html5lib-python.git html5lib
cd html5lib
git submodule init
git submodule update
pip install -e ./

Then run this script again, with the virtual environment still active.
When you are done, type "deactivate" to deactivate the virtual environment.
""")

TESTS_PATH = "html/syntax/parsing/"

def get_paths():
    script_path = os.path.dirname(os.path.abspath(__file__))
    repo_base = get_repo_base(script_path)
    tests_path = os.path.join(repo_base, TESTS_PATH)
    return script_path, tests_path

def get_repo_base(path):
    while path:
        if os.path.exists(os.path.join(path, ".git")):
            return path
        else:
            path = os.path.dirname(path)

def get_expected(data):
    data = "#document\n" + data
    return data

def get_hash(data, container=None):
    if container == None:
        container = ""
    return hashlib.sha1(b"#container%s#data%s"%(container.encode("utf8"),
                                               data.encode("utf8"))).hexdigest()

def make_tests(script_dir, out_dir, input_file_name, test_data):
    tests = []
    innerHTML_tests = []
    ids_seen = {}
    print(input_file_name)
    for test in test_data:
        if "script-off" in test:
            continue
        is_innerHTML = "document-fragment" in test
        data = test["data"]
        container = test["document-fragment"] if is_innerHTML else None
        assert test["document"], test
        expected = get_expected(test["document"])
        test_list = innerHTML_tests if is_innerHTML else tests
        test_id = get_hash(data, container)
        if test_id in ids_seen:
            print("WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, data, ids_seen[test_id]))
            continue
        ids_seen[test_id] = (container, data)
        test_list.append({'string_uri_encoded_input':"\"%s\""%urllib.parse.quote(data.encode("utf8")),
                          'input':data,
                          'expected':expected,
                          'string_escaped_expected':json.dumps(urllib.parse.quote(expected.encode("utf8"))),
                          'id':test_id,
                          'container':container
                          })
    path_normal = None
    if tests:
        path_normal = write_test_file(script_dir, out_dir,
                                      tests, "html5lib_%s"%input_file_name,
                                      "html5lib_test.xml")
    path_innerHTML = None
    if innerHTML_tests:
        path_innerHTML = write_test_file(script_dir, out_dir,
                                         innerHTML_tests, "html5lib_innerHTML_%s"%input_file_name,
                                         "html5lib_test_fragment.xml")

    return path_normal, path_innerHTML

def write_test_file(script_dir, out_dir, tests, file_name, template_file_name):
    file_name = os.path.join(out_dir, file_name + ".html")
    short_name = os.path.basename(file_name)

    with open(os.path.join(script_dir, template_file_name), "r") as f:
        template = MarkupTemplate(f)

    stream = template.generate(file_name=short_name, tests=tests)

    with open(file_name, "w") as f:
        f.write(str(stream.render('html', doctype='html5',
                              encoding="utf8"), "utf-8"))
    return file_name

def escape_js_string(in_data):
    return in_data.encode("utf8").encode("string-escape")

def serialize_filenames(test_filenames):
    return "[" + ",\n".join("\"%s\""%item for item in test_filenames) + "]"

def main():

    script_dir, out_dir = get_paths()

    test_files = []
    inner_html_files = []

    if len(sys.argv) > 2:
        test_iterator = zip(
            itertools.repeat(False),
            sorted(os.path.abspath(item) for item in
                   glob.glob(os.path.join(sys.argv[2], "*.dat"))))
    else:
        test_iterator = itertools.chain(
            zip(itertools.repeat(False),
                           sorted(support.get_data_files("tree-construction"))),
            zip(itertools.repeat(True),
                           sorted(support.get_data_files(
                        os.path.join("tree-construction", "scripted")))))

    for (scripted, test_file) in test_iterator:
        input_file_name = os.path.splitext(os.path.basename(test_file))[0]
        if scripted:
            input_file_name = "scripted_" + input_file_name
        test_data = support.TestData(test_file)
        test_filename, inner_html_file_name = make_tests(script_dir, out_dir,
                                                         input_file_name, test_data)
        if test_filename is not None:
            test_files.append(test_filename)
        if inner_html_file_name is not None:
            inner_html_files.append(inner_html_file_name)

if __name__ == "__main__":
    main()