File: gen-odf-tokens.py

package info (click to toggle)
liborcus 0.20.1-2
links: PTS, VCS
area: main
in suites: experimental
size: 16,644 kB
sloc: xml: 78,349; cpp: 74,365; sh: 4,626; makefile: 2,787; python: 2,614
file content (192 lines) | stat: -rwxr-xr-x 5,851 bytes
parent folder | download | duplicates (4)
#!/usr/bin/env python
########################################################################
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
########################################################################

import xml.parsers.expat
import sys
import argparse
from pathlib import Path

import token_util


NS_RNG = "http://relaxng.org/ns/structure/1.0"


class TokenParser:

    def __init__ (self, strm):
        self.__strm = strm
        self.__elem = None
        self.tokens = set()

    def start_element(self, name, attrs):
        self.__elem = name
        if name in {f"{NS_RNG}:element", f"{NS_RNG}:attribute"} and "name" in attrs:
            tokens = attrs['name'].split(':')
            n = len(tokens)
            if n != 2:
                sys.stderr.write("unrecognized token type: "+attrs['name'])
                sys.exit(1)

            self.tokens.add(tokens[1])

            for token in tokens:
                self.tokens.add(token)

    def character(self, data):
        if self.__elem == f"{NS_RNG}:value":
            s = data.strip()
            if len(s) > 0:
                self.tokens.add(s)

    def parse(self):
        p = xml.parsers.expat.ParserCreate(encoding="utf-8", namespace_separator=":")
        p.StartElementHandler = self.start_element
        p.CharacterDataHandler = self.character
        p.Parse(self.__strm, 1)

        self.tokens = sorted(self.tokens)


class NSParser:

    def __init__ (self, strm):
        self.__strm = strm
        self.__elem = None
        self.ns_values = dict()  # namespace values

    def start_element(self, name, attrs):
        self.__elem = name
        if name.endswith("grammar"):
            names = attrs.keys()
            for name in names:
                tokens = name.split(':')
                if len(tokens) < 2 or tokens[0] != "xmlns":
                    continue

                val = attrs[name]
                self.ns_values[tokens[1]] = val

    def parse(self):
        p = xml.parsers.expat.ParserCreate(encoding="utf-8")
        p.StartElementHandler = self.start_element
        p.Parse(self.__strm, 1)

        ns_values = list()
        for k, v in self.ns_values.items():
            if v == "http://relaxng.org/ns/structure/1.0":
                # skip the relaxNG namespace since it is only used in the schema document.
                continue
            ns_values.append((k, v))

        self.ns_values = sorted(ns_values, key=lambda x: x[0])


def gen_namespace_tokens(filepath, ns_values):

    # header (.hpp)
    filepath_hpp = filepath + "_hpp.inl"
    outfile = open(filepath_hpp, 'w')
    outfile.write("namespace orcus {\n\n")
    for key, _ in ns_values:
        outfile.write("extern const xmlns_id_t NS_odf_")
        outfile.write(key)
        outfile.write(";\n")
    outfile.write("\nextern const xmlns_id_t* NS_odf_all;\n")
    outfile.write("\n}\n\n")
    outfile.close()

    # source (.cpp)
    filepath_cpp = filepath + "_cpp.inl"
    outfile = open(filepath_cpp, 'w')
    outfile.write("namespace orcus {\n\n")
    for key, value in ns_values:
        outfile.write("const xmlns_id_t NS_odf_")
        outfile.write(key)
        outfile.write(" = \"")
        outfile.write(value)
        outfile.write("\"")
        outfile.write(";\n")

    outfile.write("\n")
    outfile.write("namespace {\n\n")
    outfile.write("const xmlns_id_t odf_ns[] = {\n")
    for key, _ in ns_values:
        outfile.write("    NS_odf_")
        outfile.write(key)
        outfile.write(",\n")
    outfile.write("    nullptr\n")
    outfile.write("};\n\n")
    outfile.write("} // anonymous\n\n")

    outfile.write("const xmlns_id_t* NS_odf_all = odf_ns;\n\n")

    outfile.write("}\n\n")
    outfile.close()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ns-file-prefix", type=str,
        help="file name prefix for optioal namespace constant files")
    parser.add_argument(
        "--summary-output", type=Path,
        help="optional output file to write collected token data summary")
    parser.add_argument(
        "--token-constants", type=Path,
        help="path to C++ output file where token consants are to be written to")
    parser.add_argument(
        "--token-names", type=Path,
        help="path to C++ output file where token names are to be written to")
    parser.add_argument(
        "odf_schema", metavar="ODF-SCHEMA", type=Path, help="path to RNG ODF schema file")
    args = parser.parse_args()

    if not args.odf_schema.is_file():
        print(f"{args.odf_schema} is not a valid file.", file=sys.stderr)
        sys.exit(1)

    schema_content = args.odf_schema.read_text()
    parser = TokenParser(schema_content)
    parser.parse()
    tokens = parser.tokens

    parser = NSParser(schema_content)
    parser.parse()
    ns_values = parser.ns_values

    if args.summary_output:
        summary_content_buf = list()
        summary_content_buf.append("list of tokens:")

        for token in tokens:
            summary_content_buf.append(f"- \"{token}\"")

        summary_content_buf.append("list of namespaces:")

        for ns, value in ns_values:
            summary_content_buf.append(f"- {ns}: \"{value}\"")

        args.summary_output.write_text("\n".join(summary_content_buf))

    if args.token_constants:
        with open(args.token_constants, "w") as f:
            token_util.gen_token_constants(f, tokens)

    if args.token_names:
        with open(args.token_names, "w") as f:
            token_util.gen_token_names(f, tokens)

    if args.ns_file_prefix is not None:
        gen_namespace_tokens(args.ns_file_prefix, ns_values)


if __name__ == '__main__':
    main()