1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
#!/usr/bin/env python
########################################################################
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
########################################################################
import xml.parsers.expat
import sys
import argparse
from pathlib import Path
import token_util
# Namespace URI of RELAX NG; expat reports element names as "<uri>:<local>"
# when the parser is created with namespace_separator=":".
NS_RNG = "http://relaxng.org/ns/structure/1.0"


class TokenParser:
    """Collect token strings from a RELAX NG schema document.

    Tokens are taken from two places:

    * the ``name`` attribute of RELAX NG ``element`` and ``attribute``
      elements, which must have the form ``prefix:local``; both parts are
      recorded;
    * the stripped, non-empty text content of RELAX NG ``value`` elements.

    After :meth:`parse` returns, ``tokens`` is a sorted list of the unique
    strings found (before parsing it is an empty set).
    """

    # Fully-qualified names (as reported by expat) of the elements whose
    # "name" attribute contributes tokens.
    _NAMED_ELEMENTS = frozenset({f"{NS_RNG}:element", f"{NS_RNG}:attribute"})

    # Fully-qualified name of the element whose text content is a token.
    _VALUE_ELEMENT = f"{NS_RNG}:value"

    def __init__(self, strm):
        """Store the schema content *strm* (the data later handed to expat)."""
        self.__strm = strm
        self.__elem = None  # name of the element currently being read
        self.tokens = set()

    def start_element(self, name, attrs):
        """Handle an opening tag and record tokens from its ``name`` attribute.

        Writes a message to stderr and exits with status 1 when the attribute
        value does not consist of exactly two colon-separated parts.
        """
        self.__elem = name
        if name not in self._NAMED_ELEMENTS or "name" not in attrs:
            return

        parts = attrs["name"].split(":")
        if len(parts) != 2:
            sys.stderr.write("unrecognized token type: " + attrs["name"] + "\n")
            sys.exit(1)

        # Both the prefix and the local name are recorded as tokens.
        self.tokens.update(parts)

    def end_element(self, name):
        """Handle a closing tag.

        Forget the current element so that text following a closing tag
        (e.g. text placed after ``</value>``) is not attributed to it.
        """
        self.__elem = None

    def character(self, data):
        """Record the stripped text of a ``value`` element as a token."""
        if self.__elem != self._VALUE_ELEMENT:
            return

        text = data.strip()
        if text:
            self.tokens.add(text)

    def parse(self):
        """Parse the stored content and replace ``tokens`` with a sorted list."""
        p = xml.parsers.expat.ParserCreate(encoding="utf-8", namespace_separator=":")
        # Without buffering, expat may deliver one text node in several
        # callbacks (for instance around entity references), which would
        # record fragments of a value instead of the whole value.
        p.buffer_text = True
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.character
        p.Parse(self.__strm, True)
        self.tokens = sorted(self.tokens)
class NSParser:
    """Collect the namespace declarations found on ``grammar`` elements.

    During parsing ``ns_values`` is a dict mapping namespace prefix to URI.
    After :meth:`parse` returns it is a list of ``(prefix, uri)`` tuples
    sorted by prefix, with the RELAX NG namespace itself left out.
    """

    def __init__(self, strm):
        """Store the schema content *strm* (the data later handed to expat)."""
        self.__strm = strm
        self.__elem = None
        self.ns_values = {}  # namespace values

    def start_element(self, name, attrs):
        """Record every ``xmlns:<prefix>`` attribute of a ``grammar`` element."""
        self.__elem = name
        if not name.endswith("grammar"):
            return

        for attr_name, uri in attrs.items():
            parts = attr_name.split(':')
            # Only prefixed declarations ("xmlns:foo") are of interest; a
            # bare "xmlns" or any unrelated attribute is ignored.
            if len(parts) >= 2 and parts[0] == "xmlns":
                self.ns_values[parts[1]] = uri

    def parse(self):
        """Parse the stored content and finalize ``ns_values``."""
        expat_parser = xml.parsers.expat.ParserCreate(encoding="utf-8")
        expat_parser.StartElementHandler = self.start_element
        expat_parser.Parse(self.__strm, 1)

        # Skip the relaxNG namespace since it is only used in the schema
        # document.
        kept = [
            (prefix, uri)
            for prefix, uri in self.ns_values.items()
            if uri != "http://relaxng.org/ns/structure/1.0"
        ]
        kept.sort(key=lambda pair: pair[0])
        self.ns_values = kept
def gen_namespace_tokens(filepath, ns_values):
    """Write the C++ namespace constant include files.

    Two files are produced, named by appending ``_hpp.inl`` and ``_cpp.inl``
    to *filepath*: the first holds ``extern`` declarations, the second the
    matching definitions plus a null-terminated array of all of them.

    Args:
        filepath: Output file name prefix (a string or path-like object).
        ns_values: Iterable of ``(prefix, uri)`` pairs.
    """
    # The pairs are walked several times below, so materialize them in case
    # a one-shot iterable was passed in.
    ns_values = list(ns_values)

    # header (.hpp)
    with open(f"{filepath}_hpp.inl", "w", encoding="utf-8") as outfile:
        outfile.write("namespace orcus {\n\n")
        for key, _ in ns_values:
            outfile.write(f"extern const xmlns_id_t NS_odf_{key};\n")
        outfile.write("\nextern const xmlns_id_t* NS_odf_all;\n")
        outfile.write("\n}\n\n")

    # source (.cpp)
    with open(f"{filepath}_cpp.inl", "w", encoding="utf-8") as outfile:
        outfile.write("namespace orcus {\n\n")
        for key, value in ns_values:
            outfile.write(f"const xmlns_id_t NS_odf_{key} = \"{value}\";\n")
        outfile.write("\n")
        outfile.write("namespace {\n\n")
        outfile.write("const xmlns_id_t odf_ns[] = {\n")
        for key, _ in ns_values:
            outfile.write(f" NS_odf_{key},\n")
        outfile.write(" nullptr\n")
        outfile.write("};\n\n")
        outfile.write("} // anonymous\n\n")
        outfile.write("const xmlns_id_t* NS_odf_all = odf_ns;\n\n")
        outfile.write("}\n\n")
def main():
    """Command-line entry point.

    Parses the RNG ODF schema given on the command line, collects its tokens
    and namespace declarations, and writes whichever optional outputs were
    requested: a plain-text summary, the token constants file, the token
    names file and the namespace constant files.

    Exits with status 1 when the schema path is not an existing file.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--ns-file-prefix", type=str,
        help="file name prefix for optional namespace constant files")
    arg_parser.add_argument(
        "--summary-output", type=Path,
        help="optional output file to write collected token data summary")
    arg_parser.add_argument(
        "--token-constants", type=Path,
        help="path to C++ output file where token constants are to be written to")
    arg_parser.add_argument(
        "--token-names", type=Path,
        help="path to C++ output file where token names are to be written to")
    arg_parser.add_argument(
        "odf_schema", metavar="ODF-SCHEMA", type=Path, help="path to RNG ODF schema file")
    args = arg_parser.parse_args()

    if not args.odf_schema.is_file():
        print(f"{args.odf_schema} is not a valid file.", file=sys.stderr)
        sys.exit(1)

    # The parsers are created with encoding="utf-8", so decode the schema the
    # same way instead of relying on the platform's default encoding.
    schema_content = args.odf_schema.read_text(encoding="utf-8")

    token_parser = TokenParser(schema_content)
    token_parser.parse()
    tokens = token_parser.tokens

    ns_parser = NSParser(schema_content)
    ns_parser.parse()
    ns_values = ns_parser.ns_values

    if args.summary_output:
        summary_lines = ["list of tokens:"]
        summary_lines.extend(f"- \"{token}\"" for token in tokens)
        summary_lines.append("list of namespaces:")
        summary_lines.extend(f"- {ns}: \"{value}\"" for ns, value in ns_values)
        args.summary_output.write_text("\n".join(summary_lines), encoding="utf-8")

    # NOTE(review): the exact output format is decided by token_util, which
    # is not visible here.
    if args.token_constants:
        with open(args.token_constants, "w", encoding="utf-8") as f:
            token_util.gen_token_constants(f, tokens)

    if args.token_names:
        with open(args.token_names, "w", encoding="utf-8") as f:
            token_util.gen_token_names(f, tokens)

    if args.ns_file_prefix is not None:
        gen_namespace_tokens(args.ns_file_prefix, ns_values)


if __name__ == '__main__':
    main()
|