File: extract_xml_metadata.py

package info (click to toggle)
votca 2025.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 132,424 kB
  • sloc: xml: 345,964; cpp: 80,067; python: 15,957; sh: 4,580; perl: 2,169; javascript: 202; makefile: 34
file content (260 lines) | stat: -rwxr-xr-x 7,903 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#!/usr/bin/env python3
#
# Copyright 2009-2020 The VOTCA Development Team (http://www.votca.org)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Module to extract metadata from the xml files."""

import argparse
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Iterable, List, Optional, Tuple

# Text shown as the description in ``--help``.
msg = "extract_metadata.py -i file.xml"

# Command line interface: a mandatory input file, an optional output file and
# the kind of XML document (``xtp`` or ``csg``) that has to be converted.
parser = argparse.ArgumentParser(description=msg)
parser.add_argument("-i", help="Input file in XML format", required=True)
parser.add_argument("-o", default=None, help="Optional output file")
parser.add_argument(
    "-m", "--mode",
    default="xtp",
    choices=["xtp", "csg"],
    help="Operation mode: xtp, csg",
)

# Cell texts longer than this many characters are wrapped over several lines.
MAXIMUM_LINE_LENGTH = 60

# Opening of the four-column ``list-table`` directive; the leading empty entry
# makes the text start with a newline.
DEFAULTS_TABLE_HEADER = "\n".join((
    "",
    ".. list-table::",
    "   :header-rows: 1",
    "   :widths: 30 20 15 15",
    "   :align: center",
    "",
    "   * - Property Name",
    "     - Default Value",
    "     - Description",
    "     - Valid Input",
))


def xtp_table_header(x: str) -> str:
    """Return the text that precedes the rows of an XTP options table.

    The result is a newline, ``x``, a fixed explanatory paragraph and finally
    ``DEFAULTS_TABLE_HEADER``.
    """
    # The blank before the embedded newline is part of the emitted text.
    explanation = (
        "The following table contains the defaults input options for the "
        "calculator, The default `OPTIONAL` means this option is switched off, "
        "if no input is given. \n"
        "`REQUIRED` arguments have to be specified, otherwise an error is thrown."
    )
    return f"\n{x}\n{explanation}\n{DEFAULTS_TABLE_HEADER}"


def csg_table_header(x: str) -> str:
    """Return the text that precedes the rows of a CSG options table.

    The result is a newline, ``x``, one introductory sentence and the opening
    of a three-column ``list-table`` directive.
    """
    table_directive = (
        ".. list-table::\n"
        "   :header-rows: 1\n"
        "   :align: center\n"
        "\n"
        "   * - Property Name\n"
        "     - Default Value\n"
        "     - Description"
    )
    introduction = "The following table contains the input options for CSG,"
    return f"\n{x}\n{introduction}\n\n{table_directive}"


# Row formatter for the four-column XTP table: call it with the cell texts
# (name, default, description, valid input) to get one ``list-table`` row.
XTP_TABLE_LINE = "\n".join((
    "",
    "   * - {}",
    "     - {}",
    "     - {}",
    "     - {}",
)).format

# Row formatter for the three-column CSG table (name, default, description).
CGS_TABLE_LINE = "\n".join((
    "",
    "   * - {}",
    "     - {}",
    "     - {}",
)).format


def main():
    """Read the command line options, build the RST table and emit it.

    With ``-o`` the table is written to that file (its parent folder is
    created first); otherwise the table is printed to standard output.
    """
    options = parser.parse_args()
    source = Path(options.i)

    # ``--mode`` is restricted to these two values by the argument parser.
    if options.mode == "xtp":
        rst = xtp_create_rst_table(source)
    elif options.mode == "csg":
        rst = csg_create_rst_table(source)

    if options.o is None:
        print(rst)
        return

    destination = Path(options.o)
    create_parent_folder(destination)
    with open(destination, 'w') as handle:
        handle.write(rst)


def get_root_children(file_name: Path) -> List[ET.Element]:
    """Parse ``file_name`` and return the direct children of its root node.

    ``resolve_links`` is applied to the root first, so the returned elements
    already contain whatever the ``link`` attributes pulled in.
    """
    document_root = ET.parse(file_name).getroot()
    resolve_links(file_name, document_root)
    return [node for node in document_root]


def resolve_links(file_name: Path, elem: ET.Element) -> None:
    """Expand the ``link`` attributes of ``elem`` and its descendants in place.

    A ``link`` attribute holds one or more file names separated by commas
    and/or whitespace. Each name is looked up in the ``subpackages`` folder
    next to ``file_name``; the children of that file's root node are appended
    to the element carrying the attribute. The function then recurses into
    every child, including the ones that were just appended.

    Parameters
    ----------
    file_name
        Path of the XML file ``elem`` comes from; only its parent folder is
        used, to locate ``subpackages``.
    elem
        Element to process; it is modified in place.

    Raises
    ------
    OSError, xml.etree.ElementTree.ParseError
        Propagated from ``ET.parse`` when a linked file is missing or invalid.
    """
    link_attr = elem.attrib.get("link")
    if link_attr is not None:
        # ``str.replace`` returns a new string, so its result must be used.
        # Turning commas into blanks and splitting on any whitespace handles
        # "a.xml,b.xml", "a.xml b.xml" and "a.xml, b.xml" alike and never
        # produces empty names.
        for link in link_attr.replace(",", " ").split():
            package_path = file_name.parent / "subpackages" / link
            linked_root = ET.parse(package_path).getroot()
            elem.extend(list(linked_root))
    for child in elem:
        resolve_links(file_name, child)


def xtp_extract_metadata(file_name: Path) -> Tuple[str, ET.Element]:
    """Return the help text and the element of the first child of the root.

    The help text is the ``help`` attribute of that child, or an empty string
    when the attribute is absent.
    """
    top_level = get_root_children(file_name)
    calculator = top_level[0]
    return calculator.attrib.get("help", ""), calculator


def csg_extract_metadata(file_name: Path) -> Tuple[str, List[ET.Element]]:
    """Get the description and elements from the xml file.

    Returns
    -------
    tuple
        The text of the first child of the root node (an empty string when
        that child has no text) and the list of the remaining children.

    Raises
    ------
    IndexError
        If the root node has no children.
    """
    children = get_root_children(file_name)
    # ``Element.text`` is ``None`` for an element without text; fall back to
    # "" so the header is always a string, as the return annotation promises.
    header = children[0].text or ""
    return header, children[1:]


def xtp_get_recursive_attributes(elem: ET.Element, root_name: str = "") -> str:
    """Return the table rows describing ``elem`` and everything below it.

    Elements with children produce no row of their own; their children are
    visited with the dotted name of the parent as prefix. A leaf produces one
    row built from its ``help``, ``default`` and ``choices`` attributes, each
    defaulting to an empty string.
    """
    qualified_name = root_name + elem.tag
    sub_elements = list(elem)
    if sub_elements:
        prefix = f"{qualified_name}."
        return ''.join(
            xtp_get_recursive_attributes(sub, prefix) for sub in sub_elements)

    attributes = elem.attrib
    help_text = split_line(attributes.get("help", ""))
    default_value = split_line(attributes.get("default", ""))
    valid_input = multiline(attributes.get("choices", ""))
    return XTP_TABLE_LINE(qualified_name, default_value, help_text, valid_input)


def csg_get_recursive_attributes(elem: ET.Element, root_name: str = "") -> str:
    """Get recursively the attributes of an ``ET.Element``.

    Only elements that have children produce a row: the dotted name, the
    whitespace-normalised text of the element as default value and the text
    of its ``DESC`` child as description. The remaining children are then
    processed recursively with the element name as prefix. An element without
    children yields an empty string.
    """
    if len(elem) == 0:
        return ""

    desc_elem = elem.find("DESC")
    # ``text`` is ``None`` both when there is no DESC child and when the DESC
    # element is empty (``<DESC/>``); use "" so ``split_line`` gets a string.
    raw_description = desc_elem.text if desc_elem is not None else None
    description = split_line(raw_description or "")

    name = root_name + elem.tag
    # Collapse newlines and indentation of the XML text into single blanks.
    default = ' '.join(elem.text.split()) if elem.text else ""

    row = CGS_TABLE_LINE(name, default, description)
    prefix = f"{name}."
    nested_rows = ''.join(
        csg_get_recursive_attributes(child, prefix)
        for child in elem if child.tag != "DESC")
    return row + nested_rows


def xtp_create_rst_table(file_name: Path) -> str:
    """Build the RST documentation for the XTP options file ``file_name``.

    The output consists of a title made from the file stem, the help text of
    the file, the table header, one row per option and a closing note.
    """
    stem = file_name.stem
    description, calculator = xtp_extract_metadata(file_name)
    title = generate_title(stem) + description

    if calculator is not None:
        preamble = xtp_table_header(title)
    else:
        preamble = f"{title}\n"

    rows = "".join(
        xtp_get_recursive_attributes(option) for option in calculator)
    return preamble + rows + generate_note(stem)


def csg_create_rst_table(file_name: Path) -> str:
    """Build the RST options table for the CSG file ``file_name``.

    The table header is followed by the rows of every element returned by
    ``csg_extract_metadata``.
    """
    description, options = csg_extract_metadata(file_name)
    rows = (csg_get_recursive_attributes(option) for option in options)
    return csg_table_header(description) + "".join(rows)


def create_parent_folder(path: Path) -> None:
    """Create the parent folder of ``path`` if it does not exist yet.

    Missing intermediate folders are created as well. Calling the function
    when the folder already exists is a no-op.
    """
    # ``exist_ok=True`` replaces the separate existence check, so a folder
    # created by someone else between the check and the call is not an error.
    path.parent.mkdir(parents=True, exist_ok=True)


def split_line(line: str, sep: Optional[str] = None) -> str:
    """Return ``line`` unchanged, or wrapped if it exceeds the length limit.

    Lines of at most ``MAXIMUM_LINE_LENGTH`` characters are returned as they
    are; longer ones are passed to ``wrap_line`` together with ``sep``.
    """
    if len(line) <= MAXIMUM_LINE_LENGTH:
        return line
    return wrap_line(line, sep)


def wrap_line(line: str, sep: Optional[str]) -> str:
    """Distribute the words of ``line`` over several rows of a column.

    ``line`` is split on ``sep``. A new row is started before a word when the
    running width already exceeds ``MAXIMUM_LINE_LENGTH`` or when the word
    itself is longer than that limit. The rows are joined with single blanks
    and handed to ``column_multiline``.
    """
    start_column = 7  # continuation rows begin at column number 7
    rows: List[List[str]] = [[]]
    width = start_column
    for token in line.split(sep=sep):
        too_wide = width > MAXIMUM_LINE_LENGTH
        too_long = len(token) > MAXIMUM_LINE_LENGTH
        if too_wide or too_long:
            rows.append([])
            width = start_column
        # The word always goes on the most recent row.
        rows[-1].append(token)
        width += len(token) + 1

    joined_rows = [" ".join(row) for row in rows]
    return column_multiline(iter(joined_rows))


def generate_title(stem: str) -> str:
    """Return ``stem`` underlined with asterisks, as an RST title.

    The underline has the same length as ``stem`` and the result ends with a
    newline.
    """
    underline = "*" * len(stem)
    return f"{stem}\n{underline}\n"


def generate_note(stem: str) -> str:
    """Generate the RST note explaining how to create the defaults file.

    ``stem`` is inserted as the calculator name and as the argument of the
    ``-p`` command line option. The returned text starts and ends with a
    newline.
    """
    # The sentence used to end with a lone backtick that had no matching
    # opening backtick and therefore showed up verbatim in the rendered note.
    return (
        "\n"
        ".. note::\n"
        f"   An *xml* file containing the defaults for the `{stem}` calculator "
        f"can be created via `-p {stem} -o FILENAME` command line options\n"
    )


def multiline(line: str) -> str:
    """Put every comma separated entry of ``line`` on a row of its own.

    A text without a comma is returned unchanged; otherwise the entries are
    handed to ``column_multiline``.
    """
    entries = line.split(',')
    if len(entries) < 2:
        return line
    return column_multiline(iter(entries))


def column_multiline(xs: Iterable[str]) -> str:
    """Create a column with multiple lines.

    Every entry of ``xs`` is placed on its own row prefixed with ``| ``; the
    rows after the first one are indented by seven blanks.

    ``xs`` may be any iterable of strings, not only an iterator. A single
    entry yields just ``"| entry"`` without a dangling empty continuation
    row, and an empty iterable yields ``"| "`` instead of raising
    ``StopIteration``.
    """
    # Newline, the indentation of a continuation row and the row marker.
    continuation = f"\n{' ':^6s} | "
    return "| " + continuation.join(xs)


# Run the command line workflow only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()