File: format_input_file.py

package info (click to toggle)
cp2k 2025.1-1.1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 366,832 kB
  • sloc: fortran: 955,049; f90: 21,676; ansic: 18,058; python: 13,378; sh: 12,179; xml: 2,173; makefile: 964; pascal: 845; perl: 492; lisp: 272; cpp: 137; csh: 16
file content (188 lines) | stat: -rwxr-xr-x 6,176 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python3

# author: Ole Schuett

import re
import sys
from pathlib import Path
from typing import List, Tuple, Iterator

# Names of sections whose content is kept verbatim: Section.render() does not sort
# their children, and Keyword.render() emits their keyword lines without passing
# them through format_line().
VERBATIM_SECTIONS = ["COORD", "TRAINING_SET"]


# ======================================================================================
def separate_comment(line: str) -> Tuple[str, str]:
    """Split a line into its body and its comment.

    The comment begins at the first ``#`` or ``!``; everything before it is the
    body. Both parts are returned with surrounding whitespace stripped, and a
    part that is absent is returned as the empty string.
    """
    match = re.match(r"([^#!]*)([#!].*)?", line)
    assert match
    # A group that did not participate in the match is reported as "".
    before, after = match.groups(default="")
    return before.strip(), after.strip()


# ======================================================================================
def format_line(line: str) -> str:
    """Normalize a single input line.

    The body (the part before any comment) is split on whitespace, its first
    token is upper-cased and the tokens are re-joined with single spaces. A
    comment, if present, is appended after two spaces.

    A line without a body (empty or comment-only) is returned as just its
    comment instead of raising ``IndexError``.
    """
    body, comment = separate_comment(line)
    tokens = body.split()
    if not tokens:
        # Nothing to upper-case; avoid indexing into an empty token list.
        return comment
    tokens[0] = tokens[0].upper()
    padded_comment = f"  {comment}" if comment else ""
    return " ".join(tokens) + padded_comment


# ======================================================================================
def indent(lines: List[str]) -> List[str]:
    """Return a new list in which every line is prefixed with two spaces."""
    return [f"  {entry}" for entry in lines]


# ======================================================================================
class Child:
    """Base type of the nodes produced by the parser."""

    def __init__(self) -> None:
        # An empty sortkey marks the node as not sortable.
        self.sortkey: str = ""

    def render(self, verbatim: bool) -> List[str]:
        """Return the output lines of this node; the base node produces none."""
        rendered: List[str] = []
        return rendered


# ======================================================================================
class Section(Child):
    """A ``&NAME`` section: its opening line, nested children and closing ``&END``."""

    def __init__(self, preamble: List[str], line: str, children: List[Child]) -> None:
        self.preamble = preamble
        self.line = line
        self.children = children
        # The name is the first token, upper-cased, without its leading character.
        first_token = self.line.split()[0]
        self.name = first_token.upper()[1:]
        self.sortkey = f"2__{self.name}"  # Sections come after keywords.

    def render(self, verbatim: bool) -> List[str]:
        """Return the section's lines, with its children indented one level.

        The incoming ``verbatim`` flag is not consulted: whether this section is
        verbatim is decided solely by its own name. Children of a non-verbatim
        section are sorted in place by sortkey, but only if every child has one.
        """
        verbatim = self.name in VERBATIM_SECTIONS
        if not verbatim and all(child.sortkey for child in self.children):
            self.children.sort(key=lambda child: child.sortkey)

        rendered = list(self.preamble)
        rendered.append(format_line(self.line))
        for child in self.children:
            rendered.extend(indent(child.render(verbatim)))
        rendered.append(f"&END {self.name}")
        return rendered


# ======================================================================================
class Keyword(Child):
    """A keyword line together with the comment lines collected before it."""

    def __init__(self, preamble: List[str], line: str) -> None:
        self.preamble = preamble
        self.line = line
        first_token = self.line.split()[0]
        self.name = first_token.upper()
        # Do not sort keywords whose name does not start with a letter (e.g. numbers).
        self.sortable = self.name[0].isalpha()
        if self.sortable:
            self.sortkey = f"1__{self.name}"  # Keywords come first.
        else:
            self.sortkey = ""

    def render(self, verbatim: bool) -> List[str]:
        """Return the preamble followed by the keyword line.

        The line is emitted untouched when rendering verbatim or when the keyword
        is not sortable; otherwise it is normalized with format_line().
        """
        keep_as_is = verbatim or not self.sortable
        text = self.line if keep_as_is else format_line(self.line)
        return self.preamble + [text]


# ======================================================================================
class Preprocessor(Child):
    """A pre-processor directive line and the comment lines collected before it."""

    def __init__(self, preamble: List[str], line: str) -> None:
        self.preamble = preamble
        self.line = line
        self.sortkey = ""  # Pre-processor lines are not sortable.

    def render(self, verbatim: bool) -> List[str]:
        """Return the preamble followed by the line with its directive upper-cased.

        Only the leading run of non-whitespace characters (the directive itself)
        is upper-cased; the rest of the line is kept byte-for-byte. Splitting on
        a literal space would treat a tab-separated line as a single token and
        upper-case its arguments (e.g. an included file name) as well.
        """
        m = re.match(r"\S*", self.line)
        assert m  # The pattern also matches the empty string, so it always matches.
        directive, remainder = m.group(0), self.line[m.end() :]
        return self.preamble + [directive.upper() + remainder]


# ======================================================================================
class Epilog(Child):
    """Comment lines that were not followed by any node before the parse ended."""

    def __init__(self, lines: List[str]) -> None:
        self.sortkey = "9__"  # Epilogs come last.
        self.lines = lines

    def render(self, verbatim: bool) -> List[str]:
        """Return the stored lines as they are (the same list object)."""
        return self.lines


# ======================================================================================
def parse_children(lines_iter: Iterator[str]) -> List[Child]:
    """Parse lines into nodes until an ``&END`` line or the end of the input.

    Lines are consumed from ``lines_iter``; nested sections are parsed by
    recursing on the same iterator, so each call stops right after the ``&END``
    that closes its own level. Comment-only lines are collected and attached as
    preamble to the next node; comments left over at the end become an Epilog.
    """
    nodes: List[Child] = []
    pending_comments: List[str] = []

    for raw_line in lines_iter:
        # Strip prior indentation and trailing spaces; drop empty lines.
        text = raw_line.strip()
        if not text:
            continue

        # Split off the comment part.
        body, comment = separate_comment(text)

        if not body:
            # Comment-only line: keep it until the next node claims it.
            pending_comments.append(comment)
        elif body.startswith("@"):
            # Pre-processor line.
            nodes.append(Preprocessor(pending_comments, text))
            pending_comments = []
        elif body.upper().startswith("&END"):
            # Section end: this level is complete.
            break
        elif body.startswith("&"):
            # Section beginning: everything up to its &END belongs to it.
            nested = parse_children(lines_iter)
            nodes.append(Section(pending_comments, text, nested))
            pending_comments = []
        else:
            # Anything else is a keyword.
            nodes.append(Keyword(pending_comments, text))
            pending_comments = []

    if pending_comments:
        nodes.append(Epilog(pending_comments))  # left-over preamble
    return nodes


# ======================================================================================
def main() -> None:
    """Format the input file named on the command line and rewrite it in place.

    Exits with status 1 when the argument count is wrong, or when the file
    contains an unmatched ``&END`` at the top level (the file is then left
    unchanged).
    """
    if len(sys.argv) != 2:
        print("Usage: format_input_file.py <file>")
        sys.exit(1)

    # Parse input file.
    input_file = Path(sys.argv[1])
    lines_iter = iter(input_file.read_text(encoding="utf8").split("\n"))
    children = parse_children(lines_iter)

    # parse_children() returns at the first &END it meets. At the top level that
    # is a stray &END: the lines after it were never parsed and would be silently
    # dropped when the file is written back, so refuse to overwrite it.
    if any(line.strip() for line in lines_iter):
        print(
            f"Error: unmatched &END in {input_file}, file left unchanged.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Sort the top sections, but always put &GLOBAL first and &FORCE_EVAL last.
    # Sorting is skipped when pre-processor lines are present at the top level.
    if not any(isinstance(c, Preprocessor) for c in children):
        sortkey_overwrite = {"2__GLOBAL": "0", "2__FORCE_EVAL": "8"}
        children.sort(key=lambda c: sortkey_overwrite.get(c.sortkey, c.sortkey))

    # Render top sections.
    output: List[str] = []
    for c in children:
        output += c.render(verbatim=False)
        if isinstance(c, Section):
            output.append("")  # Insert empty line after each top section.

    # Write output back to file
    input_file.write_text("\n".join(output), encoding="utf8")


# Run only when executed as a script, so that importing this module has no side effects.
if __name__ == "__main__":
    main()

# EOF