File: component_reader.py

package info (click to toggle)
python-mcstasscript 0.0.46%2Bgit20250402111921.bfa5a26-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 11,440 kB
  • sloc: python: 13,421; makefile: 14
file content (521 lines) | stat: -rw-r--r-- 19,300 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
import os
import math
import re


def remove_c_comments(code):
    """
    Removes comments from a multiline piece of c code
    """
    # Remove single-line comments
    code = re.sub(r'//.*', '', code)

    # Remove multi-line comments
    code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)

    # Remove empty lines
    code = '\n'.join([line for line in code.split('\n') if line.strip()])

    return code


def c_integer_literal_base(s: str) -> int:
    """
    Determine the base of a C style integer literal.

    base    literal
    ------- --------------------
    binary      0b### for # in (0,1)
    octal       0#### for # in (0,7)
    decimal     N#### for N in (1,9) and # in (0,9)
    hexadecimal 0x### for # in (0,9)&(A,F)

    The ambiguity between octal and decimal for '0' is not important
    since both result in the same integer value.
    """
    if len(s) == 0:
        return 0
    if '0' != s[0]:
        return 10
    if 'x' in s:
        return 16
    if 'b' in s:
        return 2
    return 8


class ComponentInfo:
    """
    Internal class used to store information on parameters of components
    """

    def __init__(self):
        self.name = ""
        self.category = ""
        self.parameter_names = []
        self.parameter_defaults = {}
        self.parameter_types = {}
        self.parameter_comments = {}
        self.parameter_units = {}


class ComponentReader:
    """
    Class for retrieving information on available McStas components

    Recursively reads all component files in hardcoded list of
    folders that represents the component categories in McStas.
    The results are stored in a dictionary with ComponentInfo
    instances, the keys are the names of the components. After
    the components in the McStas installation are read, any
    components present in the current work directory is read,
    and these will overwrite existing information, consistent
    with how McStas reads component definitions.

    """

    def __init__(self, mcstas_path, input_path="."):
        """
        Reads all component files in standard folders. Recursive, so
        subfolders of these folders are included.

        Parameters
        ----------
        mcstas_path : str
            Path to McStas folder, used to find the installed components

        keyword arguments:
            input_path : str
                Path to work directory, most often current directory

        """

        # Hardcoded whitelist of foldernames
        folder_list = ["sources",
                       "optics",
                       "samples",
                       "monitors",
                       "misc",
                       "contrib",
                       "obsolete",
                       "union",
                       "astrox"]

        self.component_path = {}
        self.component_category = {}

        for folder in folder_list:
            abs_path = os.path.join(mcstas_path, folder)
            abs_path = os.path.abspath(abs_path)
            self._find_components(abs_path)

        # Will overwrite McStas components with definitions in input_folder
        current_directory = os.getcwd()

        # Set up absolute input_path
        if os.path.isabs(input_path):
            input_directory = input_path
        else:
            if input_path == ".":
                # Default case, avoid having /./ in absolute path
                input_directory = current_directory
            else:
                input_directory = os.path.join(current_directory, input_path)

        if not os.path.isdir(input_directory):
            print("input_path: ", input_directory)
            raise ValueError("Can't find given input_path,"
                             + " directory must exist.")

        """
        If components are present both in the McStas install and the
        work directory, the version in the work directory is used. The user
        is informed of this behavior when the instrument object is created.
        """

        self.load_components_from_folder(input_directory, "work directory")

    def load_components_from_folder(self, folder, name, verbose=True):
        """
        Loads McStas components from given absolute path

        folder : (str) Path for folder to search for components in
        name : (str) Used for displaying help messages about these components
        verbose : (bool) If True, help messages are shown about the process
        """

        if not os.path.isdir(folder):
            if verbose:
                print("Did not find specified folder: " + folder)
            return

        overwritten_components = []
        for file in os.listdir(folder):
            if file.endswith(".comp"):
                abs_path = os.path.join(folder, file)
                component_name = os.path.split(abs_path)[1].split(".")[-2]

                if component_name in self.component_path:
                    overwritten_components.append(file)

                self.component_path[component_name] = abs_path
                self.component_category[component_name] = name

        # Report components found in the work directory and install to the user
        if len(overwritten_components) > 0 and verbose:
            print(f"The following components are found in the {name}"
                  + " / input_path:")
            for component_name in overwritten_components:
                print("    ", component_name)

            print("These definitions will be used instead of the installed "
                  + "versions.")

    def show_categories(self):
        """
        Method that will show all component categories available

        Sorted alphabetically for easier readability and consistency
        """
        categories = []
        for component, category in self.component_category.items():
            if category not in categories:
                categories.append(category)

        categories.sort()
        for category in categories:
            print(" " + category)

    def show_components_in_category(self, category_input, **kwargs):
        """
        Method that will show all components in given category

        """
        if "line_length" in kwargs:
            line_limit = int(kwargs["line_length"])
            if line_limit < 20:
                raise ValueError("line_length should be more than 20 "
                                 + "characters, was " + str(line_limit))
        else:
            line_limit = 100

        empty_category = True
        to_print = []
        for component, category in self.component_category.items():
            if category == category_input:
                to_print.append(component)
                empty_category = False

        to_print.sort()
        if empty_category:
            print("No components found in this category! "
                  + "Available categories:")
            self.show_categories()

        elif len(to_print) < 10:
            for component in to_print:
                print(" " + component)
        else:
            # Prints in columns, maximum 4 and maximum line length line_limit
            columns = 5
            total_line_length = 1000
            while(total_line_length > line_limit):
                columns = columns - 1

                c_length = math.ceil(len(to_print)/columns)
                last_length = len(to_print) - (columns-1)*c_length

                column = []
                longest_name = []
                for col in range(0, columns-1):
                    current_list = to_print[c_length*col:c_length*(col+1)]
                    column.append(current_list)
                    longest_name.append(len(max(current_list, key=len)))

                column.append(to_print[c_length*(columns-1):])
                longest_name.append(len(max(column[columns-1], key=len)))

                total_line_length = 1 + sum(longest_name) + (columns-1)*3

            for line_nr in range(c_length):
                print(" ", end="")
                for col in range(columns-1):
                    this_name = column[col][line_nr]
                    print(this_name
                          + " "*(longest_name[col] - len(this_name))
                          + "   ", end="")  # More columns left, dont break
                if line_nr < last_length:
                    this_name = column[columns-1][line_nr]
                    print(this_name)
                else:
                    print("")

    def load_all_components(self):
        """
        Method that loads information on all components into memory.

        """

        return_dict = {}
        for comp_name, abs_path in self.component_path.items():
            return_dict[comp_name] = self.read_component_file(abs_path)

        return return_dict

    def read_name(self, component_name):
        """
        Returns ComponentInfo of component with name component_name.

        Uses table of absolute paths to all known components, and
        reads the appropriate file in order to generate the information.

        """

        if component_name not in self.component_path:
            raise NameError("No component named "
                            + component_name
                            + " in McStas installation or "
                            + "current work directory.")

        output = self.read_component_file(self.component_path[component_name])

        # Category loaded using path, in case of Work directory it fails
        if self.component_category[component_name] == "work directory":
            output.category = "work directory"  # Corrects category

        return output

    def _find_components(self, absolute_path):
        """
        Recursive read function, can read either file or entire folder

        Updates the component_info_dict with the findings that are
        stored as ComoponentInfo instances.

        """

        if not os.path.isabs(absolute_path):
            raise RuntimeError("_find_components received non absolute path")

        if not os.path.isdir(absolute_path):
            if absolute_path.endswith(".comp"):
                # read this file
                component_name = os.path.split(absolute_path)[1].split(".")[-2]
                self.component_path[component_name] = absolute_path

                head = os.path.split(absolute_path)[0]
                component_category = os.path.split(head)[1]
                self.component_category[component_name] = component_category
        else:
            for file in os.listdir(absolute_path):
                absolute_file_path = os.path.join(absolute_path, file)
                self._find_components(absolute_file_path)

    def read_component_file(self, absolute_path):
        """
        Reads a component file and expands component_info_dict

        The information is stored as ComponentClass instances.
        """

        result = ComponentInfo()

        file_o = open(absolute_path, "r")

        line_number = 0
        while True:
            line_number += 1
            line = file_o.readline()

            if not line:
                # Exit at end of file
                break

            # find parameter comments
            if line.startswith("* %P"):

                while True:
                    this_line = file_o.readline()

                    if this_line.startswith("DEFINE COMPONENT"):
                        # No more comments to read through
                        break

                    if ":" in this_line:
                        tokens = this_line.split(":")

                        variable_name = tokens[0]
                        variable_name = variable_name.replace("*", "")
                        variable_name = variable_name.strip()
                        if " " in variable_name:
                            name_tokens = variable_name.split(" ")
                            variable_name = name_tokens[0]

                        if len(tokens[1]) > 2:
                            comment = tokens[1].strip()

                            if "[" in comment:  # Search for unit
                                # If found, store it and remove from string
                                unit = comment[comment.find("[") + 1:
                                               comment.find("]")]
                                result.parameter_units[variable_name] = unit
                                comment = comment[comment.find("]") + 1:]
                                comment = comment.strip()

                            # Store the comment
                            result.parameter_comments[variable_name] = comment
                    elif "[" in this_line and "]" in this_line:
                        tokens = this_line.split("[")

                        variable_name = tokens[0]
                        variable_name = variable_name.replace("*", "")
                        variable_name = variable_name.strip()

                        unit = this_line[this_line.find("[") + 1:
                                         this_line.find("]")]
                        result.parameter_units[variable_name] = unit

                        comment = this_line[this_line.find("]") + 1:]
                        comment = comment.strip()
                        result.parameter_comments[variable_name] = comment

            # find definition parameters and their values
            if (line.strip().startswith("DEFINITION PARAMETERS")
                    or line.strip().startswith("SETTING PARAMETERS")):

                define_section = line
                while True:
                    line = file_o.readline()

                    end_keywords = ("SHARE", "INITIALIZE", "INITIALISE", "DECLARE", "TRACE", "DEPENDENCY")
                    if line.strip().upper().startswith(end_keywords) or not line:
                        break

                    define_section += line

                clean_define_section = remove_c_comments(define_section)

                # Define the delimiters as a list of strings
                delimiters = ["DEFINITION PARAMETERS", "SETTING PARAMETERS", "OUTPUT PARAMETERS"]

                # Create a regex pattern using alternation and join the delimiters with the '|' symbol
                delimiter_pattern = r'\s*(' + '|'.join(map(re.escape, delimiters)) + r')\s*'

                # Split the text using pattern
                clean_define_sections = re.split(delimiter_pattern, clean_define_section)

                # Extract parameters from definition and settings part
                parameter_section = ""
                for index, section in enumerate(clean_define_sections):
                    if section in ("DEFINITION PARAMETERS", "SETTING PARAMETERS"):
                        parameter_section += clean_define_sections[index + 1].strip("(").strip(")") + ", "

                # Convert parameter section to single line, then split in parts separated by comma
                parameter_section = parameter_section.replace('\n', ' ')
                parameter_parts = parameter_section.split(",")
                # Combine parts that should be together, for example by brackets
                parameter_parts = self.correct_for_brackets(parameter_parts)

                # Each part now corresponds to a parameter to be read
                for part in parameter_parts:

                    temp_par_type = "double"

                    part = part.strip()

                    possible_declare = part.split(" ")
                    possible_type = possible_declare[0].strip()
                    if "int" == possible_type:
                        temp_par_type = "int"
                        # remove int from part
                        part = "".join(possible_declare[1:])
                    if "string" == possible_type:
                        temp_par_type = "string"
                        # remove string from part
                        part = "".join(possible_declare[1:])
                    if "double" == possible_type:
                        temp_par_type = "double"
                        # remove double from part
                        part = "".join(possible_declare[1:])
                    if "vector" == possible_type:
                        temp_par_type = "double"
                        # remove double from part
                        part = "".join(possible_declare[1:])

                    part = part.replace(" ", "")
                    if part == "":
                        continue

                    if "=" not in part:
                        # no default value, required parameter
                        result.parameter_names.append(part)
                        result.parameter_defaults[part] = None
                        result.parameter_types[part] = temp_par_type
                    else:
                        # default value available
                        name_value = part.split("=")
                        par_name = name_value[0].strip()
                        par_value = name_value[1].strip()

                        if temp_par_type == "double":
                            try:
                                par_value = float(par_value)
                            except ValueError:
                                # value could be parameter name
                                par_value = par_value
                        elif temp_par_type == "int":
                            par_value = int(par_value, c_integer_literal_base(par_value))

                        result.parameter_names.append(par_name)
                        result.parameter_defaults[par_name] = par_value
                        result.parameter_types[par_name] = temp_par_type

                # End while loop running through file when parameters are read
                break

        file_o.close()

        result.name = os.path.split(absolute_path)[1].split(".")[-2]

        tail = os.path.split(absolute_path)[0]
        result.category = os.path.split(tail)[1]

        """
        To lower memory use one could remove all comments and units that
        does not correspond to a found parameter name.
        """

        return result

    def correct_for_brackets(self, parameter_parts):
        """
        Given list of string elements, correct for brackets will
        combine terms until curly brackets are balanced, for example:

        ["A", "{B", "C", "D}", "E"] would return ["A", "{B,C,D}", "E"]

        Default values of vectors can be given in such a manner in
        McStas components, and without this each part would be recognized
        as different parameters.
        """
        corrected_parts = []
        index = 0
        while True:

            current_part = parameter_parts[index]
            inner_index = 0
            while True:
                if current_part.count("{") == current_part.count("}"):
                    corrected_parts.append(current_part)
                    index += inner_index
                    break
                else:
                    inner_index += 1
                    current_part += "," + parameter_parts[index+inner_index]

            index += 1

            if index >= len(parameter_parts):
                break

        return corrected_parts