File: model_conf_parser.py

package info (click to toggle)
macsyfinder 2.1.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 134,860 kB
  • sloc: python: 20,583; xml: 953; sh: 37; makefile: 16
file content (157 lines) | stat: -rw-r--r-- 6,433 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#########################################################################
# MacSyFinder - Detection of macromolecular systems in protein dataset  #
#               using systems modelling and similarity search.          #
# Authors: Sophie Abby, Bertrand Neron                                  #
# Copyright (c) 2014-2024  Institut Pasteur (Paris) and CNRS.           #
# See the COPYRIGHT file for details                                    #
#                                                                       #
# This file is part of MacSyFinder package.                             #
#                                                                       #
# MacSyFinder is free software: you can redistribute it and/or modify   #
# it under the terms of the GNU General Public License as published by  #
# the Free Software Foundation, either version 3 of the License, or     #
# (at your option) any later version.                                   #
#                                                                       #
# MacSyFinder is distributed in the hope that it will be useful,        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of        #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          #
# GNU General Public License for more details .                         #
#                                                                       #
# You should have received a copy of the GNU General Public License     #
# along with MacSyFinder (COPYING).                                     #
# If not, see <https://www.gnu.org/licenses/>.                          #
#########################################################################

import xml.etree.ElementTree as Et
import logging
from typing import Any, Callable

from macsypy.error import MacsypyError

_log = logging.getLogger(__name__)


class ModelConfParser:
    """
    Handle model_conf.xml configuration file.

    The file is expected to have a root element which may contain
    a 'weights' section and/or a 'filtering' section.
    Each section contains one child element per option, the text of the element being the value.
    """

    def __init__(self, path: str) -> None:
        """

        :param str path: The path to the configuration file
        """
        self._path = path


    def parse(self) -> dict[str, Any]:
        """
        Parse the xml 'model_conf' file set at the root of a data package

        Both sections ('weights' and 'filtering') are optional,
        a missing section just does not contribute any option to the result.

        :return: The specific configuration for a model family
        :rtype: dict with the name of variables as keys and value as values
        :raise MacsypyError: if the file cannot be read/parsed or if an option value cannot be converted
        """
        model_conf_node = self._get_model_conf_node()

        # 'find' returns None when the section is absent.
        # Compare explicitly with None: the truth value of an Element depends on
        # its number of children and testing it is deprecated.
        weights = {}
        weights_node = model_conf_node.find("./weights")
        if weights_node is not None:
            weights = self.parse_weights(weights_node)

        filtering_opt = {}
        filtering_node = model_conf_node.find("./filtering")
        if filtering_node is not None:
            filtering_opt = self.parse_filtering(filtering_node)

        return {**weights, **filtering_opt}


    def _get_model_conf_node(self) -> Et.Element:
        """
        Find the root of the document

        :return: the document root of model_conf
        :raise MacsypyError: if the file cannot be opened or is not a well formed xml
        """
        try:
            tree = Et.parse(self._path)
            model_node = tree.getroot()
        except Exception as err:
            # whatever goes wrong (missing file, permission, malformed xml, ...)
            # is reported to the caller as a single MacsypyError
            msg = f"unable to parse model configuration '{self._path}' : {err}"
            _log.critical(msg)
            raise MacsypyError(msg) from None
        return model_node


    def parse_weights(self, weights_node: Et.Element) -> dict[str, float]:
        """
        Parse the node 'weights' containing the scoring weight configuration

        :param weights_node: the node 'weights'
        :return: the configuration option/value about the scores.
                 The keys are the element names suffixed with '_weight',
                 except 'redundancy_penalty' which is kept as is.
        :raise MacsypyError: if a value cannot be converted in float
        """
        elements = {'itself': float,
                    'exchangeable': float,
                    'mandatory': float,
                    'accessory': float,
                    'neutral': float,
                    'out_of_cluster': float,
                    'redundancy_penalty': float}
        weights_conf = self._parse_section(weights_node, elements)
        # rename options as in the other part of MSF
        return {(k if k == 'redundancy_penalty' else f"{k}_weight"): v for k, v in weights_conf.items()}


    def parse_filtering(self, filtering_node: Et.Element) -> dict[str, Any]:
        """
        Parse the node 'filtering' containing the filtering options configuration

        :param filtering_node: the node 'filtering'
        :return: the configuration option/value about the filtering
        :raise MacsypyError: if a value cannot be converted
                             (in float, or in boolean for 'cut_ga')
        """
        elements = {'e_value_search': float,
                    'i_evalue_sel': float,
                    'coverage_profile': float,
                    'cut_ga': self._parse_cut_ga,
                    }
        return self._parse_section(filtering_node, elements)


    def _parse_cut_ga(self, value) -> bool:
        """
        Convert the text of the 'cut_ga' element in boolean

        The comparison is case insensitive and ignores the surrounding white spaces.

        :param value: the text of the 'cut_ga' element (None if the element is empty)
        :return: True for 'true' or '1', False for 'false' or '0'
        :raise MacsypyError: for any other value, including an empty element
        """
        # an empty element has None as text, it is handled as an invalid value
        normalized = value.strip().lower() if value is not None else ''
        # the value is a string, so it must be compared to '1'/'0' and not to the integers 1/0
        if normalized in ('true', '1'):
            return True
        if normalized in ('false', '0'):
            return False
        msg = f"cannot parse 'cut_ga' element in '{self._path}' expect True, 1, False, 0 got : '{value}'"
        _log.critical(msg)
        raise MacsypyError(msg)


    def _parse_section(self, section_node: Et.Element, allowed_elements: dict[str, Callable]) -> dict[str, Any]:
        """
        Parse a node containing configurations options and value

        :param section_node: the node whose children are the options to parse
        :param allowed_elements: The elements allowed in this section
                                 Only these elements are parsed and in the final dictionary.
                                 The other elements are ignored with a warning.
        :type allowed_elements: a dict with options name as keys and function to parse the element
        :return: dict with the name of the elements as keys and the converted values as values
        :raise MacsypyError: if a converter raises a TypeError or a ValueError
                             (for instance float on an empty or non numeric element)
        """
        section = {}
        for child in section_node:
            element = child.tag
            if element not in allowed_elements:
                _log.warning(f"unknown element '{element}' in '{self._path}' ignore it.")
                continue
            try:
                section[element] = allowed_elements[element](child.text)
            except (TypeError, ValueError) as err:
                msg = f"The model configuration file '{self._path}' cannot be parsed: {err}"
                _log.critical(msg)
                raise MacsypyError(msg) from None
        return section