File: confparse.py

package info (click to toggle)
linkchecker 10.6.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,132 kB
  • sloc: python: 13,154; makefile: 134; sh: 71; xml: 36; sql: 20; javascript: 19; php: 2
file content (333 lines) | stat: -rw-r--r-- 14,137 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Parse configuration files"""

from configparser import RawConfigParser
from re import compile as re_compile
import os

from .. import (
    LinkCheckerError,
    get_link_pat,
    LOG_CHECK,
    log,
    fileutil,
    plugins,
    logconf,
)


def read_multiline(value):
    """Yield the meaningful lines of a multiline option value.

    Every line of ``value`` is stripped of surrounding whitespace. Lines
    that are empty after stripping, and lines starting with ``#``, are
    skipped.
    """
    stripped = (raw.strip() for raw in value.splitlines())
    yield from (
        entry for entry in stripped if entry and not entry.startswith('#')
    )


class LCConfigParser(RawConfigParser):
    """
    Parse a LinkChecker configuration file.

    Option values are read section by section, validated, and stored in
    the ``config`` mapping handed to the constructor.
    """

    def __init__(self, config):
        """Initialize configuration.

        @param config: mapping that receives the parsed option values
        """
        super().__init__()
        self.config = config
        # Files that were read successfully; replaced by read(). Defined
        # here so check_password_readable() never hits a missing attribute.
        self.read_ok = []

    def read(self, files):
        """Read settings from given config files.

        @param files: list of configuration file names
        @raises: LinkCheckerError on syntax errors in the config file(s)
        """
        # Wrap in a 1-tuple so the message also formats for tuple input.
        assert isinstance(files, list), "Invalid file list %r" % (files,)
        try:
            self.read_ok = super().read(files)
            if not self.sections():
                raise LinkCheckerError(
                    _("configuration files %s contain no sections.") % files)
            if len(self.read_ok) < len(files):
                failed_files = set(files) - set(self.read_ok)
                log.warn(
                    LOG_CHECK, "Could not read configuration files %s.", failed_files
                )
            # Read all the configuration parameters from the given files.
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
            self.read_output_config()
            self.read_plugin_config()
        except Exception as msg:
            # Every failure (including the LinkCheckerError raised above) is
            # reported uniformly; keep the original exception as the cause.
            raise LinkCheckerError(
                _("Error parsing configuration: %s") % str(msg)
            ) from msg

    def read_string_option(self, section, option, allowempty=False):
        """Read a string option and store it under the option name.

        Nothing happens when the option is not present.

        @param allowempty: accept an empty value instead of raising
        @raises: LinkCheckerError if the value is empty and not allowed
        """
        if self.has_option(section, option):
            value = self.get(section, option)
            if not allowempty and not value:
                raise LinkCheckerError(
                    _("invalid empty value for %s: %s\n") % (option, value)
                )
            self.config[option] = value

    def read_boolean_option(self, section, option):
        """Read a boolean option and store it under the option name.

        Nothing happens when the option is not present.
        """
        if self.has_option(section, option):
            self.config[option] = self.getboolean(section, option)

    def read_float_option(self, section, option, key=None, min=None, max=None):
        """Read a float option.

        Nothing happens when the option is not present.

        @param key: config key to store the value under (default: option)
        @param min: smallest accepted value, or None for no lower bound
        @param max: largest accepted value, or None for no upper bound
        @raises: LinkCheckerError if the value lies outside [min, max]
        """
        if self.has_option(section, option):
            num = self.getfloat(section, option)
            # %s instead of %d: %d would truncate fractional values in the
            # message (e.g. report 0.0005 as 0).
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be less than %s")
                    % (option, num, min)
                )
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be greater than %s")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def read_int_option(self, section, option, key=None, min=None, max=None):
        """Read an integer option.

        Nothing happens when the option is not present.

        @param key: config key to store the value under (default: option)
        @param min: smallest accepted value, or None for no lower bound
        @param max: largest accepted value, or None for no upper bound
        @raises: LinkCheckerError if the value lies outside [min, max]
        """
        if self.has_option(section, option):
            num = self.getint(section, option)
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be less than %d")
                    % (option, num, min)
                )
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be greater than %d")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def _append_regex_pairs(self, section, option):
        """Append compiled regex pairs from a multiline option.

        Each non-comment line is split on the first run of whitespace into
        at most two patterns; a missing second pattern becomes the empty
        pattern. The compiled tuple is appended to ``self.config[option]``.
        Nothing happens when the option is not present.
        """
        if not self.has_option(section, option):
            return
        for line in read_multiline(self.get(section, option)):
            parts = line.split(maxsplit=1)
            if len(parts) == 1:
                parts.append('')
            self.config[option].append(
                tuple(re_compile(part) for part in parts)
            )

    def read_output_config(self):
        """Read configuration options in section "output"."""
        section = "output"
        from ..logger import LoggerClasses

        if self.has_section("blacklist"):
            log.warn(
                     LOG_CHECK,
                     _("The blacklist section in linkcheckerrc is deprecated, "
                       "please rename to failures")
            )
            # Deprecated section: its options are stored under "failures".
            for opt in self.options("blacklist"):
                self.config["failures"][opt] = self.get("blacklist", opt)
        # Every logger may have a section named after its LoggerName.
        for c in LoggerClasses:
            key = c.LoggerName
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                if self.has_option(key, 'parts'):
                    # "parts" is a comma separated list, stored as a list.
                    val = self.get(key, 'parts')
                    parts = [f.strip().lower() for f in val.split(',')]
                    self.config[key]['parts'] = parts
        self.read_boolean_option(section, "warnings")
        if self.has_option(section, "verbose"):
            if self.getboolean(section, "verbose"):
                # verbose also switches warnings on
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "quiet"):
            if self.getboolean(section, "quiet"):
                self.config['output'] = 'none'
                self.config['quiet'] = True
                logconf.reset_loglevel()  # if debug will be overwritten next
        if self.has_option(section, "debug"):
            val = self.get(section, "debug")
            parts = [f.strip().lower() for f in val.split(',')]
            logconf.set_debug(parts)
        self.read_boolean_option(section, "status")
        if self.has_option(section, "log"):
            val = self.get(section, "log").strip().lower()
            self.config['output'] = val
        if self.has_option(section, "fileoutput"):
            loggers = self.get(section, "fileoutput").split(",")
            # strip names from whitespace
            loggers = (x.strip().lower() for x in loggers)
            # no file output for the failures and none Logger
            from ..logger import LoggerNames

            loggers = (
                x
                for x in loggers
                if x in LoggerNames and x not in ("failures", "none")
            )
            for val in loggers:
                output = self.config.logger_new(val, fileoutput=1)
                self.config['fileoutput'].append(output)
        self._append_regex_pairs(section, "ignoreerrors")

    def read_checking_config(self):
        """Read configuration options in section "checking"."""
        section = "checking"
        self.read_int_option(section, "threads", min=-1)
        # -1 is accepted by the check above but stored as 0.
        self.config['threads'] = max(0, self.config['threads'])
        self.read_int_option(section, "timeout", min=1)
        self.read_int_option(section, "aborttimeout", min=1)
        self.read_int_option(section, "recursionlevel", min=-1)
        self.read_string_option(section, "useragent")
        self.read_float_option(section, "maxrequestspersecond", min=0.001)
        self.read_int_option(section, "maxnumurls", min=0)
        self.read_int_option(section, "maxfilesizeparse", min=1)
        self.read_int_option(section, "maxfilesizedownload", min=1)
        if self.has_option(section, "allowedschemes"):
            self.config['allowedschemes'] = [
                x.strip().lower()
                for x in self.get(section, 'allowedschemes').split(',')
            ]
        self.read_boolean_option(section, "debugmemory")
        self.read_string_option(section, "cookiefile")
        self.read_boolean_option(section, "robotstxt")
        self.read_string_option(section, "localwebroot")
        # sslverify is either a boolean or, if it does not parse as one,
        # kept as the raw string value.
        try:
            self.read_boolean_option(section, "sslverify")
        except ValueError:
            self.read_string_option(section, "sslverify")
        self.read_int_option(section, "maxrunseconds", min=0)
        self.read_int_option(section, "resultcachesize", min=0)

    def read_authentication_config(self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError on a malformed "entry" line or a login
            URL that is neither HTTP nor HTTPS
        """
        section = "authentication"
        # Names of the entries that carry a password, for the permission
        # warning issued at the end.
        password_fields = []
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                # whitespace separated: pattern user [password]
                auth = val.split()
                if len(auth) == 3:
                    self.config.add_auth(
                        pattern=auth[0], user=auth[1], password=auth[2]
                    )
                    password_fields.append(f"entry/{auth[0]}/{auth[1]}")
                elif len(auth) == 2:
                    self.config.add_auth(pattern=auth[0], user=auth[1])
                else:
                    raise LinkCheckerError(
                        _("missing auth part in entry %(val)r") % {"val": val}
                    )
        # read login URL and field names
        if self.has_option(section, "loginurl"):
            val = self.get(section, "loginurl").strip()
            if not val.lower().startswith(("http:", "https:")):
                raise LinkCheckerError(
                    _(
                        "invalid login URL `%s'. Only "
                        "HTTP and HTTPS URLs are supported."
                    )
                    % val
                )
            self.config["loginurl"] = val
        self.read_string_option(section, "loginuserfield")
        self.read_string_option(section, "loginpasswordfield")
        # read login extra fields
        if self.has_option(section, "loginextrafields"):
            for val in read_multiline(self.get(section, "loginextrafields")):
                # "name:value"; a line without ":" raises ValueError here.
                name, value = val.split(":", 1)
                self.config["loginextrafields"][name] = value
        self.check_password_readable(section, password_fields)

    def check_password_readable(self, section, fields):
        """Check if there is a readable configuration file and print a warning.

        @param section: name of the section holding the password options
        @param fields: names of the options that contain passwords
        """
        if not fields:
            return
        # The information which of the  configuration files
        # included which option is not available. To avoid false positives,
        # a warning is only printed if exactly one file has been read.
        if len(self.read_ok) != 1:
            return
        fn = self.read_ok[0]
        if fileutil.is_accessable_by_others(fn):
            log.warn(
                LOG_CHECK,
                _(
                    "The configuration file %s contains password information (in"
                    " section [%s] and options %s) and the file is readable by"
                    " others. Please make the file only readable by you."
                ),
                fn,
                section,
                fields,
            )
            # Add a platform specific hint on how to fix the permissions.
            if os.name == 'posix':
                log.warn(LOG_CHECK, _("For example execute 'chmod go-rw %s'.") % fn)
            elif os.name == 'nt':
                log.warn(
                    LOG_CHECK,
                    _(
                        "See %(url)s for more info on setting file permissions."
                    ) % {"url": "https://support.microsoft.com/kb/308419"}
                )

    def read_filtering_config(self):
        """
        Read configuration options in section "filtering".
        """
        section = "filtering"
        if self.has_option(section, "ignorewarnings"):
            self.config['ignorewarnings'] = [
                f.strip().lower()
                for f in self.get(section, 'ignorewarnings').split(',')
            ]
        self._append_regex_pairs(section, "ignorewarningsforurls")
        # "ignore" and "nofollow" both extend externlinks; they differ only
        # in the strict flag passed to get_link_pat().
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)
        self.read_boolean_option(section, "checkextern")

    def read_plugin_config(self):
        """Read plugin-specific configuration values.

        A plugin is enabled when the configuration has a section named
        after the plugin class; the plugin's own read_config() result is
        stored under that name.
        """
        folders = self.config["pluginfolders"]
        modules = plugins.get_plugin_modules(folders)
        for pluginclass in plugins.get_plugin_classes(modules):
            section = pluginclass.__name__
            if self.has_section(section):
                self.config["enabledplugins"].append(section)
                self.config[section] = pluginclass.read_config(self)