File: files.py

package info (click to toggle)
python-mkdocs 1.6.1%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,812 kB
  • sloc: python: 14,346; javascript: 10,535; perl: 143; sh: 57; makefile: 30; xml: 11
file content (626 lines) | stat: -rw-r--r-- 23,569 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
from __future__ import annotations

import enum
import fnmatch
import logging
import os
import posixpath
import shutil
import warnings
from functools import cached_property
from pathlib import PurePath, PurePosixPath
from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Mapping, Sequence, overload
from urllib.parse import quote as urlquote

import pathspec
import pathspec.gitignore
import pathspec.util

from mkdocs import utils

if TYPE_CHECKING:
    import jinja2.environment

    from mkdocs.config.defaults import MkDocsConfig
    from mkdocs.structure.pages import Page


# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)


class InclusionLevel(enum.Enum):
    """
    How much of the build a file takes part in, ordered from least to most included.

    The numeric values are ordered on purpose: the predicate methods below are plain
    threshold comparisons against `DRAFT` and `NOT_IN_NAV`. Each predicate takes the level
    as its only argument, so it can be passed around as a `Callable[[InclusionLevel], bool]`.
    """

    EXCLUDED = -3
    """The file is excluded and will not be processed."""
    DRAFT = -2
    """The file is excluded from the final site, but will still be populated during `mkdocs serve`."""
    NOT_IN_NAV = -1
    """The file is part of the site, but doesn't produce nav warnings."""
    UNDEFINED = 0
    """Still needs to be computed based on the config. If the config doesn't kick in, acts the same as `included`."""
    INCLUDED = 1
    """The file is part of the site. Documentation pages that are omitted from the nav will produce warnings."""

    def all(self):
        """Accept every level; always returns True."""
        return True

    def is_included(self):
        """Return True for levels strictly above `DRAFT`."""
        return InclusionLevel.DRAFT.value < self.value

    def is_excluded(self):
        """Return True for `DRAFT` and anything below it (the complement of `is_included`)."""
        return InclusionLevel.DRAFT.value >= self.value

    def is_in_serve(self):
        """Return True for `DRAFT` and anything above it."""
        return InclusionLevel.DRAFT.value <= self.value

    def is_in_nav(self):
        """Return True for levels strictly above `NOT_IN_NAV`."""
        return InclusionLevel.NOT_IN_NAV.value < self.value

    def is_not_in_nav(self):
        """Return True for `NOT_IN_NAV` and anything below it (the complement of `is_in_nav`)."""
        return InclusionLevel.NOT_IN_NAV.value >= self.value


class Files:
    """
    A collection of [File][mkdocs.structure.files.File] objects.

    Files are stored in a dict keyed by their `src_uri`, so iteration follows insertion order
    and there is at most one file per `src_uri`.
    """

    def __init__(self, files: Iterable[File]) -> None:
        # If several files share a `src_uri`, the last one wins.
        self._src_uris = {f.src_uri: f for f in files}

    def __iter__(self) -> Iterator[File]:
        """Iterate over the files within."""
        return iter(self._src_uris.values())

    def __len__(self) -> int:
        """The number of files within."""
        return len(self._src_uris)

    def __contains__(self, path: str) -> bool:
        """Soft-deprecated, prefer `get_file_from_path(path) is not None`."""
        return PurePath(path).as_posix() in self._src_uris

    @property
    def src_paths(self) -> dict[str, File]:
        """Soft-deprecated, prefer `src_uris`."""
        return {file.src_path: file for file in self}

    @property
    def src_uris(self) -> Mapping[str, File]:
        """
        A mapping containing every file, with the keys being their
        [`src_uri`][mkdocs.structure.files.File.src_uri].
        """
        return self._src_uris

    def get_file_from_path(self, path: str) -> File | None:
        """
        Return a File instance with File.src_uri equal to path.

        `path` is converted to its POSIX form before the lookup.
        Returns `None` if there is no such file.
        """
        return self._src_uris.get(PurePath(path).as_posix())

    def append(self, file: File) -> None:
        """
        Add file to the Files collection.

        Appending a file whose `src_uri` is already present is deprecated: it emits a
        `DeprecationWarning` and replaces the existing entry.
        """
        if file.src_uri in self._src_uris:
            warnings.warn(
                "To replace an existing file, call `remove` before `append`.", DeprecationWarning
            )
            # Delete first so that the re-inserted file moves to the end of the iteration order.
            del self._src_uris[file.src_uri]
        self._src_uris[file.src_uri] = file

    def remove(self, file: File) -> None:
        """
        Remove file from Files collection.

        The file is looked up by its `src_uri`.

        Raises:
            ValueError: If no file with that `src_uri` is in the collection.
        """
        try:
            del self._src_uris[file.src_uri]
        except KeyError:
            # The KeyError is an implementation detail; keep it out of the traceback.
            raise ValueError(f'{file.src_uri!r} not in collection') from None

    def copy_static_files(
        self,
        dirty: bool = False,
        *,
        inclusion: Callable[[InclusionLevel], bool] = InclusionLevel.is_included,
    ) -> None:
        """
        Copy static files from source to destination.

        Only files that are not documentation pages and whose inclusion level satisfies the
        `inclusion` predicate are copied. `dirty` is passed through to `File.copy_file`.
        """
        for file in self:
            if not file.is_documentation_page() and inclusion(file.inclusion):
                file.copy_file(dirty)

    def documentation_pages(
        self, *, inclusion: Callable[[InclusionLevel], bool] = InclusionLevel.is_included
    ) -> Sequence[File]:
        """Return iterable of all Markdown page file objects accepted by the `inclusion` predicate."""
        return [file for file in self if file.is_documentation_page() and inclusion(file.inclusion)]

    def static_pages(self) -> Sequence[File]:
        """Return iterable of all static page file objects."""
        return [file for file in self if file.is_static_page()]

    def media_files(self) -> Sequence[File]:
        """Return iterable of all file objects which are not documentation or static pages."""
        return [file for file in self if file.is_media_file()]

    def javascript_files(self) -> Sequence[File]:
        """Return iterable of all javascript file objects."""
        return [file for file in self if file.is_javascript()]

    def css_files(self) -> Sequence[File]:
        """Return iterable of all CSS file objects."""
        return [file for file in self if file.is_css()]

    def add_files_from_theme(self, env: jinja2.Environment, config: MkDocsConfig) -> None:
        """
        Retrieve static files from Jinja environment and add to collection.

        Template names matching any of the exclusion patterns below are skipped, as are names
        that already exist in this collection. Each remaining name is added from the first
        directory in `config.theme.dirs` that contains it as a regular file.
        """
        # Built once up front; the list does not depend on the template name being tested.
        excluded_patterns = [
            # '.*' filters dot files/dirs at root level whereas '*/.*' filters nested levels
            '.*',
            '*/.*',
            '*.py',
            '*.pyc',
            '*.html',
            '*readme*',
            'mkdocs_theme.yml',
            # Exclude translation files
            'locales/*',
            *(f'*{ext}' for ext in utils.markdown_extensions),
            *config.theme.static_templates,
        ]

        def is_static_asset(name: str) -> bool:
            # Names are lower-cased before matching, so e.g. 'README.txt' hits '*readme*'.
            lowered = name.lower()
            return not any(fnmatch.fnmatch(lowered, pattern) for pattern in excluded_patterns)

        for path in env.list_templates(filter_func=is_static_asset):
            # Theme files do not override docs_dir files
            if self.get_file_from_path(path) is not None:
                continue
            for theme_dir in config.theme.dirs:
                # Find the first theme dir which contains path
                if os.path.isfile(os.path.join(theme_dir, path)):
                    self.append(File(path, theme_dir, config.site_dir, config.use_directory_urls))
                    break

    @property
    def _files(self) -> Iterable[File]:
        """Deprecated accessor kept for backward compatibility; warns and returns the collection itself."""
        warnings.warn("Do not access Files._files.", DeprecationWarning)
        return self

    @_files.setter
    def _files(self, value: Iterable[File]):
        # Deprecated: warns, then rebuilds the whole `src_uri` mapping from `value`.
        warnings.warn("Do not access Files._files.", DeprecationWarning)
        self._src_uris = {f.src_uri: f for f in value}


class File:
    """
    A MkDocs File object.

    It represents how the contents of one file should be populated in the destination site.

    A file always has its `abs_dest_path` (obtained by joining `dest_dir` and `dest_path`),
    where the `dest_dir` is understood to be the *site* directory.

    `content_bytes`/`content_string` (new in MkDocs 1.6) can always be used to obtain the file's
    content. But it may be backed by one of the two sources:

    *   A physical source file at `abs_src_path` (by default obtained by joining `src_dir` and
        `src_uri`). `src_dir` is understood to be the *docs* directory.

        Then `content_bytes`/`content_string` will read the file at `abs_src_path`.

        `src_dir` *should* be populated for real files and should be `None` for generated files.

    *   Since MkDocs 1.6 a file may alternatively be stored in memory - `content_string`/`content_bytes`.

        Then `src_dir` and `abs_src_path` will remain `None`. `content_bytes`/`content_string` need
        to be written to, or populated through the `content` argument in the constructor.

        But `src_uri` is still populated for such files as well! The virtual file pretends as if it
        originated from that path in the `docs` directory, and other values are derived.

    For static files the file is just copied to the destination, and `dest_uri` equals `src_uri`.

    For Markdown files (determined by the file extension in `src_uri`) the destination content
    will be the rendered content, and `dest_uri` will have the `.html` extension and some
    additional transformations to the path, based on `use_directory_urls`.
    """

    src_uri: str
    """The pure path (always '/'-separated) of the source file relative to the source directory."""

    use_directory_urls: bool
    """Whether directory URLs ('foo/') should be used or not ('foo.html').

    If `False`, a Markdown file is mapped to an HTML file of the same name (the file extension is
    changed to `.html`). If True, a Markdown file is mapped to an HTML index file (`index.html`)
    nested in a directory using the "name" of the file in `path`. Non-Markdown files retain their
    original path.
    """

    src_dir: str | None
    """The OS path of the top-level directory that the source file originates from.

    Assumed to be the *docs_dir*; not populated for generated files."""

    dest_dir: str
    """The OS path of the destination directory (top-level site_dir) that the file should be copied to."""

    inclusion: InclusionLevel = InclusionLevel.UNDEFINED
    """Whether the file will be excluded from the built site."""

    generated_by: str | None = None
    """If not None, indicates that a plugin generated this file on the fly.

    The value is the plugin's entrypoint name and can be used to find the plugin by key in the PluginCollection."""

    _content: str | bytes | None = None
    """If set, the file's content will be read from here.

    This logic is handled by `content_bytes`/`content_string`, which should be used instead of
    accessing this attribute."""

    @property
    def src_path(self) -> str:
        """Same as `src_uri` (and synchronized with it) but will use backslashes on Windows. Discouraged."""
        return os.path.normpath(self.src_uri)

    @src_path.setter
    def src_path(self, value: str) -> None:
        # Nothing is stored under `src_path` itself: the value is converted to POSIX form
        # and written to `src_uri`, which is the single source of truth.
        self.src_uri = PurePath(value).as_posix()

    @property
    def dest_path(self) -> str:
        """Same as `dest_uri` (and synchronized with it) but will use backslashes on Windows. Discouraged."""
        return os.path.normpath(self.dest_uri)

    @dest_path.setter
    def dest_path(self, value: str) -> None:
        # Mirrors the `src_path` setter: the value is converted to POSIX form and stored as `dest_uri`.
        self.dest_uri = PurePath(value).as_posix()

    # Defaults to None; nothing in this class assigns it.
    # NOTE(review): presumably set to the `Page` built from this file by code elsewhere - confirm.
    page: Page | None = None

    @overload
    @classmethod
    def generated(
        cls,
        config: MkDocsConfig,
        src_uri: str,
        *,
        content: str | bytes,
        inclusion: InclusionLevel = InclusionLevel.UNDEFINED,
    ) -> File:
        """
        Create a virtual file backed by in-memory content.

        It will pretend to be a file in the docs dir at `src_uri`.
        """

    @overload
    @classmethod
    def generated(
        cls,
        config: MkDocsConfig,
        src_uri: str,
        *,
        abs_src_path: str,
        inclusion: InclusionLevel = InclusionLevel.UNDEFINED,
    ) -> File:
        """
        Create a virtual file backed by a physical temporary file at `abs_src_path`.

        It will pretend to be a file in the docs dir at `src_uri`.
        """

    @classmethod
    def generated(
        cls,
        config: MkDocsConfig,
        src_uri: str,
        *,
        content: str | bytes | None = None,
        abs_src_path: str | None = None,
        inclusion: InclusionLevel = InclusionLevel.UNDEFINED,
    ) -> File:
        """
        Create a virtual file, backed either by in-memory `content` or by a file at `abs_src_path`.

        It will pretend to be a file in the docs dir at `src_uri`.

        `dest_dir` and `use_directory_urls` are taken from `config`, and `src_dir` is `None`.

        Raises:
            TypeError: If both or neither of `content` and `abs_src_path` are given.
        """
        # The two `is None` results are equal exactly when both or neither argument was passed.
        if (content is None) == (abs_src_path is None):
            raise TypeError("File must have exactly one of 'content' or 'abs_src_path'")
        f = cls(
            src_uri,
            src_dir=None,
            dest_dir=config.site_dir,
            use_directory_urls=config.use_directory_urls,
            inclusion=inclusion,
        )
        # Falls back to '<unknown>' when `_current_plugin` is falsy.
        # NOTE(review): presumably that means no plugin event is running - confirm.
        f.generated_by = config.plugins._current_plugin or '<unknown>'
        # Assigning here pre-fills the `abs_src_path` cached property, bypassing its `src_dir` logic.
        f.abs_src_path = abs_src_path
        f._content = content
        return f

    def __init__(
        self,
        path: str,
        src_dir: str | None,
        dest_dir: str,
        use_directory_urls: bool,
        *,
        dest_uri: str | None = None,
        inclusion: InclusionLevel = InclusionLevel.UNDEFINED,
    ) -> None:
        """
        Create a file entry.

        Args:
            path: Source path relative to `src_dir`. It goes through the `src_path` setter,
                so it is stored as `src_uri` in POSIX form.
            src_dir: Top-level source directory, or `None`.
            dest_dir: Top-level destination directory.
            use_directory_urls: Stored as-is; used when deriving `dest_uri` and `url`.
            dest_uri: Explicit destination path. When omitted, `dest_uri` is computed on
                first access.
            inclusion: Initial inclusion level.
        """
        self.src_path = path
        self.src_dir = src_dir
        self.dest_dir = dest_dir
        self.use_directory_urls = use_directory_urls
        if dest_uri is not None:
            # Pre-fills the `dest_uri` cached property so the default derivation never runs.
            self.dest_uri = dest_uri
        self.inclusion = inclusion

    def __repr__(self) -> str:
        """Return a constructor-like representation (accessing `dest_uri` may compute it)."""
        return (
            f"{type(self).__name__}({self.src_uri!r}, src_dir={self.src_dir!r}, "
            f"dest_dir={self.dest_dir!r}, use_directory_urls={self.use_directory_urls!r}, "
            f"dest_uri={self.dest_uri!r}, inclusion={self.inclusion})"
        )

    @utils.weak_property
    def edit_uri(self) -> str | None:
        """
        A path relative to the source repository to use for the "edit" button.

        Defaults to `src_uri` and can be overwritten.
        For generated files this should be set to `None`.
        """
        return self.src_uri if self.generated_by is None else None

    def _get_stem(self) -> str:
        """Soft-deprecated, do not use."""
        filename = posixpath.basename(self.src_uri)
        stem, ext = posixpath.splitext(filename)
        # 'README' is treated as an alias of 'index'; the comparison is case-sensitive.
        return 'index' if stem == 'README' else stem

    # Computed on first access from `src_uri` and then cached on the instance.
    name = cached_property(_get_stem)
    """Return the name of the file without its extension."""

    def _get_dest_path(self, use_directory_urls: bool | None = None) -> str:
        """Soft-deprecated, do not use."""
        if self.is_documentation_page():
            parent, filename = posixpath.split(self.src_uri)
            # An explicit argument overrides the instance setting.
            if use_directory_urls is None:
                use_directory_urls = self.use_directory_urls
            if not use_directory_urls or self.name == 'index':
                # index.md or README.md => index.html
                # foo.md => foo.html
                return posixpath.join(parent, self.name + '.html')
            else:
                # foo.md => foo/index.html
                return posixpath.join(parent, self.name, 'index.html')
        # Anything that is not a documentation page keeps its source path.
        return self.src_uri

    # Computed on first access and then cached; can be pre-filled by assignment (see `__init__`).
    dest_uri = cached_property(_get_dest_path)
    """The pure path (always '/'-separated) of the destination file relative to the destination directory."""

    def _get_url(self, use_directory_urls: bool | None = None) -> str:
        """Soft-deprecated, do not use."""
        url = self.dest_uri
        dirname, filename = posixpath.split(url)
        # An explicit argument overrides the instance setting.
        if use_directory_urls is None:
            use_directory_urls = self.use_directory_urls
        if use_directory_urls and filename == 'index.html':
            # 'foo/index.html' => 'foo/', and a top-level 'index.html' => './'
            url = (dirname or '.') + '/'
        return urlquote(url)

    # Computed on first access from `dest_uri` and then cached on the instance.
    url = cached_property(_get_url)
    """The URI of the destination file relative to the destination directory as a string."""

    @cached_property
    def abs_src_path(self) -> str | None:
        """
        The absolute concrete path of the source file. Will use backslashes on Windows.

        Note: do not use this path to read the file, prefer `content_bytes`/`content_string`.
        """
        if self.src_dir is None:
            return None
        return os.path.normpath(os.path.join(self.src_dir, self.src_uri))

    @cached_property
    def abs_dest_path(self) -> str:
        """The absolute concrete path of the destination file. Will use backslashes on Windows."""
        return os.path.normpath(os.path.join(self.dest_dir, self.dest_uri))

    def url_relative_to(self, other: File | str) -> str:
        """Return url for file relative to other file (a `File`, or a URL string used as-is)."""
        return utils.get_relative_url(self.url, other.url if isinstance(other, File) else other)

    @property
    def content_bytes(self) -> bytes:
        """
        Get the content of this file as a bytestring.

        May raise if backed by a real file (`abs_src_path`) if it cannot be read.

        If used as a setter, it defines the content of the file, and `abs_src_path` becomes unset.
        """
        content = self._content
        if content is None:
            # No in-memory content: read the backing file on every access (not cached).
            assert self.abs_src_path is not None
            with open(self.abs_src_path, 'rb') as f:
                return f.read()
        if not isinstance(content, bytes):
            # In-memory `str` content is encoded with the default codec of `str.encode` (UTF-8).
            content = content.encode()
        return content

    @content_bytes.setter
    def content_bytes(self, value: bytes) -> None:
        assert isinstance(value, bytes)
        self._content = value
        # The in-memory content replaces the backing file.
        self.abs_src_path = None

    @property
    def content_string(self) -> str:
        """
        Get the content of this file as a string. Assumes UTF-8 encoding, may raise.

        May also raise if backed by a real file (`abs_src_path`) if it cannot be read.

        If used as a setter, it defines the content of the file, and `abs_src_path` becomes unset.
        """
        content = self._content
        if content is None:
            # No in-memory content: read the backing file on every access (not cached).
            # 'utf-8-sig' drops a leading byte order mark if one is present.
            assert self.abs_src_path is not None
            with open(self.abs_src_path, encoding='utf-8-sig', errors='strict') as f:
                return f.read()
        if not isinstance(content, str):
            content = content.decode('utf-8-sig', errors='strict')
        return content

    @content_string.setter
    def content_string(self, value: str) -> None:
        assert isinstance(value, str)
        self._content = value
        # The in-memory content replaces the backing file.
        self.abs_src_path = None

    def copy_file(self, dirty: bool = False) -> None:
        """
        Copy source file to destination, ensuring parent directories exist.

        With `dirty=True` the copy is skipped when `is_modified()` reports no change.
        In-memory content is written out directly (text as UTF-8, bytes verbatim);
        otherwise the file at `abs_src_path` is copied.
        """
        if dirty and not self.is_modified():
            log.debug(f"Skip copying unmodified file: '{self.src_uri}'")
            return
        log.debug(f"Copying media file: '{self.src_uri}'")
        output_path = self.abs_dest_path
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        content = self._content
        if content is None:
            assert self.abs_src_path is not None
            try:
                utils.copy_file(self.abs_src_path, output_path)
            except shutil.SameFileError:
                pass  # Let plugins write directly into site_dir.
        elif isinstance(content, str):
            with open(output_path, 'w', encoding='utf-8') as output_file:
                output_file.write(content)
        else:
            with open(output_path, 'wb') as output_file:
                output_file.write(content)

    def is_modified(self) -> bool:
        """
        Return whether the destination should be (re)written.

        Always True for in-memory content and when the destination file does not exist;
        otherwise True only if the destination's mtime is older than the source's.
        """
        if self._content is not None:
            return True
        assert self.abs_src_path is not None
        if os.path.isfile(self.abs_dest_path):
            return os.path.getmtime(self.abs_dest_path) < os.path.getmtime(self.abs_src_path)
        return True

    def is_documentation_page(self) -> bool:
        """Return True if file is a Markdown page."""
        return utils.is_markdown_file(self.src_uri)

    def is_static_page(self) -> bool:
        """Return True if file is a static page (HTML, XML, JSON)."""
        return self.src_uri.endswith(('.html', '.htm', '.xml', '.json'))

    def is_media_file(self) -> bool:
        """Return True if file is not a documentation or static page."""
        return not (self.is_documentation_page() or self.is_static_page())

    def is_javascript(self) -> bool:
        """Return True if file is a JavaScript file."""
        return self.src_uri.endswith(('.js', '.javascript', '.mjs'))

    def is_css(self) -> bool:
        """Return True if file is a CSS file."""
        return self.src_uri.endswith('.css')


# Baseline exclusion patterns in gitignore syntax: dot-prefixed names and the root-level
# `templates/` directory. `set_exclusions` places the user's `exclude_docs` patterns after
# these when building the effective spec.
_default_exclude = pathspec.gitignore.GitIgnoreSpec.from_lines(['.*', '/templates/'])


def set_exclusions(files: Iterable[File], config: MkDocsConfig) -> None:
    """
    Resolve the inclusion level of every file that is still `UNDEFINED`.

    Files that already carry another level are left untouched. For the rest, the first
    matching rule wins, checked in this order: the default exclusions combined with
    `exclude_docs`, then `draft_docs`, then `not_in_nav`; files matching none of them
    become `INCLUDED`.
    """
    user_exclude: pathspec.gitignore.GitIgnoreSpec | None = config.get('exclude_docs')
    if user_exclude:
        exclude = _default_exclude + user_exclude
    else:
        exclude = _default_exclude
    drafts: pathspec.gitignore.GitIgnoreSpec | None = config.get('draft_docs')
    nav_exclude: pathspec.gitignore.GitIgnoreSpec | None = config.get('not_in_nav')

    for file in files:
        if file.inclusion != InclusionLevel.UNDEFINED:
            continue  # Explicitly set levels are never overridden by the config.

        uri = file.src_uri
        if exclude.match_file(uri):
            level = InclusionLevel.EXCLUDED
        elif drafts and drafts.match_file(uri):
            level = InclusionLevel.DRAFT
        elif nav_exclude and nav_exclude.match_file(uri):
            level = InclusionLevel.NOT_IN_NAV
        else:
            level = InclusionLevel.INCLUDED
        file.inclusion = level


def get_files(config: MkDocsConfig) -> Files:
    """
    Walk the `docs_dir` and return a Files collection.

    Symlinked directories are followed. Within each directory, `index`/`README` files are
    listed first, and directories are visited in sorted order.

    When two files in the same directory map to the same `dest_uri` (e.g. `README.md` and
    `index.md`), only one of them is kept: the later one if it is included in the site
    (a warning is logged if the earlier one was included too), otherwise the earlier one.
    """
    docs_dir = config['docs_dir']
    files: list[File] = []
    # Pairs of (earlier, later) files from one directory that share a `dest_uri`.
    conflicting_files: list[tuple[File, File]] = []
    for source_dir, dirnames, filenames in os.walk(docs_dir, followlinks=True):
        relative_dir = os.path.relpath(source_dir, docs_dir)
        # Sorting `dirnames` in place also determines the order in which `os.walk` descends.
        dirnames.sort()
        filenames.sort(key=_file_sort_key)

        files_by_dest: dict[str, File] = {}
        for filename in filenames:
            file = File(
                os.path.join(relative_dir, filename),
                docs_dir,
                config['site_dir'],
                config['use_directory_urls'],
            )
            # Skip README.md if an index file also exists in dir (part 1)
            prev_file = files_by_dest.setdefault(file.dest_uri, file)
            if prev_file is not file:
                conflicting_files.append((prev_file, file))
            files.append(file)

    # Inclusion levels must be known before deciding which side of each conflict to keep.
    set_exclusions(files, config)
    # Skip README.md if an index file also exists in dir (part 2)
    for a, b in conflicting_files:
        if b.inclusion.is_included():
            if a.inclusion.is_included():
                log.warning(
                    f"Excluding '{a.src_uri}' from the site because it conflicts with '{b.src_uri}'."
                )
            try:
                files.remove(a)
            except ValueError:
                pass  # Catching this to avoid errors if attempting to remove the same file twice.
        else:
            try:
                files.remove(b)
            except ValueError:
                pass  # Same as above: the file may already have been removed.

    return Files(files)


def file_sort_key(f: File, /):
    """
    Sort key that reproduces the ordering of `get_files` - index first, directories last.

    Use it as the `key` argument when sorting a list of `File`. The key is the tuple
    `(parent directories, is-not-index flag, basename)`, or an empty tuple when `src_uri`
    has no path components.
    """
    segments = PurePosixPath(f.src_uri).parts
    if segments:
        *_, basename = segments
        # False sorts before True, which puts the index file ahead of its siblings.
        not_index = f.name != "index"
        return (segments[:-1], not_index, basename)
    return ()


def _file_sort_key(f: str):
    """
    Sort key for bare filenames that puts `index` and `README` files first.

    Only meaningful for basenames, not for paths with directories.
    """
    stem, _extension = os.path.splitext(f)
    is_ordinary = stem not in ('index', 'README')
    # False sorts before True, so index/README come first; ties fall back to the filename.
    return (is_ordinary, f)


def _sort_files(filenames: Iterable[str]) -> list[str]:
    """Soft-deprecated, do not use. Returns a new list of `filenames` ordered by `_file_sort_key`."""
    ordered = list(filenames)
    ordered.sort(key=_file_sort_key)
    return ordered


def _filter_paths(basename: str, path: str, is_dir: bool, exclude: Iterable[str]) -> bool:
    """
    Deprecated: report whether a path matches any of the `exclude` patterns.

    Always emits a `DeprecationWarning`. Returns True on the first matching pattern,
    False if none matches.
    """
    warnings.warn(
        "_filter_paths is not used since MkDocs 1.5 and will be removed soon.", DeprecationWarning
    )

    def pattern_applies(item: str) -> bool:
        # Items ending in '/' apply only to directories.
        if item.endswith('/') and not is_dir:
            return False
        # Items starting with '/' apply to the whole path.
        # In any other cases just the basename is used.
        candidate = path if item.startswith('/') else basename
        return fnmatch.fnmatch(candidate, item.strip('/'))

    return any(pattern_applies(item) for item in exclude)