File: _dataset_loader.py

"""Abstraction layer for downloading, reading, and loading dataset files.

The classes and methods in this module define an API for working with either
a single file or multiple files which may be downloaded and/or loaded as an
example dataset.

Many datasets have a straightforward input to output mapping:
    file -> read -> dataset

However, some file formats require multiple input files for reading (e.g.
separate data and header files):
    (file1, file2) -> read -> dataset

Or, a dataset may be a combination of two separate datasets:
    file1 -> read -> dataset1 ┬─> combined_dataset
    file2 -> read -> dataset2 ┘

In some cases, the input may be a folder instead of a file (e.g. DICOM):
    folder -> read -> dataset

In addition, there may be a need to customize the reading function to read
files with specific options enabled (e.g. set a time value), or perform
post-read processing to modify the dataset (e.g. set active scalars).

This module aims to serve these use cases and provide a flexible way of
downloading, reading, and processing files with a generic mapping:
    file or files or folder -> fully processed dataset(s) in any form

"""

# ruff: noqa: PTH102,PTH103,PTH107,PTH112,PTH113,PTH117,PTH118,PTH119,PTH122,PTH123,PTH202
from __future__ import annotations

from abc import abstractmethod
import functools
import os
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from typing import Generic
from typing import Protocol
from typing import Sequence
from typing import Tuple
from typing import Type
from typing import TypeVar
from typing import Union
from typing import cast
from typing import final
from typing import runtime_checkable

import pyvista as pv
from pyvista.core._typing_core import NumpyArray
from pyvista.core.utilities.fileio import get_ext

if TYPE_CHECKING:
    from collections.abc import Callable

# Define TypeVars for two main class definitions used by this module:
#   1. classes for single file inputs: T -> T
#   2. classes for multi-file inputs: (T, ...) -> (T, ...)
# Any properties with these typevars should have a one-to-one mapping for all files
_FilePropStrType_co = TypeVar(
    '_FilePropStrType_co',
    str,
    Tuple[str, ...],
    covariant=True,
)
_FilePropIntType_co = TypeVar(
    '_FilePropIntType_co',
    int,
    Tuple[int, ...],
    covariant=True,
)

DatasetObject = Union[pv.DataSet, pv.Texture, NumpyArray[Any], pv.MultiBlock]
DatasetType = Union[
    Type[pv.DataSet],
    Type[pv.Texture],
    Type[NumpyArray[Any]],
    Type[pv.MultiBlock],
]


class _BaseFilePropsProtocol(Generic[_FilePropStrType_co, _FilePropIntType_co]):
    @property
    @abstractmethod
    def path(self) -> _FilePropStrType_co:
        """Return the path(s) of all files."""

    @property
    def num_files(self) -> int:
        """Return the number of files from path or paths.

        If a path is a folder, the number of files contained in the folder is returned.
        """
        path = self.path
        paths = [path] if isinstance(path, str) else path
        return sum(1 if os.path.isfile(p) else len(_get_all_nested_filepaths(p)) for p in paths)

    @property
    def unique_extension(self) -> str | tuple[str, ...]:
        """Return the unique file extension(s) from all files."""
        return _get_unique_extension(self.path)

    @property
    @abstractmethod
    def _filesize_bytes(self) -> _FilePropIntType_co:
        """Return the file size(s) of all files in bytes."""

    @property
    @abstractmethod
    def _filesize_format(self) -> _FilePropStrType_co:
        """Return the formatted size of all file(s)."""

    @property
    @abstractmethod
    def _total_size_bytes(self) -> int:
        """Return the total size of all files in bytes."""

    @property
    @abstractmethod
    def total_size(self) -> str:
        """Return the total size of all files formatted as a string."""

    @property
    @abstractmethod
    def _reader(
        self,
    ) -> pv.BaseReader | tuple[pv.BaseReader | None, ...] | None:
        """Return the base file reader(s) used to read the files."""

    @property
    def unique_reader_type(
        self,
    ) -> type[pv.BaseReader] | tuple[type[pv.BaseReader], ...] | None:
        """Return unique reader type(s) from all file readers."""
        return _get_unique_reader_type(self._reader)


class _SingleFilePropsProtocol(_BaseFilePropsProtocol[str, int]):
    """Define file properties of a single file."""


class _MultiFilePropsProtocol(
    _BaseFilePropsProtocol[Tuple[str, ...], Tuple[int, ...]],
):
    """Define file properties of multiple files."""


@runtime_checkable
class _Downloadable(Protocol[_FilePropStrType_co]):
    """Class which downloads file(s) from a source."""

    @property
    @abstractmethod
    def source_name(self) -> _FilePropStrType_co:
        """Return the name of the download relative to the base url."""

    @property
    @abstractmethod
    def base_url(self) -> _FilePropStrType_co:
        """Return the base url of the download."""

    @property
    def source_url_raw(self) -> _FilePropStrType_co:
        """Return the raw source of the download.

        This is the full URL used to download the data directly.
        """
        name = self.source_name
        name_iter = [name] if isinstance(name, str) else name
        url = self.base_url
        base_url_iter = [url] if isinstance(url, str) else url
        url_raw = [os.path.join(base_url, name) for base_url, name in zip(base_url_iter, name_iter)]
        return url_raw[0] if isinstance(name, str) else tuple(url_raw)

    @property
    def source_url_blob(self) -> _FilePropStrType_co:
        """Return the blob source of the download.

        This URL is useful for linking to the source webpage so that
        a human can open it in a browser.
        """
        # Make single urls iterable and replace 'raw' with 'blob'
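        # e.g. (illustrative URL) .../vtk-data/raw/master/Data/mesh.vtk
        #                      -> .../vtk-data/blob/master/Data/mesh.vtk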
        url_raw = self.source_url_raw
        url_iter = [url_raw] if isinstance(url_raw, str) else url_raw
        url_blob = [url.replace('/raw/', '/blob/') for url in url_iter]
        return url_blob[0] if isinstance(url_raw, str) else tuple(url_blob)

    @property
    @abstractmethod
    def path(self) -> _FilePropStrType_co:
        """Return the file path of downloaded file."""

    @abstractmethod
    def download(self) -> _FilePropStrType_co:
        """Download and return the file path(s)."""


class _DatasetLoader:
    """Load a dataset."""

    def __init__(self, load_func: Callable[..., DatasetObject]):
        self._load_func = load_func
        self._dataset: DatasetObject | None = None

    @property
    @final
    def dataset(self) -> DatasetObject | None:
        """Return the loaded dataset object(s)."""
        return self._dataset

    def load(self, *args, **kwargs) -> DatasetObject:
        """Load and return the dataset."""
        # Subclasses should override this as needed
        return self._load_func(*args, **kwargs)

    @final
    def load_and_store_dataset(self) -> DatasetObject:
        """Load the dataset and store it."""
        dataset = self.load()
        self._dataset = dataset
        return dataset

    @final
    def clear_dataset(self):
        """Clear the stored dataset object from memory."""
        self._dataset = None

    @property
    @final
    def dataset_iterable(self) -> tuple[DatasetObject, ...]:
        """Return a tuple of all dataset object(s), including any nested objects.

        If the dataset is a MultiBlock, the MultiBlock itself is also returned as the first
        item. Any nested MultiBlocks are not included, only their datasets.

        E.g. for a composite dataset:
            MultiBlock -> (MultiBlock, Block0, Block1, ...)
        """
        dataset = self.dataset

        def _flat(obj):
            # Recursively flatten nested sequences (e.g. nested MultiBlocks)
            # into a flat list of leaf datasets.
            if isinstance(obj, Sequence):
                output_list = []
                for item in obj:
                    output_list.extend(_flat(item))
                return output_list
            else:
                return [obj]

        flat = _flat(dataset)
        if isinstance(dataset, pv.MultiBlock):
            flat.insert(0, dataset)
        return tuple(flat)

    @property
    @final
    def unique_dataset_type(
        self,
    ) -> DatasetType | tuple[DatasetType, ...] | None:
        """Return unique dataset type(s) from all datasets."""
        return _get_unique_dataset_type(self.dataset_iterable)

    @property
    @final
    def unique_cell_types(
        self,
    ) -> tuple[pv.CellType, ...]:
        """Return unique cell types from all datasets."""
        cell_types: dict[pv.CellType, None] = {}
        for data in self.dataset_iterable:
            # Get the underlying dataset for the texture
            if isinstance(data, pv.Texture):
                data = cast(pv.ImageData, pv.wrap(data.GetInput()))
            try:
                if isinstance(data, pv.ExplicitStructuredGrid):
                    # extract_cells_by_type does not support this datatype
                    # so get cells manually
                    for cell in data.cell:
                        cell_types[cell.type] = None
                else:
                    for cell_type in pv.CellType:
                        extracted = data.extract_cells_by_type(cell_type)  # type: ignore[union-attr]
                        if extracted.n_cells > 0:
                            cell_types[cell_type] = None
            except AttributeError:
                continue
        return tuple(sorted(cell_types.keys()))


class _SingleFile(_SingleFilePropsProtocol):
    """Wrap a single file."""

    def __init__(self, path):
        from pyvista.examples.downloads import USER_DATA_PATH

        self._path = (
            path if path is None or os.path.isabs(path) else os.path.join(USER_DATA_PATH, path)
        )

    @property
    def path(self) -> str:
        return self._path

    @property
    def _filesize_bytes(self) -> int:
        return _get_file_or_folder_size(self.path)

    @property
    def _filesize_format(self) -> str:
        return _format_file_size(self._filesize_bytes)

    @property
    def _total_size_bytes(self) -> int:
        return self._filesize_bytes

    @property
    def total_size(self) -> str:
        return self._filesize_format

    @property
    def _reader(self) -> pv.BaseReader | None:
        return None


class _SingleFileDatasetLoader(_SingleFile, _DatasetLoader):
    """Wrap a single file for loading.

    Specify the read function and/or load functions for reading and processing the
    dataset. The read function is called on the file path first, then, if a load
    function is specified, the load function is called on the output from the read
    function.

    Parameters
    ----------
    path
        Path of the file to be loaded.

    read_func
        Specify the function used to read the file. Defaults to :func:`pyvista.read`.
        This can be used for customizing the reader's properties, or using another
        read function (e.g. :func:`pyvista.read_texture` for textures). The function
        must have the file path as the first argument and should return a dataset.
        If the read function requires additional arguments, use
        :class:`functools.partial` to pre-set them before passing the function
        to the loader.

    load_func
        Specify the function used to load the file. Defaults to ``None``. This is
        typically used to apply any post-read processing to the dataset. The load
        function should accept a dataset as input and return a dataset.
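
    Examples
    --------
    A minimal sketch (the file name here is hypothetical)::

        loader = _SingleFileDatasetLoader(
            'hypothetical_mesh.vtk',
            load_func=lambda mesh: mesh.extract_surface(),
        )
        surface = loader.load()  # read with pv.read, then post-process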

    """

    def __init__(
        self,
        path: str,
        read_func: Callable[[str], DatasetType] | None = None,
        load_func: Callable[[DatasetType], Any] | None = None,
    ):
        _SingleFile.__init__(self, path)
        _DatasetLoader.__init__(self, load_func)  # type: ignore[arg-type]
        self._read_func = pv.read if path and read_func is None else read_func

    @property
    def _reader(self) -> pv.BaseReader | None:
        # TODO: return the actual reader used, and not just a lookup
        #       (this will require an update to the 'read_func' API)
        try:
            return pv.get_reader(self.path)
        except ValueError:
            # Cannot be read directly (requires custom reader)
            return None

    @property
    def path_loadable(self) -> str:
        return self.path

    def load(self):
        path = self.path
        read_func = self._read_func
        load_func = self._load_func
        try:
            # Read and load normally
            return read_func(path) if load_func is None else load_func(read_func(path))
        except OSError:
            # Handle error generated by pv.read if reading a directory
            if read_func is pv.read and Path(path).is_dir():
                # Re-define read function to read all files in a directory as a multiblock
                read_func = lambda path: _load_as_multiblock(
                    [
                        _SingleFileDatasetLoader(str(Path(path, fname)))
                        for fname in sorted(os.listdir(path))
                    ],
                )
                return read_func(path) if load_func is None else load_func(read_func(path))
            else:
                raise RuntimeError(f'Error loading dataset from path:\n\t{self.path}')


class _DownloadableFile(_SingleFile, _Downloadable[str]):
    """Wrap a single file which must be downloaded.

    If downloading a file from an archive, set ``path`` to the zip file's path and
    set ``target_file`` to the file to extract from it. If the path is a zip file
    and no target file is specified, the entire archive is downloaded and extracted,
    and the root directory of the path is returned.
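
    Examples
    --------
    A minimal sketch (the archive and target file names are hypothetical)::

        file = _DownloadableFile('hypothetical_archive.zip', target_file='inner_mesh.vtk')
        path = file.download()  # extract and return the path to 'inner_mesh.vtk'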

    """

    def __init__(
        self,
        path: str,
        target_file: str | None = None,
    ):
        _SingleFile.__init__(self, path)

        from pyvista.examples.downloads import SOURCE
        from pyvista.examples.downloads import USER_DATA_PATH
        from pyvista.examples.downloads import _download_archive_file_or_folder
        from pyvista.examples.downloads import download_file
        from pyvista.examples.downloads import file_from_files
        from pyvista.examples.examples import dir_path

        if Path(path).is_absolute():
            # Absolute path must point to a built-in dataset
            assert Path(path).parent == Path(
                dir_path,
            ), "Absolute path must point to a built-in dataset."
            self._base_url = "https://github.com/pyvista/pyvista/raw/main/pyvista/examples/"
            self._source_name = Path(path).name
            # the dataset is already downloaded (it's built-in)
            # so make download() simply return the local filepath
            self._download_func = lambda source: path
        else:
            # Relative path, use vars from downloads.py
            self._base_url = SOURCE
            self._download_func = download_file
            self._source_name = path

        target_file = '' if target_file is None and (get_ext(path) == '.zip') else target_file
        if target_file is not None:
            # download from archive
            self._download_func = functools.partial(
                _download_archive_file_or_folder,
                target_file=target_file,
            )
            # The file path currently points to the archive, not the target file itself
            # Try to resolve the full path to the target file (without downloading) if
            # the archive already exists in the cache
            fullpath = None
            if os.path.isfile(self.path):
                try:
                    # Get file path
                    fullpath = file_from_files(target_file, self.path)
                except (FileNotFoundError, RuntimeError):
                    # Get folder path
                    fullpath = os.path.join(USER_DATA_PATH, path + '.unzip', target_file)
                    fullpath = fullpath if os.path.isdir(fullpath) else None
            # set the file path as the relative path of the target file if
            # the fullpath could not be resolved (i.e. not yet downloaded)
            self._path = target_file if fullpath is None else fullpath

    @property
    def source_name(self) -> str:
        return self._source_name

    @property
    def base_url(self) -> str:
        return self._base_url

    def download(self) -> str:
        path = self._download_func(self._source_name)
        assert os.path.isfile(path) or os.path.isdir(path)
        # Reset the path since the full path for archive files
        # isn't known until after downloading
        self._path = path
        return path


class _SingleFileDownloadableDatasetLoader(_SingleFileDatasetLoader, _DownloadableFile):
    """Wrap a single file which must first be downloaded and which can also be loaded.

    .. warning::

       ``download()`` should be called before accessing other attributes. Otherwise,
       calling ``load()`` or accessing ``path`` may fail or produce unexpected results.
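
    Examples
    --------
    Typical usage (the file name here is hypothetical)::

        loader = _SingleFileDownloadableDatasetLoader('hypothetical_mesh.vtk')
        loader.download()  # must be called before load()
        mesh = loader.load()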

    """

    def __init__(
        self,
        path: str,
        read_func: Callable[[str], DatasetType] | None = None,
        load_func: Callable[[DatasetType], DatasetType] | None = None,
        target_file: str | None = None,
    ):
        _SingleFileDatasetLoader.__init__(self, path, read_func=read_func, load_func=load_func)
        _DownloadableFile.__init__(self, path, target_file=target_file)


class _MultiFileDatasetLoader(_DatasetLoader, _MultiFilePropsProtocol):
    """Wrap multiple files for loading.

    Some use cases for loading multi-file examples include:

    1. Multiple input files, and each file is read/loaded independently
       E.g.: loading two separate datasets for the example
       See ``download_bolt_nut`` for a reference implementation.

    2. Multiple input files, but only one is read or loaded directly
       E.g.: loading a single dataset from a file format where data and metadata are
       stored in separate files, such as ``.raw`` and ``.mhd``.
       See ``download_head`` for a reference implementation.

    3. Multiple input files, all of which make up part of the loaded dataset
       E.g.: loading six separate image files for cubemaps
       See ``download_sky_box_cube_map`` for a reference implementation.

    Parameters
    ----------
    files_func
        Specify the function which will return a sequence of :class:`_SingleFile`
        objects required for loading the dataset. Alternatively, a directory can be
        specified, in which case a separate single-file dataset loader is created
        for each file with a default reader.

    load_func
        Specify the function used to load the files. By default, :meth:`load()` is called
        on all the files (if loadable) and a tuple containing the loaded datasets is returned.
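
    Examples
    --------
    A minimal sketch (the file names are hypothetical)::

        def _files_func():
            return (
                _SingleFileDatasetLoader('hypothetical_part1.vtk'),
                _SingleFileDatasetLoader('hypothetical_part2.vtk'),
            )

        loader = _MultiFileDatasetLoader(_files_func, load_func=_load_as_multiblock)
        multi = loader.load()  # returns a MultiBlock with one block per file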

    """

    def __init__(
        self,
        files_func: str | Callable[[], Sequence[_SingleFileDatasetLoader | _DownloadableFile]],
        load_func: Callable[[Sequence[_SingleFileDatasetLoader]], Any] | None = None,
    ):
        self._files_func = files_func
        self._file_loaders_ = None
        if load_func is None:
            load_func = _load_as_dataset_or_multiblock

        _DatasetLoader.__init__(self, load_func)

    @property
    def _file_objects(self):
        if self._file_loaders_ is None and not isinstance(self._files_func, str):
            self._file_loaders_ = self._files_func()
        return self._file_loaders_

    @property
    def path(self) -> tuple[str, ...]:
        return tuple(_flatten_nested_sequence([file.path for file in self._file_objects]))

    @property
    def path_loadable(self) -> tuple[str, ...]:
        return tuple(
            [
                file.path
                for file in self._file_objects
                if isinstance(file, _SingleFileDatasetLoader)
            ],
        )

    @property
    def _filesize_bytes(self) -> tuple[int, ...]:
        return tuple(
            _flatten_nested_sequence([file._filesize_bytes for file in self._file_objects]),
        )

    @property
    def _filesize_format(self) -> tuple[str, ...]:
        return tuple([_format_file_size(size) for size in self._filesize_bytes])

    @property
    def _total_size_bytes(self) -> int:
        return sum([file._total_size_bytes for file in self._file_objects])

    @property
    def total_size(self) -> str:
        return _format_file_size(self._total_size_bytes)

    @property
    def _reader(
        self,
    ) -> pv.BaseReader | tuple[pv.BaseReader | None, ...] | None:
        # TODO: return the actual reader used, and not just a lookup
        #       (this will require an update to the 'read_func' API)
        reader = _flatten_nested_sequence([file._reader for file in self._file_objects])
        # flatten in case any file objects themselves are multifiles
        reader_out: list[pv.BaseReader] = []
        for r in reader:
            if isinstance(r, Sequence):
                reader_out.extend(r)
            else:
                reader_out.append(r)
        return tuple(reader_out)

    def load(self):
        return self._load_func(self._file_objects)


class _MultiFileDownloadableDatasetLoader(
    _MultiFileDatasetLoader,
    _Downloadable[Tuple[str, ...]],
):
    """Wrap multiple files for downloading and loading."""

    @property
    def source_name(self) -> tuple[str, ...]:
        name = [file.source_name for file in self._file_objects if isinstance(file, _Downloadable)]
        return tuple(_flatten_nested_sequence(name))

    @property
    def base_url(self) -> tuple[str, ...]:
        url = [file.base_url for file in self._file_objects if isinstance(file, _Downloadable)]
        return tuple(_flatten_nested_sequence(url))

    def download(self) -> tuple[str, ...]:
        path = [file.download() for file in self._file_objects if isinstance(file, _Downloadable)]
        # flatten paths in case any loaders have multiple files
        path_out = _flatten_nested_sequence(path)
        assert all(os.path.isfile(p) or os.path.isdir(p) for p in path_out)
        return tuple(path_out)


_ScalarType = TypeVar('_ScalarType', int, str, pv.BaseReader)


def _flatten_nested_sequence(nested: Sequence[_ScalarType | Sequence[_ScalarType]]):
    """Flatten nested sequences of objects."""
    flat: list[_ScalarType] = []
    for item in nested:
        if isinstance(item, Sequence) and not isinstance(item, str):
            flat.extend(item)
        else:
            flat.append(item)
    return flat


def _download_dataset(
    dataset_loader: _SingleFileDownloadableDatasetLoader | _MultiFileDownloadableDatasetLoader,
    load: bool = True,
    metafiles: bool = False,
):
    """Download and load a dataset file or files.

    Parameters
    ----------
    dataset_loader
        SingleFile or MultiFile object(s) of the dataset(s) to download or load.

    load
        Read and load the file after downloading. When ``False``,
        return the path or paths to the example's file(s).

    metafiles
        When ``load`` is ``False``, set this value to ``True`` to
        return all files required to load the example, including any metafiles.
        If ``False``, only the paths of files which are explicitly loaded are
        returned. E.g. if a file format uses two files to specify the header info
        and file data separately, setting ``metafiles=True`` will return a tuple
        with both file paths, whereas setting ``metafiles=False`` will only return
        the single path of the header file as a string.

    Returns
    -------
    Any
        Loaded dataset or path(s) to the example's files depending on the ``load``
        parameter. Dataset may be a texture, mesh, multiblock, array, tuple of meshes,
        or any other output loaded by the example.
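
    Examples
    --------
    A minimal sketch (the loader and file name here are hypothetical)::

        loader = _SingleFileDownloadableDatasetLoader('hypothetical_mesh.vtk')
        mesh = _download_dataset(loader)              # download, read, and load
        path = _download_dataset(loader, load=False)  # only return the local path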

    """
    # Download all files for the dataset, including any metafiles
    path = dataset_loader.download()

    # Exclude non-loadable metafiles from result (if any)
    if not metafiles and isinstance(dataset_loader, _MultiFileDownloadableDatasetLoader):
        path = dataset_loader.path_loadable
        # Return scalar if only one loadable file
        path = path[0] if len(path) == 1 else path

    return dataset_loader.load() if load else path


def _load_as_multiblock(
    files: Sequence[_SingleFileDatasetLoader | _MultiFileDatasetLoader],
    names: Sequence[str] | None = None,
) -> pv.MultiBlock:
    """Load multiple files as a MultiBlock.

    This function can be used as a loading function for :class:`_MultiFileDatasetLoader`.
    If the ``names`` parameter is needed, use :class:`functools.partial` to partially
    specify it before passing this function as the loading function.
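
    For example (the block names here are hypothetical)::

        load_func = functools.partial(_load_as_multiblock, names=['part1', 'part2'])
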
    """
    multi = pv.MultiBlock()
    if names is None:
        # Set names: use the filename without its extension by default, or the
        # directory name for folders
        paths = _flatten_nested_sequence(
            [file.path_loadable for file in files if isinstance(file, _DatasetLoader)],
        )
        paths = [Path(path) for path in paths]
        names = [
            path.name[: -len(get_ext(path.name))] if path.is_file() else path.name for path in paths
        ]

    for file, name in zip(files, names):
        if not isinstance(file, _DatasetLoader):
            continue
        loaded = file.load()
        assert isinstance(
            loaded,
            (pv.MultiBlock, pv.DataSet),
        ), f"Only MultiBlock or DataSet objects can be loaded as a MultiBlock. Got {type(loaded)}.'"
        multi.append(loaded, name)
    return multi


def _load_as_cubemap(files: str | _SingleFile | Sequence[_SingleFile]) -> pv.Texture:
    """Load multiple files as a cubemap.

    Input may be a single directory with 6 cubemap files, or a sequence
    of 6 files.
    """
    path = (
        files
        if isinstance(files, str)
        else (files.path if isinstance(files, _SingleFile) else [file.path for file in files])
    )

    return (
        pv.cubemap(path)
        if isinstance(files, str) and os.path.isdir(files)
        else pv.cubemap_from_filenames(path)
    )


def _load_as_dataset_or_multiblock(files):
    multiblock = _load_as_multiblock(files)
    return multiblock[0] if len(multiblock) == 1 else multiblock


def _load_and_merge(files: Sequence[_SingleFile]):
    """Load all loadable files as separate datasets and merge them."""
    loaded = [file.load() for file in files if isinstance(file, _DatasetLoader)]
    assert len(loaded) > 0
    return pv.merge(loaded)


def _get_file_or_folder_size(filepath) -> int:
    if os.path.isfile(filepath):
        return os.path.getsize(filepath)
    assert os.path.isdir(filepath), 'Expected a file or folder path.'
    all_filepaths = _get_all_nested_filepaths(filepath)
    return sum(os.path.getsize(file) for file in all_filepaths)


def _format_file_size(size: int) -> str:
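    # e.g. 500 -> '500 B', 1_234_567 -> '1.2 MB' (decimal units: 1 KB == 1000 B)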
    size_flt = float(size)
    for unit in ('B', 'KB', 'MB'):
        if round(size_flt * 10) / 10 < 1000.0:
            return f"{int(size_flt)} {unit}" if unit == 'B' else f"{size_flt:3.1f} {unit}"
        size_flt /= 1000.0
    return f"{size_flt:.1f} GB"


def _get_file_or_folder_ext(path: str):
    """Wrap the `get_ext` function to handle special cases for directories."""
    if os.path.isfile(path):
        return get_ext(path)
    assert os.path.isdir(path), 'Expected a file or folder path.'
    all_paths = _get_all_nested_filepaths(path)
    ext = [get_ext(file) for file in all_paths]
    assert len(ext) != 0, f'No files with extensions were found in:\n\t{path}'
    return ext


def _get_all_nested_filepaths(filepath, exclude_readme=True):
    """Walk through directory and get all file paths.

    Optionally exclude any readme files (if any).
    """
    assert os.path.isfile(filepath) or os.path.isdir(filepath)
    condition = lambda name: True if not exclude_readme else not name.lower().startswith('readme')
    return [
        os.path.join(path, name)
        for path, _, files in os.walk(filepath)
        for name in files
        if condition(name)
    ]


def _get_unique_extension(path: str | Sequence[str]):
    """Return a file extension or unique set of file extensions from a path or paths."""
    ext_set = set()
    fname_sequence = [path] if isinstance(path, str) else path

    # Add all file extensions to the set
    for file in fname_sequence:
        ext = _get_file_or_folder_ext(file)
        if isinstance(ext, str):
            ext_set.add(ext)
        else:
            ext_set.update(ext)

    # Format output
    ext_output = tuple(ext_set)
    return ext_output[0] if len(ext_output) == 1 else tuple(sorted(ext_output))


def _get_unique_reader_type(
    reader: pv.BaseReader | tuple[pv.BaseReader | None, ...] | None,
) -> type[pv.BaseReader] | tuple[type[pv.BaseReader], ...] | None:
    """Return a reader type or tuple of unique reader types."""
    if reader is None or (isinstance(reader, Sequence) and all(r is None for r in reader)):
        return None
    reader_set: set[type[pv.BaseReader]] = set()
    reader_type = (
        [type(reader)]
        if not isinstance(reader, Sequence)
        else [type(r) for r in reader if r is not None]
    )

    # Add all reader types to the set
    reader_set.update(reader_type)

    # Format output
    reader_output = tuple(reader_set)
    return reader_output[0] if len(reader_output) == 1 else tuple(reader_output)


def _get_unique_dataset_type(
    dataset_iterable: tuple[DatasetObject, ...],
) -> DatasetType | tuple[DatasetType, ...]:
    """Return a dataset type or tuple of unique dataset types."""
    dataset_types: dict[DatasetType, None] = {}  # use dict as an ordered set
    for dataset in dataset_iterable:
        dataset_types[type(dataset)] = None
    output = tuple(dataset_types.keys())
    return output[0] if len(output) == 1 else output