File: bids_validator.py

package info (click to toggle)
python-bids-validator 1.14.6-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 308 kB
sloc: python: 192; sh: 15; makefile: 7
file content (153 lines) | stat: -rw-r--r-- 5,401 bytes
"""Validation class for BIDS projects."""
import re
import os
import json
from functools import lru_cache


class BIDSValidator:
    """Object for BIDS (Brain Imaging Data Structure) verification.

    The main method of this class is `is_bids()`. You should use it for
    checking whether a file path is compatible with BIDS.

    Every ``is_*`` check reads a JSON rule file located in `dir_rules` and
    tests the given path against the regular expressions defined there.

    """

    def __init__(self, index_associated=True):
        """Initialize BIDSValidator object.

        Parameters
        ----------
        index_associated : bool
            Specifies if an associated data should be checked. If it is true
            then any file paths in directories `code/`, `derivatives/`,
            `sourcedata/` and `stimuli/` will pass the validation, else they
            won't. Defaults to True.

        """
        # The trailing "" makes join() end the path with a separator, so the
        # attribute keeps its "<module dir>/rules/" shape. Unlike plain string
        # concatenation, an empty module directory yields the relative
        # "rules/" instead of the absolute "/rules/".
        self.dir_rules = os.path.join(os.path.dirname(__file__), "rules", "")
        self.index_associated = index_associated

    def is_bids(self, path):
        """Check if file path adheres to BIDS.

        Main method of the validator. Uses other class methods for checking
        different aspects of the file path.

        Parameters
        ----------
        path : str
            Path of a file to be checked. Must be relative to root of a BIDS
            dataset, and must include a leading forward slash `/`.

        Returns
        -------
        bool
            True if at least one of the individual checks accepts `path`.

        Notes
        -----
        When you test a file path, make sure that the path is relative to the
        root of the BIDS dataset the file is part of. That is, as soon as the
        file path contains parts outside of the BIDS dataset, the validation
        will fail. For example "home/username/my_dataset/participants.tsv" will
        fail, although "/participants.tsv" is a valid BIDS file.

        Examples
        --------
        >>> from bids_validator import BIDSValidator
        >>> validator = BIDSValidator()
        >>> filepaths = ["/sub-01/anat/sub-01_rec-CSD_T1w.nii.gz",
        ... "/sub-01/anat/sub-01_acq-23_rec-CSD_T1w.exe", # wrong extension
        ... "home/username/my_dataset/participants.tsv", # not relative to root
        ... "/participants.tsv"]
        >>> for filepath in filepaths:
        ...     print(validator.is_bids(filepath))
        True
        False
        False
        True

        """
        # any() short-circuits, so later rule files are only consulted when
        # the earlier checks reject the path.
        return any(
            check(path) for check in (
                self.is_top_level,
                self.is_associated_data,
                self.is_session_level,
                self.is_subject_level,
                self.is_phenotypic,
                self.is_file
            )
        )

    def _rules_path(self, rules_file):
        """Return the path of `rules_file` inside `dir_rules`."""
        # join() copes with `dir_rules` both with and without a trailing
        # separator, which plain string concatenation does not.
        return os.path.join(self.dir_rules, rules_file)

    def _search_rules(self, rules_file, path):
        """Return True if any regexp from `rules_file` is found in `path`."""
        regexps = self.get_regular_expressions(self._rules_path(rules_file))

        return any(re.search(regexp, path) for regexp in regexps)

    def is_top_level(self, path):
        """Check if the file has appropriate name for a top-level file.

        Returns True if `path` matches any rule in `top_level_rules.json`.
        """
        return self._search_rules('top_level_rules.json', path)

    def is_associated_data(self, path):
        """Check if file is appropriate associated data.

        Always returns False when the validator was created with
        ``index_associated=False``; otherwise returns True if `path` matches
        any rule in `associated_data_rules.json`.
        """
        if not self.index_associated:
            return False

        return self._search_rules('associated_data_rules.json', path)

    def is_session_level(self, path):
        """Check if the file has appropriate name for a session level.

        Returns True if `path` satisfies `conditional_match()` for any rule
        in `session_level_rules.json`.
        """
        regexps = self.get_regular_expressions(
            self._rules_path('session_level_rules.json'))

        return any(self.conditional_match(regexp, path) for regexp in regexps)

    def is_subject_level(self, path):
        """Check if the file has appropriate name for a subject level.

        Returns True if `path` matches any rule in `subject_level_rules.json`.
        """
        return self._search_rules('subject_level_rules.json', path)

    def is_phenotypic(self, path):
        """Check if file is phenotypic data.

        Returns True if `path` matches any rule in `phenotypic_rules.json`.
        """
        return self._search_rules('phenotypic_rules.json', path)

    def is_file(self, path):
        """Check if the path matches one of the file-level rules.

        Returns True if `path` matches any rule in `file_level_rules.json`.
        """
        return self._search_rules('file_level_rules.json', path)

    @staticmethod
    @lru_cache
    def get_regular_expressions(file_name):
        """Read regular expressions from a file.

        Parameters
        ----------
        file_name : str
            Path of a JSON file mapping rule names to objects that have a
            ``"regexp"`` entry and, optionally, a ``"tokens"`` entry mapping
            placeholder strings to lists of alternatives.

        Returns
        -------
        list of str
            One regular expression per rule, with every token placeholder
            replaced by its alternatives joined with ``|``.

        Notes
        -----
        Results are cached per `file_name`, so each file is read only once
        and every caller receives the same list object. Treat it as
        read-only.

        """
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(file_name, encoding="utf-8") as fin:
            rules = json.load(fin)

        regexps = []

        for rule in rules.values():
            regexp = rule["regexp"]

            for token, alternatives in rule.get("tokens", {}).items():
                regexp = regexp.replace(token, "|".join(alternatives))

            regexps.append(regexp)

        return regexps

    @staticmethod
    def conditional_match(expression, path):
        """Find conditional match.

        Parameters
        ----------
        expression : str
            Regular expression with at least three capturing groups.
        path : str
            Path to be tested.

        Returns
        -------
        bool
            False if `expression` is not found in `path`. Otherwise True when,
            for the first match, the second group is empty or equals the third
            group with its first character removed.

        Notes
        -----
        In the session-level rules the second and third groups are presumably
        the ``ses-<label>`` directory and the ``_ses-<label>`` part of the
        file name -- verify against the rule files.

        """
        matches = re.findall(expression, path)
        if not matches:
            return False

        # With several capturing groups findall() yields one tuple of group
        # strings per match; groups that did not participate are "".
        groups = matches[0]
        # adapted from JS code and JS does not support conditional groups
        return bool(groups) and (groups[1] == groups[2][1:] or not groups[1])