1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
"""Validation class for BIDS projects."""
import re
import os
import json
from functools import lru_cache
class BIDSValidator():
    """Object for BIDS (Brain Imaging Data Structure) verification.

    The main method of this class is `is_bids()`. You should use it for
    checking whether a file path is compatible with BIDS.

    Each ``is_*`` method loads a JSON rule file from `dir_rules` and tests
    the path against the regular expressions found in it.
    """

    def __init__(self, index_associated=True):
        """Initialize BIDSValidator object.

        Parameters
        ----------
        index_associated : bool
            Specifies if an associated data should be checked. If it is true
            then any file paths in directories `code/`, `derivatives/`,
            `sourcedata/` and `stimuli/` will pass the validation, else they
            won't. Defaults to True.
        """
        # Directory holding the JSON rule files, next to this module. The
        # trailing "/" is kept so the attribute value stays what callers
        # have always seen.
        self.dir_rules = os.path.dirname(__file__) + "/rules/"
        self.index_associated = index_associated

    def is_bids(self, path):
        """Check if file path adheres to BIDS.

        Main method of the validator. Uses other class methods for checking
        different aspects of the file path.

        Parameters
        ----------
        path : str
            Path of a file to be checked. Must be relative to root of a BIDS
            dataset, and must include a leading forward slash `/`.

        Returns
        -------
        bool
            True if at least one of the individual checks accepts `path`.

        Notes
        -----
        When you test a file path, make sure that the path is relative to the
        root of the BIDS dataset the file is part of. That is, as soon as the
        file path contains parts outside of the BIDS dataset, the validation
        will fail. For example "home/username/my_dataset/participants.tsv" will
        fail, although "/participants.tsv" is a valid BIDS file.

        Examples
        --------
        >>> from bids_validator import BIDSValidator
        >>> validator = BIDSValidator()
        >>> filepaths = ["/sub-01/anat/sub-01_rec-CSD_T1w.nii.gz",
        ... "/sub-01/anat/sub-01_acq-23_rec-CSD_T1w.exe", # wrong extension
        ... "home/username/my_dataset/participants.tsv", # not relative to root
        ... "/participants.tsv"]
        >>> for filepath in filepaths:
        ...     print(validator.is_bids(filepath))
        True
        False
        False
        True
        """
        # The checks run in this order and stop at the first one that passes.
        checks = (
            self.is_top_level,
            self.is_associated_data,
            self.is_session_level,
            self.is_subject_level,
            self.is_phenotypic,
            self.is_file,
        )
        return any(check(path) for check in checks)

    def _rules(self, rules_file):
        """Return the regular expressions read from `rules_file`."""
        # os.path.join gives the same path as plain concatenation when
        # `dir_rules` ends with a separator, and still yields a valid path
        # when a caller has replaced it with a value that does not.
        return self.get_regular_expressions(
            os.path.join(self.dir_rules, rules_file))

    def _matches_any(self, rules_file, path):
        """Return True if any expression in `rules_file` is found in `path`."""
        return any(
            re.search(regexp, path) for regexp in self._rules(rules_file))

    def is_top_level(self, path):
        """Check if the file has appropriate name for a top-level file."""
        return self._matches_any('top_level_rules.json', path)

    def is_associated_data(self, path):
        """Check if file is appropriate associated data.

        Always returns False when `index_associated` is false.
        """
        if not self.index_associated:
            return False
        return self._matches_any('associated_data_rules.json', path)

    def is_session_level(self, path):
        """Check if the file has appropriate name for a session level.

        Unlike the other checks, every expression is evaluated with
        `conditional_match()` rather than a plain search.
        """
        regexps = self._rules('session_level_rules.json')
        return any(self.conditional_match(regexp, path) for regexp in regexps)

    def is_subject_level(self, path):
        """Check if the file has appropriate name for a subject level."""
        return self._matches_any('subject_level_rules.json', path)

    def is_phenotypic(self, path):
        """Check if file is phenotypic data."""
        return self._matches_any('phenotypic_rules.json', path)

    def is_file(self, path):
        """Check if the file name matches any of the file-level rules."""
        return self._matches_any('file_level_rules.json', path)

    @staticmethod
    @lru_cache
    def get_regular_expressions(file_name):
        """Read regular expressions from a file.

        Parameters
        ----------
        file_name : str
            Path of a JSON file that maps rule names to objects with a
            ``"regexp"`` entry and an optional ``"tokens"`` mapping.

        Returns
        -------
        list of str
            One expression per rule, in file order, with every token replaced
            by its alternatives joined with ``|``. The result is cached per
            `file_name`, so the same list object is handed to every caller;
            do not modify it.
        """
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(file_name, encoding="utf-8") as fin:
            rules = json.load(fin)

        regexps = []
        for rule in rules.values():
            regexp = rule["regexp"]
            # Expand each placeholder into an alternation of its values.
            for token, alternatives in rule.get("tokens", {}).items():
                regexp = regexp.replace(token, "|".join(alternatives))
            regexps.append(regexp)
        return regexps

    @staticmethod
    def conditional_match(expression, path):
        """Find conditional match.

        Takes the first match of `expression` in `path` and accepts it when
        its second captured group is empty, or equals the third captured
        group with the first character of that group removed.

        Parameters
        ----------
        expression : str
            Regular expression; the group indexing below expects at least
            three capturing groups.
        path : str
            Path to test.

        Returns
        -------
        bool
            False when there is no match or the groups disagree.
        """
        # NOTE(review): presumably group 2 is the session directory label and
        # group 3 the "_ses-..." part of the file name -- confirm against
        # session_level_rules.json.
        matches = re.compile(expression).findall(path)
        if not matches:
            return False
        first = matches[0]
        # adapted from JS code and JS does not support conditional groups
        return bool(first) and (first[1] == first[2][1:] or not first[1])
|