1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
|
# or for easier access of certain commonly used module methods
import datetime
import pprint
import CTDopts.CTDopts # once you installed it, it's just CTDopts
from CTDopts.CTDopts import CTDModel, args_from_file, parse_cl_directives, flatten_dict, override_args, ArgumentRestrictionError
# let's set up a PrettyPrinter so nested dictionaries are easier to follow later
pp = pprint.PrettyPrinter(indent=4)
pretty_print = pp.pprint
# First, we'll set up a CTD model. There are two different ways to do that:
# 1. Define it in Python using CTDopts.CTDModel's methods
# 2. load it from a CTD file
# Every CTD Model has to have at least a name and a version, plus any of the optional attributes below them.
model = CTDModel(
name='exampleTool', # required
version='1.0', # required
description='This is an example tool presenting CTDopts usage',
manual='manual string',
docurl='http://dummy.url/docurl.html',
category='testing',
executableName='exampletool',
executablePath='/path/to/exec/exampletool-1.0/exampletool'
)
# The parameters of the tool have to be registered the following way:
model.add(
'positive_int', # parameter name
type=int, # parameter type. For a list of CTD-supported types see CTDopts.CTDTYPE_TO_TYPE.keys()
num_range=(0, None), # numeric range restriction: tuple with minimum and maximum values. None means unlimited
default=5,
tags=['advanced', 'magic'], # for certain workflow engines that make use of parameter tags
description='A positive integer parameter'
)
model.add(
'input_files',
required=True,
type='input-file', # or 'output-file'
is_list=True, # for list parameters with an arbitrary number of values
file_formats=['fastq', 'fastq.gz'], # filename restrictions
description='A list of filenames to feed this dummy tool with'
)
model.add(
'output',
required=True,
type='output-prefix', # or 'input-prefix'
is_list=False,
file_formats=['fastq', 'fastq.gz'], # filename restrictions
description='Output directory'
)
model.add(
'this_that',
type=str,
choices=['this', 'that'], # controlled vocabulary
default='this',
description='A controlled vocabulary parameter. Allowed values `this` or `that`.'
)
# Certain tools may want to group parameters together. One can define them like this:
subparams = model.add_group('subparams', 'Grouped settings')
# register sub-parameters to a group:
subparams.add(
'param_1',
type=float,
default=5.5,
description='Some floating point setting.'
)
subparams.add(
'param_2',
is_list=True,
type=float,
tags=['advanced'],
default=[0.0, 2.5, 5.0],
description='A list of floating point settings'
)
subsubparams = subparams.add_group('subsubparam', 'A group of sub-subsettings')
subsubparams.add(
'param_3',
type=int,
default=2,
description="A subsetting's subsetting"
)
# Now we have a CTDModel. To write the model to a CTD (xml) file:
print('Model being written to exampleTool.ctd...\n')
model.write_ctd('exampleTool.ctd')
# However, if we already have a CTD model for a tool, we can spare the pain of defining it like above, we can just
# load it from a file directly. Like this:
print('Model loaded from exampleTool.ctd...\n')
model_2 = CTDModel(from_file='exampleTool.ctd')
# We can list all the model's parameters. The below call will get a list of all Parameter objects registered in the model.
# These objects store name, type, default, restriction, parent group etc. information we set above.
params = model.list_parameters()
print("For debugging purposes we can output a human readable representation of Parameter objects. Here's the first one:")
print(params[0])
print
# Let's print out the name attributes of these parameters.
print('The following parameters were registered in the model:')
print([p.name for p in params])
print
# In the above model, certain parameters were registered under parameter groups. We can access their 'lineage' and see
# their nesting levels. Let's display nesting levels separated by colons:
print('The same parameters with subgroup information, if they were registered under parameter groups:')
print([':'.join(p.get_lineage(name_only=True)) for p in params])
print()
# (Parameter.get_lineage() returns a list of ParameterGroups down to the leaf Parameter. `name_only` setting returns
# only the names of the objects, instead of the actual Parameter objects.
# Some of the parameters had default values in the model. We can get those:
print('A dictionary of parameters with default values, returned by CTDModel.get_defaults():')
defaults = model_2.get_defaults()
pretty_print(defaults)
print()
print('As you can see, parameter values are usually stored in nested dictionaries. If you want a flat dictionary, you can'
'get that using CTDopts.flatten_dict(). Flat keys can be either tuples of tree node (subgroup) names down to the parameter...')
flat_defaults = flatten_dict(defaults)
pretty_print(flat_defaults)
print()
print('...or they can be strings where nesing levels are separated by colons:')
flat_defaults_colon = flatten_dict(defaults, as_string=True)
pretty_print(flat_defaults_colon)
print()
print('We can create dictionaries of arguments on our own that we want to validate against the model.'
'CTDopts can read them from argument-storing CTD files or from the command line, but we can just define them in a '
'nested dictionary on our own as well. We start with defining them explicitly.')
new_values = {
'positive_int': 111,
'input_files': ['file1.fastq', 'file2.fastq', 'file3.fastq'],
'subparams': {'param_1': '999.0'}
}
pretty_print(new_values)
print()
print("We can validate these arguments against the model, and get a dictionary with parameter types correctly casted "
"and defaults set. Note that subparams:param_1 was casted from string to a floating point number because that's how it "
"was defined in the model.")
validated = model.validate_args(new_values)
pretty_print(validated)
print()
print('We can write a CTD file containing these validated argument values. Just call CTDModel.write_ctd() with an extra '
'parameter: the nested argument dictionary containing the actual values.')
model.write_ctd('exampleTool_preset_params.ctd', validated)
print()
print('As mentioned earlier, CTDopts can load argument values from CTD files. Feel free to change some values in '
"exampleTool_preset_params.ctd you've just written, and load it back.")
args_from_ctd = args_from_file('exampleTool_preset_params.ctd')
pretty_print(args_from_ctd)
print()
print("Notice that all the argument values are strings now. This is because we didn't validate them against the model, "
"just loaded some stuff from a file into a dictionary. If you want to cast them, call CTDModel.validate_args():")
validated_2 = model.validate_args(args_from_ctd)
pretty_print(validated_2)
print()
print("Now certain parameters may have restrictions that we might want to validate for as well. Let's set the parameter "
"positive_int to a negative value, and try to validate it with a strictness level enforce_restrictions=1. This "
"will register a warning, but still accept the value.")
validated_2['positive_int'] = -5
_ = model.validate_args(validated_2, enforce_restrictions=1)
print()
print("Validation enforcement levels can be 0, 1 or 2 for type-casting, restriction-checking and required argument presence. "
"They can be set with the keywords enforce_type, enforce_restrictions and enforce_required respectively. Let's increase "
"strictness for restriction checking. CTDModel.validate_args() will now raise an exception that we'll catch:\n")
try:
model.validate_args(validated_2, enforce_restrictions=2) # , enforce_type=0, enforce_required=0
except ArgumentRestrictionError as ee:
# other exceptions: ArgumentTypeError, ArgumentMissingError, all subclasses of Argumenterror
print(ee)
print()
print("One might want to combine arguments loaded from a CTD file with arguments coming from elsewhere, like the command line."
"In that case, the method CTDopts.override_args(*arg_dicts) creates a combined argument dictionary where argument values "
"are always taken from the rightmost (last) dictionary that has them. Let's override a few parameters:")
override = {
'this_that': 'that',
'positive_int': 777
}
overridden = override_args(validated, override)
pretty_print(overridden)
print()
print("So how to deal with command line arguments? If we have a model, we can look for its arguments. "
"Call CTDModel.parse_cl_args() with either a string of the command line call or a list with the split words. "
"By default, it will assume a '--' prefix before parameter names, but it can be overridden with prefix='-'."
"Grouped parameters are expected in --group:subgroup:param_x format.")
cl_args = model.parse_cl_args('--positive_int 44 --subparams:param_2 5.0 5.5 6.0 --input_files a.fastq b.fastq --output out_dir/')
pretty_print(cl_args)
print()
# # you can get unmatchable command line arguments with get_remaining=True like:
# cl_args, unparsed = model.parse_cl_args('--positive_int 44 --subparams:param_2 5.0 5.5 6.0 --unrelated_stuff abc', get_remaining=True)
print("Override other parameters with them, and validate it against the model:")
overridden_with_cl = override_args(validated, cl_args)
validated_3 = model.validate_args(overridden_with_cl)
pretty_print(validated_3)
print
print("One last thing: certain command line directives that are specific to CTD functionality can be parsed for, "
"to help your script performing common tasks. These are CTD argument input, CTD model file writing and CTD argument "
"file writing. CTDopts.parse_cl_directives() can also be customized as to what directives to look for if the defaults "
"--input_ctd, --write_tool_ctd and --write_param_ctd respectively don't satisfy you.")
directives_1 = parse_cl_directives('--input_ctd exampleTool_preset_params.ctd --write_param_ctd new_preset_params.ctd')
pretty_print(directives_1)
directives_2 = parse_cl_directives('-inctd exampleTool_preset_params_2.ctd -toolctd ', input_ctd='inctd', write_tool_ctd='toolctd', prefix='-')
pretty_print(directives_2)
# the returned dictionary always contains the following three keys:
# 'input_ctd'
# 'write_tool_ctd'
# 'write_param_ctd'
# and their values are either a filename (if it was passed), a boolean True, if the flag was set but no filename provided
# (expecting the tool to use default values, like toolName.ctd) or None if the flag wasn't used at all.
# for example, if directives['input_ctd'] is set, one would load arguments from that file with
# CTDopts.args_from_file(directives['input_ctd']), validate them and run the tool.
# If I found directives['write_tool_ctd'], I'd immediately output the tool's CTD model with
# model.write_ctd(directives['write_tool_ctd']), etc.
print("Finally, writing CTDs with logging information, passing a dictionary"
"with a 'log' keyword, using any or all of the fields shown below.")
time_start = datetime.datetime.now(datetime.timezone.utc).isoformat()
# do stuff
output = 'Output of my program, however I generated or logged it'
errors = 'Standard error output of my program, however I caught or redirected them'
warnings = 'Warnings of my program'
exitstatus = '1'
time_finish = datetime.datetime.now(datetime.timezone.utc).isoformat()
log = {
'time_start': time_start, # make sure to give it a legal XML date string if you can.
'time_finish': time_finish, # You can generate them with datetime.datetime.now(pytz.utc).isoformat()
'status': exitstatus,
'output': output,
'warning': warnings,
'error': errors
}
model.write_ctd('exampleTool_w_logging.ctd', validated_3, log)
# Methods you might find helpful to deal with argument dictionaries (see docstrings):
# CTDopts.set_nested_key(dictionary, tuple_w_levels, value) and
# CTDopts.get_nested_key(dictionary, tuple_w_levels for nested dictionaries
|