1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
|
#!/usr/bin/env python3
import argparse
from collections import namedtuple
import json
import os
import shutil
from typing import Any
import features
SpreadTaskNames = namedtuple(
'SpreadTaskNames', ['original', 'suite', 'task', 'variant'])
def _remove_json_extension(file_name: str) -> str:
return os.path.splitext(file_name)[0] if file_name.endswith('.json') else file_name
def _parse_file_name(file_name: str) -> SpreadTaskNames:
'''
Given a file name in the format with double slashes <backend>:<system>:suite--path--task:variant
and optionally a json extension, it returns the original name, the suite name, the task name,
and the variant name. So in the example, it returns:
- original_name = <backend>:<system>:suite/path/task:variant
- suite_name = suite/path
- task_name = task
- variant_name = variant
:param file_name: The file name to parse
:returns: A namedtuple with the original name, the suite name, the task name and the variant name. If variant is not present, it's value is an empty string.
'''
file_name = _remove_json_extension(file_name)
original_name = file_name.replace('--', '/')
task = ':'.join(original_name.split(':')[2:])
suite_name = '/'.join(task.split('/')[:-1])
task_name = task.split('/')[-1]
variant_name = ''
if task_name.count(':') == 1:
variant_name = task_name.split(':')[1]
task_name = task_name.split(':')[0]
return SpreadTaskNames(original_name, suite_name, task_name, variant_name)
def _compose_test(dir: str, file: str, failed_tests: set[str]) -> features.TaskFeatures:
'''
Creates a dictionary with the features of a test and test information.
The features are read from the file and the test information is extracted from the file name.
:param dir: The directory where the file is located
:param file: The file name
:param failed_tests: String containing the names of failing tests
:returns: A dictionary with test information and features
'''
with open(os.path.join(dir, file), 'r', encoding='utf-8') as f:
original, suite_name, result_name, variant_name = _parse_file_name(
file)
task_features = features.TaskFeatures(
suite=suite_name,
task_name=result_name,
variant=variant_name,
success=original not in failed_tests
)
task_features.update(json.loads(f.read()))
return task_features
def _compose_env_variables(env_variables: list[str]) -> list[features.EnvVariables]:
'''
Given environment variables as a list of strings key=value, it creates
a list of dictionaries of [{"name": <env1-name>, "value": <env1-value>}...]
:param env_variables: a list of strings with key=value environment variables
:returns: A list of dictionaries
'''
composed = []
for env in env_variables:
name, sep, value = env.partition('=')
if sep != '=':
raise ValueError("Not a key=value pair {}".format(env))
composed.append(features.EnvVariables(
name=name.strip(), value=value.strip()))
return composed
def compose_system(dir: str, system: str, failed_tests: set[str], env_variables: list[str], scenarios: list[str]) -> features.SystemFeatures:
'''
Given a containing directory, a system-identifying string, and other information
about failed tests, environment variables, and scenarios, it creates a dictionary
containing the feature information found in the files contained in the directory
for that system.
:param dir: Directory that contains feature-tagging files
:param system: Identifying string to select only files with that string
:param failed_tests: String containing the names of failing tests
:param env_variables: List of strings with key=value environment variables
:param scenarios: List of strings with scenario names
:returns: Dictionary containing all tests and tests information for the system
'''
files = [file for file in os.listdir(
dir) if system in file and file.count(':') >= 2]
return features.SystemFeatures(
schema_version='0.0.0',
system=system,
scenarios=[scenario.strip()
for scenario in scenarios] if scenarios else [],
env_variables=_compose_env_variables(env_variables),
tests=[_compose_test(dir, file, failed_tests) for file in files]
)
def get_system_list(dir: str) -> set[str]:
'''
Constructs a list of all systems from the filenames in the specified directory
:param dir: Directory containing feature-tagging information for tests
:returns: Set of identifying strings for systems
'''
files = [f for f in os.listdir(dir)
if os.path.isfile(os.path.join(dir, f))]
return {':'.join(file.split(':')[:2])
for file in files if file.count(':') >= 2}
def _replace_tests(old_json_file: str, new_json_file: str) -> features.SystemFeatures:
'''
The new_json_file contains a subset of the tests found in the old_json_file.
This function leaves not-rerun tests untouched, while replacing old test
runs with their rerun counterparts found in new_json_file. The resulting
json in output therefore contains a mix of tests that were not rerun and
the latest version of tests that were rerun.
:param old_json_file: file path of first run of composed features
:param new_json_file: file path of rerun of composed features
:returns: dictionary that contains the first run data with rerun tests
replaced by the rerun data from the new_json_file
'''
with open(old_json_file, 'r', encoding='utf-8') as f:
old_json = json.load(f)
with open(new_json_file, 'r', encoding='utf-8') as f:
new_json = json.load(f)
for test in new_json['tests']:
for old_test in old_json['tests']:
if old_test['task_name'] == test['task_name'] and old_test['suite'] == test['suite'] and old_test['variant'] == test['variant']:
old_test.clear()
for key, value in test.items():
old_test[key] = value
break
return old_json
def _get_original_and_rerun_list(filenames: list[str]) -> tuple[list[str], list[str]]:
'''
Given a list of filenames, gets two lists of rerun information:
the first list contains the first run (of systems that were rerun)
while the second list contains all reruns, sorted from earliest to latest.
Note: the list of first runs ONLY contains the first run of reruns;
it does not contain systems that had no rerun.
:param filenames: a list of filenames
:returns: the list of first runs and the list of all reruns
'''
reruns = [file for file in filenames if not _remove_json_extension(
file).endswith('_1')]
originals = [file for file in filenames
if _remove_json_extension(file).endswith('_1') and
any(rerun for rerun in reruns if rerun.startswith(_remove_json_extension(file)[:-2]))]
reruns.sort(key=lambda x: int(_remove_json_extension(x).split('_')[-1]))
return originals, reruns
def _get_name_without_run_number(test: str) -> str:
'''
Given a name like <some-name>_<some-number> (optionally with extension),
returns <some-name>. If the name doesn't end with _<some-number>, then
it will return the original name without extension.
'''
test_split = _remove_json_extension(test).split('_')
if test_split[-1].isdigit():
return '_'.join(test_split[:-1])
return _remove_json_extension(test)
def replace_old_runs(dir: str, output_dir: str) -> None:
'''
Given the directory in input (dir) that contains a set of files of original
run data together with rerun data, this populates the specified output_dir
with a consolidated set of composed features, one per system. An original
composed features file is a file that ends in _1.json. A rerun composed
features file is a file that ends in _<num>.json where <num> is greater
than 1. The numbering is automatically generated when the compose features
script was called with the --run-attempt
:param dir: directory containing composed feature files with varying run
attempt numbers
:param output_dir: directory where to write the consolidated composed features
'''
filenames = [f for f in os.listdir(
dir) if os.path.isfile(os.path.join(dir, f))]
originals, reruns = _get_original_and_rerun_list(filenames)
for rerun in reruns:
result_name = _get_name_without_run_number(rerun)
original = list(
filter(lambda x: x.startswith(result_name), originals))
if len(original) != 1:
raise RuntimeError(
f'The rerun {rerun} does not have a corresponding original run')
tests = _replace_tests(os.path.join(
dir, original[0]), os.path.join(dir, rerun))
with open(os.path.join(output_dir, result_name + '.json'), 'w', encoding='utf-8') as f:
f.write(json.dumps(tests))
# Search for system test results that had no reruns and
# simply copy their result file to the output folder
for file in filenames:
if file not in originals and file not in reruns:
shutil.copyfile(os.path.join(dir, file),
os.path.join(output_dir, _get_name_without_run_number(file) + '.json'))
def main():
description = '''
Can be run in two modes: composed feature generation or composed feature consolidation
Composed feature generation mode
Given a directory containing files with outputs of journal-analzyer.py with filenames
of format <backend>:<system>:suite--path--<test>:<variant>, it will construct a json
file for each <backend>:<system> with feature-tagging information, accompanied with
additional test information.
Composed feature consolidation mode
Given a directory containing files of pre-composed feature information with filenames like
<backend>:<system>_<run-attempt>.json, it writes the consolidated feature information in a
new directory (specified with the --output flag) where the latest rerun data replaces the old.
So if a file contains one test that was later rerun, the new consolidated file will contain
unaltered content from the original run except for the one test rerun that will replace
the old.
'''
parser = argparse.ArgumentParser(
description=description, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--dir', type=str, required=True,
help='Path to the folder containing json files')
parser.add_argument('-o', '--output', type=str,
help='Output directory', required=True)
parser.add_argument('-s', '--scenarios', type=str, nargs='*',
help='List of useful metadata tags to describe the testing scenario', default='')
parser.add_argument('-e', '--env-variables', type=str, nargs='*',
help='List of environment variables as key=value', default='')
parser.add_argument('-f', '--failed-tests', type=argparse.FileType('r'),
help='File containing the space-separated names of failed tests')
parser.add_argument('--run-attempt', type=int, choices=range(1, 10), help='''
Run attempt number of the json files contained in the folder [1,10).
Only needed when rerunning spread for failed tests. When specified, will append the run attempt
number on the filename, which will then be used when running this script with the --replace-old-runs
flag to determine replacement order''')
parser.add_argument('-r', '--replace-old-runs', action='store_true',
help='When set, will process pre-composed runs and consolidate them into the output dir')
args = parser.parse_args()
os.makedirs(args.output, exist_ok=True)
if args.replace_old_runs:
replace_old_runs(args.dir, args.output)
exit(0)
failed_tests = set()
if args.failed_tests:
for failed_test in args.failed_tests:
failed_tests.update(failed_test.split())
attempt = ''
if args.run_attempt:
attempt = '_%s' % args.run_attempt
systems = get_system_list(args.dir)
for system in systems:
composed = compose_system(dir=args.dir,
system=system,
failed_tests=failed_tests,
env_variables=args.env_variables,
scenarios=args.scenarios)
with open(os.path.join(args.output, system + attempt + '.json'), 'w', encoding='utf-8') as f:
json.dump(composed, f)
if __name__ == '__main__':
main()
|