#!/usr/bin/env python3
# Copyright 2021 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A test case update script.
This script is a utility to update wasm-opt based lit tests with new FileCheck
patterns. It is based on LLVM's update_llc_test_checks.py script.
"""
import argparse
import glob
import os
import re
import subprocess
import sys
import tempfile
# Directory holding this script and the script's own file name. The name is
# embedded in the notice heading every generated test.
script_dir = os.path.dirname(__file__)
script_name = os.path.basename(__file__)

# Header line of a generated test; `{script}` is substituted with the script
# name followed by the options that were in effect.
NOTICE = (';; NOTE: Assertions have been generated by {script} and should not'
          ' be edited.')

# A `;; RUN: <command>` line; group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*;;\s*RUN:\s*(.*)$')
# Group 1 is the value given to a `--check-prefix` option.
CHECK_PREFIX_RE = re.compile(r'.*--check-prefix[= ](\S+).*')
# A line that begins with `(module`.
MODULE_RE = re.compile(r'^\(module.*$', re.MULTILINE)

# Regex alternations describing the module item kinds that can receive checks:
# plain declarations, and declarations wrapped in an import or an export.
DECL_ITEMS = '|'.join(('type', 'global', 'memory', 'data', 'table',
                       'elem', 'tag', 'start', 'func'))
IMPORT_ITEM = rf'import\s*"[^"]*"\s*"[^"]*"\s*\((?:{DECL_ITEMS})'
EXPORT_ITEM = rf'export\s*"[^"]*"\s*\((?:{DECL_ITEMS})'
ALL_ITEMS = '|'.join((DECL_ITEMS, IMPORT_ITEM, EXPORT_ITEM))

# Regular names as well as the "declare" in (elem declare ... to get declarative
# segments included in the output.
ITEM_NAME = r'\$[^\s()]*|\$"[^"]*"|declare'

# FIXME: This does not handle nested string contents. For example,
# (data (i32.const 10) "hello(")
# will look unterminated, due to the '(' inside the string. As a result, the
# code below will consider more elements after the |data| to be part of it,
# until it sees enough closing ')' symbols.
#
# Groups: 1 = indentation before the item, 2 = item kind (possibly with an
# import/export wrapper in front), 3 = item name.
ITEM_RE = re.compile(
    rf'(?:^\s*\(rec\s*)?(^\s*)\(({ALL_ITEMS})\s+({ITEM_NAME}).*$',
    re.MULTILINE)

# A `[fuzz-exec] calling <name>` line as printed for fuzz-exec output.
FUZZ_EXEC_FUNC = re.compile(r'^\[fuzz-exec\] calling (?P<name>\S*)$')
def indentKindName(match):
    """Return the (indent, kind, name) triple described by an ITEM_RE match.

    Group 2 may hold several whitespace-separated words (an import or export
    wrapper in front of the declaration); only the first word is the kind.
    """
    indent, item, name = match.group(1, 2, 3)
    kind = item.split()[0]
    return (indent, kind, name)
def warn(msg):
    """Print `msg` to standard error, prefixed with 'warning: '."""
    text = f'warning: {msg}'
    print(text, file=sys.stderr)
def itertests(args):
    """
    Yield (filename, lines) for each test specified in the command line args.

    Every entry of `args.tests` is expanded as a recursive glob pattern. Lines
    are yielded with trailing whitespace removed. A file whose first line does
    not mention this script is skipped with a warning unless `args.force` is
    set; a pattern that matches nothing is also reported and skipped.
    """
    for pattern in args.tests:
        matched = glob.glob(pattern, recursive=True)
        if not matched:
            warn(f'No tests matched {pattern}. Ignoring it.')
            continue
        for path in matched:
            with open(path) as handle:
                contents = [raw.rstrip() for raw in handle]
            header = contents[0] if contents else ''
            # The notice on the first line names the generating script.
            generated_by_us = script_name in header
            if not (generated_by_us or args.force):
                warn(f'Skipping test {path} which was not generated by '
                     f'{script_name}. Use -f to override.')
                continue
            yield path, contents
def find_run_lines(test, lines):
    """Return the RUN commands found in `lines`, one string per command.

    A RUN line ending in a backslash is continued by the next RUN line: the
    trailing backslashes are dropped and the two are joined with one space.
    Warns, naming `test`, and returns an empty list when there are no RUN
    lines at all.
    """
    commands = []
    for text in lines:
        found = RUN_LINE_RE.match(text)
        if found:
            commands.append(found.group(1))
    if not commands:
        warn(f'No RUN lines found in {test}. Ignoring.')
        return []

    joined = []
    for command in commands:
        if joined and joined[-1].endswith('\\'):
            # Continuation of the previous RUN line.
            joined[-1] = joined[-1].rstrip('\\') + ' ' + command
        else:
            joined.append(command)
    return joined
def run_command(args, test, tmp, command):
    """Run one RUN-line command through the shell and return its stdout.

    `args.binaryen_bin` is put at the front of PATH for the child process.
    Before running, these substitutions are applied to `command`, in order:
    `%s` -> `test`, `%S` -> the directory of `test`, `%t` -> `tmp`, and
    `foreach` -> the path of `foreach.py` next to this script.

    Raises subprocess.CalledProcessError if the command exits with a non-zero
    status. The output is decoded as UTF-8.
    """
    env = dict(os.environ)
    env['PATH'] = args.binaryen_bin + os.pathsep + env['PATH']

    substitutions = (
        ('%s', test),
        ('%S', os.path.dirname(test)),
        ('%t', tmp),
        ('foreach', os.path.join(script_dir, 'foreach.py')),
    )
    for placeholder, replacement in substitutions:
        command = command.replace(placeholder, replacement)

    raw = subprocess.check_output(command, shell=True, env=env)
    return raw.decode('utf-8')
def find_end(module, start):
    """Return the index one past the parenthesis closing the one at `start`.

    Args:
        module: The text to scan.
        start: Index of an opening parenthesis in `module`.

    Returns:
        The index just after the matching ')', so that `module[start:result]`
        is the whole parenthesized expression. This also holds when the
        matching ')' is the very last character of `module`. If the
        parentheses never balance, `len(module)` is returned.

    Parentheses are counted purely textually; ones that appear inside string
    literals are not treated specially.
    """
    assert module[start] == '('
    depth = 0
    for pos in range(start, len(module)):
        char = module[pos]
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                # Return as soon as the match is seen, rather than on the
                # next iteration, so a match at the end of the text is found.
                return pos + 1
    # Unbalanced input: treat the rest of the text as the expression.
    return len(module)
def split_modules(text):
    """Split `text` into a list of strings, one for each module.

    Each piece runs from the start of one `(module` line up to the start of
    the next. Anything before the first module stays attached to the first
    piece. Text with fewer than two modules is returned as a single piece.
    """
    starts = [found.start() for found in MODULE_RE.finditer(text)]
    if len(starts) < 2:
        return [text]
    # The first piece begins at offset 0 rather than at the first module so
    # that leading text is kept with it.
    bounds = [0] + starts[1:] + [len(text)]
    return [text[begin:stop] for begin, stop in zip(bounds, bounds[1:])]
def parse_output_modules(text):
    """Parse printed modules into their named items.

    Returns a list with one entry per module in `text`; each entry is a list
    of ((kind, name), [line]) pairs, one for every item matched by ITEM_RE, in
    order of appearance.
    """
    def items_of(module):
        found = []
        for match in ITEM_RE.finditer(module):
            kind, name = indentKindName(match)[1:]
            # Group 1 is the indentation, so it ends at the item's opening
            # parenthesis.
            stop = find_end(module, match.end(1))
            item_lines = module[match.start():stop].split('\n')
            found.append(((kind, name), item_lines))
        return found

    return [items_of(module) for module in split_modules(text)]
def parse_output_fuzz_exec(text):
    """Parse fuzz-exec output into items shaped like `parse_output_modules`.

    Module boundaries cannot be told apart in this output, so the result is
    always a list holding the items of a single module. Each
    `[fuzz-exec] calling <name>` line starts a new ('func', '$<name>') item,
    and the non-empty lines after it are attached to that item.
    """
    items = []
    for line in text.split('\n'):
        invocation = FUZZ_EXEC_FUNC.match(line)
        if invocation:
            # Add a '$' prefix to the name because that is how it will be
            # parsed in the input.
            name = '$' + invocation.group("name")
            items.append((('func', name), [line]))
            continue
        if line.startswith('[host limit'):
            # Skip mentions of host limits that we hit. This can happen even
            # before we reach the execution of a function (if it happens
            # during instantiation of the module), in which case |items| may
            # be empty, and we'd error on the code below.
            continue
        if not line:
            continue
        assert items, 'unexpected non-invocation line'
        items[-1][1].append(line)
    return [items]
def get_command_output(args, kind, test, lines, tmp):
    """Run the test's RUN lines and collect the parsed output per prefix.

    Returns a list of maps from check prefixes to lists of module items of the
    form ((kind, name), [line]). The outer list has an entry for each module.

    Every RUN line is executed. Only lines piped into `filecheck` contribute
    to the result, under the prefix named by `--check-prefix`, or 'CHECK' when
    that option is absent. `kind` selects the parser: 'wat' or 'fuzz-exec'.
    """
    results = []
    for run_line in find_run_lines(test, lines):
        commands = [part.strip() for part in run_line.rsplit('|', 1)]
        piped_to_filecheck = (len(commands) == 2 and
                              commands[1].startswith('filecheck '))
        if len(commands) > 2 or \
           (len(commands) == 2 and not piped_to_filecheck):
            warn('pipes only supported for one command piped to `filecheck`')

        prefix = ''
        if piped_to_filecheck:
            filecheck_cmd = commands[1]
            commands = commands[:1]
            found = CHECK_PREFIX_RE.match(filecheck_cmd)
            prefix = found.group(1) if found else 'CHECK'

        # Run the command even when its output is not checked.
        output = run_command(args, test, tmp, commands[0])
        if not prefix:
            continue

        if kind == 'wat':
            module_outputs = parse_output_modules(output)
        elif kind == 'fuzz-exec':
            module_outputs = parse_output_fuzz_exec(output)
        else:
            assert False, "unknown output kind"

        for idx, module_items in enumerate(module_outputs):
            if len(results) == idx:
                results.append({})
            results[idx][prefix] = module_items
    return results
def update_test(args, test, lines, tmp):
    """Regenerate the FileCheck assertions of a single test file.

    The test's RUN lines are executed, existing check lines are dropped, and
    new check lines are emitted in front of the input items they describe.
    The result is printed when `args.dry_run` is set and written back to
    `test` otherwise.

    Args:
        args: Parsed command line options. `all_items`, `output` and `dry_run`
            are read here; the object is also passed on to
            `get_command_output`.
        test: Path of the test file.
        lines: Current lines of the test file.
        tmp: Temporary path passed on to `get_command_output`.
    """
    # Do not update `args` directly because the changes should only apply to the
    # current test.
    all_items = args.all_items
    output_kind = args.output

    if lines and script_name in lines[0]:
        # Apply previously used options for this file
        if '--all-items' in lines[0]:
            all_items = True
        output = re.search(r'--output=(?P<kind>\S*)', lines[0])
        if output:
            output_kind = output.group('kind')
        # Skip the notice if it is already in the output
        lines = lines[1:]

    command_output = get_command_output(args, output_kind, test, lines, tmp)

    prefixes = set(prefix
                   for module_output in command_output
                   for prefix in module_output.keys())
    # Prefixes are escaped so they are matched literally inside the regex.
    check_line_re = re.compile(
        r'^\s*;;\s*(' + '|'.join(re.escape(prefix) for prefix in prefixes) +
        r')(?:-NEXT|-LABEL|-NOT)?:.*$')

    # Filter out whitespace between check blocks. The first and last lines are
    # always kept, so this only applies when there are at least two lines;
    # otherwise a lone line would be added twice.
    if len(lines) > 1:
        filtered = [lines[0]]
        for i in range(1, len(lines) - 1):
            if lines[i] or not check_line_re.match(lines[i - 1]) or \
               not check_line_re.match(lines[i + 1]):
                filtered.append(lines[i])
        filtered.append(lines[-1])
        lines = filtered

    # The (kind, name) of every item that appears in the input. Unless
    # --all-items is in effect, only these receive checks.
    named_items = []
    for line in lines:
        match = ITEM_RE.match(line)
        if match:
            _, kind, name = indentKindName(match)
            named_items.append((kind, name))

    # Record the options in the notice so a later run can reapply them.
    script = script_name
    if all_items:
        script += ' --all-items'
    if output_kind != 'wat':
        script += f' --output={output_kind}'
    output_lines = [NOTICE.format(script=script)]

    def emit_checks(indent, prefix, lines):
        # Append one check line per output line: `<prefix>:` for the first
        # and `<prefix>-NEXT:` for the rest.
        def pad(line):
            # Separate non-empty content from the colon by at least one space.
            return line if not line or line.startswith(' ') else ' ' + line
        output_lines.append(f'{indent};; {prefix}: {pad(lines[0])}')
        for line in lines[1:]:
            output_lines.append(f'{indent};; {prefix}-NEXT:{pad(line)}')

    input_modules = [m.split('\n') for m in split_modules('\n'.join(lines))]
    if len(input_modules) > len(command_output):
        warn('Fewer output modules than input modules: '
             'not all modules will get checks.')

    # Remove extra newlines at the end of modules
    input_modules = [m[:-1] for m in input_modules[:-1]] + [input_modules[-1]]

    for module_idx in range(len(input_modules)):
        output = command_output[module_idx] \
            if module_idx < len(command_output) else {}

        for line in input_modules[module_idx]:
            # Skip pre-existing check lines; we will regenerate them.
            if check_line_re.match(line):
                continue

            match = ITEM_RE.match(line)
            if not match:
                output_lines.append(line)
                continue

            indent, kind, name = indentKindName(match)

            for prefix, items in output.items():
                # If the output for this prefix contains an item with this
                # name, emit all the items up to and including the matching
                # item
                has_item = False
                for kind_name, item_lines in items:
                    if name and (kind, name) == kind_name:
                        has_item = True
                        break
                if has_item:
                    first = True
                    while True:
                        # Consume items so they are not emitted again later.
                        kind_name, item_lines = items.pop(0)
                        if all_items or kind_name in named_items:
                            if not first:
                                output_lines.append('')
                            first = False
                            emit_checks(indent, prefix, item_lines)
                        if name and (kind, name) == kind_name:
                            break
            output_lines.append(line)

        # Output any remaining checks for each prefix
        first = True
        for prefix, items in output.items():
            for kind_name, item_lines in items:
                if all_items or kind_name in named_items:
                    if not first:
                        output_lines.append('')
                    first = False
                    emit_checks('', prefix, item_lines)

    if args.dry_run:
        print('\n'.join(output_lines))
    else:
        with open(test, 'w') as f:
            for line in output_lines:
                f.write(line + '\n')
def main():
    """Parse the command line and update every selected test file."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--binaryen-bin', dest='binaryen_bin', default='bin',
        help=('Specifies the path to the Binaryen executables in the CMake build'
              ' directory. Default: bin/ of current directory (i.e. assume an'
              ' in-tree build).'))
    parser.add_argument(
        '--all-items', action='store_true',
        help=('Emit checks for all module items, even those that do not appear'
              ' in the input.'))
    parser.add_argument(
        '--output', choices=['wat', 'fuzz-exec'], default='wat',
        help=('The kind of output test commands are expected to produce.'))
    parser.add_argument(
        '-f', '--force', action='store_true',
        help=('Generate FileCheck patterns even for test files whose existing '
              'patterns were not generated by this script.'))
    parser.add_argument(
        '--dry-run', action='store_true',
        help=('Print the updated test file contents instead of changing the '
              'test files'))
    parser.add_argument('tests', nargs='+', help='The test files to update')
    args = parser.parse_args()
    # Commands are run with this directory on PATH, so make it absolute.
    args.binaryen_bin = os.path.abspath(args.binaryen_bin)

    # tempfile.mktemp() is deprecated and open to races. Instead, hand out a
    # not-yet-existing path inside a private directory. Anything the RUN
    # commands create at or next to that path is removed with the directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp = os.path.join(tmp_dir, 'tmp')
        for test, lines in itertests(args):
            update_test(args, test, lines, tmp)
# Run the updater only when this file is executed as a script.
if __name__ == '__main__':
    main()
|