#!/usr/bin/env python
"""
Usage: python vtk_reindent_code.py [--test] <file1> [<file2> ...]
This script takes old-style "Whitesmiths" indented VTK source files as
input, and re-indents the braces according to the new VTK style.
Only the brace indentation is modified.
If called with the --test option, then it will print an error message
for each file that it would modify, but it will not actually modify the
files.
Written by David Gobbi on Sep 30, 2015.
"""
import sys
import os
import re
def _strip_literals_and_comments(lines):
    """Return a simplified copy of `lines` that is safe to scan for braces.

    The contents of string literals are reduced to "", character literals
    to ' ', and comments are removed, so that braces hiding inside them are
    not mistaken for code.  Trailing whitespace is stripped from every
    line.  The result has exactly one entry per input line; the input list
    is not modified.
    """
    # look for ', ", /*, and //
    keychar = re.compile(r"""[/"']""")
    # comments of the form /* */
    c_comment = re.compile(r"\/\*(\*(?!\/)|[^*])*\*\/")
    c_comment_start = re.compile(r"\/\*(\*(?!\/)|[^*])*$")
    c_comment_end = re.compile(r"^(\*(?!\/)|[^*])*\*\/")
    # comments of the form //
    cpp_comment = re.compile(r"\/\/.*")
    # string literals ""
    string_literal = re.compile(r'"([^\\"]|\\.)*"')
    string_literal_start = re.compile(r'"([^\\"]|\\.)*\\$')
    string_literal_end = re.compile(r'^([^\\"]|\\.)*"')
    # character literals ''
    char_literal = re.compile(r"'([^\\']|\\.)*'")
    char_literal_start = re.compile(r"'([^\\']|\\.)*\\$")
    char_literal_end = re.compile(r"^([^\\']|\\.)*'")

    simplified = []
    cont = None  # set if e.g. we found /* and we are looking for */

    for text in lines:
        line = text.rstrip()
        if cont is not None:
            # look for closing ' or " or */
            match = cont.match(line)
            if match:
                # found it, keep only what follows the closing token
                line = line[match.end():]
                cont = None
            elif cont is c_comment_end:
                # still looking for */, clear the whole line
                simplified.append("")
                continue
            else:
                # still looking for ' or ", set line to backslash
                simplified.append('\\')
                continue

        # start at column 0 and search for ', ", /*, or //
        pos = 0
        while True:
            match = keychar.search(line, pos)
            if match is None:
                break
            pos = match.start()
            # was the match /* ... */ ?
            match = c_comment.match(line, pos)
            if match:
                line = line[0:pos] + " " + line[match.end():]
                pos += 1
                continue
            # does the line have /* ... without the */ ?
            match = c_comment_start.match(line, pos)
            if match:
                # keep a trailing backslash so that macro continuation
                # lines are still recognized by the caller
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                cont = c_comment_end
                break
            # does the line have // ?
            match = cpp_comment.match(line, pos)
            if match:
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                break
            # did we find "..." ?
            match = string_literal.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"" + line[match.end():]
                pos += 2
                continue
            # did we find "... without the final " ?
            match = string_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"\\"
                cont = string_literal_end
                break
            # did we find '...' ?
            match = char_literal.match(line, pos)
            if match:
                line = line[0:pos] + "\' \'" + line[match.end():]
                pos += 3
                continue
            # did we find '... without the final ' ?
            match = char_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "\' \'\\"
                cont = char_literal_end
                break
            # if we got to here, we found / that wasn't /* or //
            pos += 1

        # strip any trailing whitespace!
        simplified.append(line.rstrip())

    return simplified


def reindent(filename, dry_run=False):
    """Reindent a file from Whitesmiths style to Allman style.

    Only the indentation of lines that hold a brace is changed.

    filename -- path of the source file to process
    dry_run  -- if true, the file is not rewritten; a warning that lists
                the lines that would change is written to stderr instead

    Returns True if any line was (or, for a dry run, would be) changed,
    and False otherwise.  Diagnostics about mismatched delimiters and
    #if conditionals are written to stderr with 1-based line numbers.
    If the file cannot be read, a message is written to stderr and the
    process exits with status 1.
    """
    # read the file
    try:
        with open(filename) as f:
            lines = f.readlines()
    except (IOError, OSError, UnicodeError):
        sys.stderr.write(filename + ": ")
        sys.stderr.write(str(sys.exc_info()[1]) + "\n")
        sys.exit(1)

    n = len(lines)  # 'lines' is the input

    # The first pass clears all strings and comments where non-grammatical
    # braces might be hiding.  These changes are not saved back to the
    # file, they just simplify the parsing.
    newlines = _strip_literals_and_comments(lines)

    # The second pass looks for braces in the simplified code.
    # Whenever we encounter an opening brace, we push its position onto a
    # stack.  Whenever we encounter the matching closing brace, we indent
    # the braces as a pair.
    # For #if directives, we check whether there are mismatched braces
    # within the conditional block, and if so, we print a warning and reset
    # the stack to the depth that it had at the start of the block.
    # For #define directives, we save the stack and then restart counting
    # braces until the end of the #define.  Then we restore the stack.

    # all changes go through this function
    lines_changed = {}  # keeps track of each line that was changed

    def changeline(i, newtext, lines_changed=lines_changed):
        if newtext != lines[i]:
            lines[i] = newtext
            lines_changed[i] = newtext

    # we push a tuple (delim, row, col, indentation) onto this stack
    # whenever we find a {, (, or [ delimiter; 'indentation' is the value
    # to restore (and the column to move a brace to) when the partner is
    # found, and col is -1 for braces that must not be moved
    stack = []

    # this is a superstack that allows us to save the entire stack when we
    # enter into an #if conditional block
    dstack = []

    # these are syntactic elements we need to look for
    directive = re.compile(r"\s*#\s*(..)")
    label = re.compile(r"""(case(?!\w)([^:]|::)+|\w+\s*(::\s*)*\s*:(?!:))""")
    cflow = re.compile(r"(if|else|for|do|while|switch)(?!\w)")
    delims = re.compile(r"[{}()\[\];]")
    spaces = re.compile(r"\s*")
    other = re.compile(r"(\w+|[^{}()\[\];\w\s]+)\s*")
    cplusplus = re.compile(r"\s*#\s*ifdef\s+__cplusplus")
    partner = {'}': '{', ')': '(', ']': '['}

    indentation = 0         # current indentation column
    continuation = False    # true if line continues an unfinished statement
    new_context = True      # also set when we enter a #define statement
    in_else = False         # set if in an #else
    in_define = False       # set if in #define
    in_assign = False       # set to deal with "= {" or #define x {"
    leaving_define = False  # set if at the end of a #define
    save_stack = None       # save stack when entering a #define

    for i in range(n):
        line = newlines[i]

        # restore stack when leaving #define
        if leaving_define:
            stack, indentation, continuation = save_stack
            save_stack = None
            in_define = False
            leaving_define = False

        # handle #if conditionals
        is_directive = False
        directive_end = 0
        in_else = False
        match = directive.match(line)
        if match:
            is_directive = True
            directive_end = match.end()
            key = match.group(1)
            if key == 'if':
                dstack.append((list(stack), indentation, continuation,
                               line))
            elif key in ('en', 'el'):
                if not dstack:
                    # unbalanced #else/#elif/#endif: report it rather
                    # than failing on an empty stack
                    sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                    sys.stderr.write("mismatched #if conditional.\n")
                else:
                    oldstack, oldindent, oldcont, dline = dstack.pop()
                    if (len(stack) > len(oldstack) and
                            not cplusplus.match(dline)):
                        sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                        sys.stderr.write("mismatched delimiter in \"" +
                                         dline + "\" block\n")
                    if key == 'el':
                        # restart from the state at the opening #if
                        in_else = True
                        indentation = oldindent
                        continuation = oldcont
                        stack = oldstack
                        dstack.append((list(stack), indentation,
                                       continuation, line))
            elif key == 'de':
                in_define = True
                leaving_define = False
                save_stack = (stack, indentation, continuation)
                stack = []
                new_context = True

        # remove backslash at end of line, if present
        if len(line) > 0 and line[-1] == '\\':
            line = line[0:-1].rstrip()
        elif in_define:
            leaving_define = True

        if not is_directive and len(line) > 0 and not continuation:
            # what is the indentation of the current line?
            match = spaces.match(line)
            if not line[match.end()] == '{':
                indentation = match.end()
                continuation = True

        # new_context marks beginning of a file or a macro
        if new_context:
            continuation = False
            indentation = 0
            new_context = False

        # skip initial whitespace (or the "#xx" prefix of a directive)
        if is_directive:
            # clamp, since removing a backslash may have shortened the line
            pos = min(directive_end, len(line))
        else:
            pos = spaces.match(line).end()

        # check for a label e.g. case
        match = label.match(line, pos)
        if match:
            # a label only counts if we are not inside ( ) or [ ]
            base = all(item[0] == '{' for item in stack)
            if base:
                word = re.match(r"\w*", match.group()).group()
                if word in ("case", "default"):
                    indentation = pos
                continuation = False
                # check for multiple labels on the same line
                while match:
                    pos = spaces.match(line, match.end()).end()
                    match = label.match(line, pos)

        # parse the line
        while pos != len(line):
            # check for if, else, for, while, do, switch
            match = cflow.match(line, pos)
            if match:
                # if we are at the beginning of the line
                if spaces.match(line).end() == pos:
                    indentation = pos
                pos = spaces.match(line, match.end()).end()
                continue

            # check for a delimiter {} () [] or ;
            match = delims.match(line, pos)
            if not match:
                # check for any other identifiers, operators
                match = other.match(line, pos)
                if match:
                    pos = match.end()
                    continue
                else:
                    break

            # found a delimiter
            delim = line[pos]

            if delim in ('(', '['):
                # save delim, row, col, and current indentation
                stack.append((delim, i, pos, indentation))
            elif delim == '{':
                if in_assign or line[0:pos].rstrip()[-1:] == "=":
                    # do not adjust braces for initializer lists
                    stack.append((delim, i, -1, indentation))
                elif ((in_else or in_define) and
                      spaces.sub("", line) == "{"):
                    # for opening braces that might have no match
                    indent = " "*indentation
                    changeline(i, spaces.sub(indent, lines[i], count=1))
                    stack.append((delim, i, pos, indentation))
                else:
                    # save delim, row, col, and previous indentation
                    stack.append((delim, i, pos, indentation))
                # a brace that starts its line opens a deeper level
                if spaces.sub("", newlines[i][0:pos]) == "":
                    indentation += 2
                continuation = False
            elif delim == ';':
                # ';' marks end of statement unless inside for (;;)
                if len(stack) == 0 or stack[-1][0] == '{':
                    continuation = False
            else:
                # found a ')', ']', or '}' delimiter, so pop its partner
                try:
                    ldelim, j, k, indentation = stack.pop()
                    in_assign = (k < 0)
                except IndexError:
                    ldelim = ""
                if ldelim != partner[delim]:
                    sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                    sys.stderr.write("mismatched \'" + delim + "\'\n")
                # adjust the indentation of matching '{', '}'
                if (ldelim == '{' and delim == '}' and not in_assign and
                        spaces.sub("", line[0:pos]) == ""):
                    indent = " "*indentation
                    if spaces.sub("", newlines[j][0:k]) == "":
                        # both braces start their lines: move the pair
                        changeline(j, spaces.sub(indent, lines[j], count=1))
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                    elif i != j:
                        # the '{' trails other code: move only the '}'
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                if delim == '}':
                    continuation = False

            # eat whitespace and continue
            pos = spaces.match(line, match.end()).end()

        # check for " = " and #define assignments for the sake of
        # the { initializer list } that might be on the following line
        if len(line) > 0:
            if (line[-1] == '=' or
                    (is_directive and in_define and not leaving_define)):
                in_assign = True
            elif not is_directive:
                in_assign = False

    if len(dstack) != 0:
        sys.stderr.write(filename + ": ")
        sys.stderr.write("mismatched #if conditional.\n")

    if len(stack) != 0:
        sys.stderr.write(filename + ":" + str(stack[0][1] + 1) + ": ")
        sys.stderr.write("no match for " + stack[0][0] +
                         " before end of file.\n")

    if lines_changed:
        # remove any trailing spaces (blank lines at the end of the file
        # are left in place)
        trailing = re.compile(r" *$")
        for i in range(n):
            lines[i] = trailing.sub("", lines[i])
        if dry_run:
            errcount = len(lines_changed)
            # list at most the first ten offending lines
            line_numbers = [str(l + 1) for l in sorted(lines_changed)[0:10]]
            if errcount > len(line_numbers):
                line_numbers.append("...")
            sys.stderr.write("Warning: " + filename +
                             ": incorrect brace indentation on " +
                             str(errcount) +
                             (" lines: ", " line: ")[errcount == 1] +
                             ", ".join(line_numbers) + "\n")
        else:
            # rewrite the file
            with open(filename, 'w') as ofile:
                ofile.writelines(lines)
        return True

    return False
if __name__ == "__main__":
    # Command-line driver: collect the file arguments, honouring "--test"
    # and the "--" end-of-options marker, then reindent each file.

    # generated sources that must never be touched
    ignorefiles = ["lex.yy.c", "vtkParse.tab.c"]

    files = []
    opt_ignore = False  # once "--" is seen, nothing else is an option
    opt_test = False    # the --test option

    for arg in sys.argv[1:]:
        is_option = arg.startswith('-') and not opt_ignore
        if not is_option:
            if os.path.basename(arg) not in ignorefiles:
                files.append(arg)
            continue
        if arg == '--':
            opt_ignore = True
        elif arg == '--test':
            opt_test = True
        else:
            sys.stderr.write("%s: unrecognized option %s\n" %
                             (os.path.basename(sys.argv[0]), arg))
            sys.exit(1)

    # With --test, a file that needs modification marks the run as failed,
    # but the remaining files are still checked.
    failed = False

    for filename in files:
        if opt_test:
            # a dry run never changes the file, so one pass is enough
            if reindent(filename, dry_run=True):
                failed = True
        else:
            # repeat until no further changes occur
            while reindent(filename, dry_run=False):
                pass

    if failed:
        sys.exit(1)