1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
|
#! /usr/bin/python3
# Copyright (C) 2019-2020 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
#
# The GNU C Library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# The GNU C Library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with the GNU C Library; if not, see
# <https://www.gnu.org/licenses/>.
"""Verifies that installed headers do not use any obsolete constructs:
* legacy BSD typedefs superseded by <stdint.h>:
ushort uint ulong u_char u_short u_int u_long u_intNN_t quad_t u_quad_t
(sys/types.h is allowed to _define_ these types, but not to use them
to define anything else).
"""
import argparse
import collections
import re
import sys
# Simplified lexical analyzer for C preprocessing tokens.
# Does not implement trigraphs.
# Does not implement backslash-newline in the middle of any lexical
# item other than a string literal.
# Does not implement universal-character-names in identifiers.
# Treats prefixed strings (e.g. L"...") as two tokens (L and "...")
# Accepts non-ASCII characters only within comments and strings.
# Caution: The order of the outermost alternation matters.
# STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
# BLOCK_COMMENT before BAD_BLOCK_COM before PUNCTUATOR, and OTHER must
# be last.
# Caution: There should be no capturing groups other than the named
# captures in the outermost alternation.
# For reference, these are all of the C punctuators as of C11:
# [ ] ( ) { } , ; ? ~
# ! != * *= / /= ^ ^= = ==
# # ##
# % %= %> %: %:%:
# & &= &&
# | |= ||
# + += ++
# - -= -- ->
# . ...
# : :>
# < <% <: << <<= <=
# > >= >> >>=
# The BAD_* tokens are not part of the official definition of pp-tokens;
# they match unclosed strings, character constants, and block comments,
# so that the regex engine doesn't have to backtrack all the way to the
# beginning of a broken construct and then emit dozens of junk tokens.
# All three patterns are compiled with re.VERBOSE, so whitespace outside
# of character classes is layout only.  Whitespace *inside* a character
# class (e.g. "[ -~]") is significant and must be preserved.
#
# Alternation in Python's re is ordered, not longest-match, so every
# place that accepts a line ending lists "\r\n" before "\r" and "\n";
# otherwise a CRLF pair would be consumed as a lone CR followed by a
# separate LF.
PP_TOKEN_RE_ = re.compile(r"""
    (?P<STRING>        \"(?:[^\"\\\r\n]|\\(?:[\r\n -~]|\r\n))*\")
   |(?P<BAD_STRING>    \"(?:[^\"\\\r\n]|\\[ -~])*)
   |(?P<CHARCONST>     \'(?:[^\'\\\r\n]|\\(?:[\r\n -~]|\r\n))*\')
   |(?P<BAD_CHARCONST> \'(?:[^\'\\\r\n]|\\[ -~])*)
   |(?P<BLOCK_COMMENT> /\*(?:\*(?!/)|[^*])*\*/)
   |(?P<BAD_BLOCK_COM> /\*(?:\*(?!/)|[^*])*\*?)
   |(?P<LINE_COMMENT>  //[^\r\n]*)
   |(?P<IDENT>         [_a-zA-Z][_a-zA-Z0-9]*)
   |(?P<PP_NUMBER>     \.?[0-9](?:[0-9a-df-oq-zA-DF-OQ-Z_.]|[eEpP][+-]?)*)
   |(?P<PUNCTUATOR>
       [,;?~(){}\[\]]
     | [!*/^=]=?
     | \#\#?
     | %(?:[=>]|:(?:%:)?)?
     | &[=&]?
     |\|[=|]?
     |\+[=+]?
     | -[-=>]?
     |\.(?:\.\.)?
     | :>?
     | <(?:[%:=]|<=?)?
     | >(?:=|>=?)?)
   |(?P<ESCNL>         \\(?:\r\n|\r|\n))
   |(?P<WHITESPACE>    [ \t\n\r\v\f]+)
   |(?P<OTHER>         .)
""", re.DOTALL | re.VERBOSE)
# Notes on the PUNCTUATOR alternatives above:
#  * "-[-=>]?" keeps the '-' first inside the class so that it is a
#    literal; written as "[=->]" it would be the range '=' through '>'
#    and "--" would never be recognized as one token.
#  * "<(?:[%:=]|<=?)?" accepts exactly  <  <%  <:  <=  <<  <<=.

# The operand of #include: either <...> or "...", on a single line.
HEADER_NAME_RE_ = re.compile(r"""
    < [^>\r\n]+ >
  | " [^"\r\n]+ "
""", re.DOTALL | re.VERBOSE)

# One line ending.  CRLF is tried first so that it counts as a single
# line ending rather than two.
ENDLINE_RE_ = re.compile(r"""\r\n|\r|\n""")
# based on the sample code in the Python re documentation
# Immutable record yielded by tokenize_c; the class itself is named
# "Token", the trailing underscore only marks the module-level binding.
Token_ = collections.namedtuple(
    "Token", "kind text line column context")
Token_.__doc__ = """
One C preprocessing token, comment, or chunk of whitespace.
'kind' identifies the token type, which will be one of:
STRING, CHARCONST, BLOCK_COMMENT, LINE_COMMENT, IDENT,
PP_NUMBER, PUNCTUATOR, ESCNL, WHITESPACE, HEADER_NAME,
or OTHER. The BAD_* alternatives in PP_TOKEN_RE_ are
handled within tokenize_c, below.
'text' is the sequence of source characters making up the token;
no decoding whatsoever is performed.
'line' and 'column' give the position of the first character of the
token within the source file. They are both 1-based.
'context' indicates whether or not this token occurred within a
preprocessing directive; it will be None for running text,
'<null>' for the leading '#' of a directive line (because '#'
all by itself on a line is a "null directive"), or the name of
the directive for tokens within a directive line, starting with
the IDENT for the name itself.
"""
def tokenize_c(file_contents, reporter):
    """Yield a series of Token objects, one for each preprocessing
    token, comment, or chunk of whitespace within FILE_CONTENTS,
    in source order.

    The REPORTER object is expected to have one method,
    reporter.error(token, message), which will be called to
    indicate a lexical error at the position of TOKEN.
    If MESSAGE contains the four-character sequence '{!r}', that
    is expected to be replaced by repr(token.text).

    Unclosed strings, character constants and block comments are
    reported and then yielded as if they had been closed, so
    consumers only ever see the well-formed token kinds.
    """
    # Local aliases for the module-level objects used on every iteration.
    Token = Token_
    PP_TOKEN_RE = PP_TOKEN_RE_
    ENDLINE_RE = ENDLINE_RE_
    HEADER_NAME_RE = HEADER_NAME_RE_

    # Ill-formed kind -> (message, text that closes it, well-formed kind).
    repairs = {
        "BAD_BLOCK_COM": ("unclosed block comment", "*/", "BLOCK_COMMENT"),
        "BAD_STRING": ("unclosed string", "\"", "STRING"),
        "BAD_CHARCONST": ("unclosed char constant", "'", "CHARCONST"),
    }

    line_num = 1
    line_start = 0      # offset of the first character of the current line
    pos = 0
    limit = len(file_contents)
    directive = None    # None, "<null>", a directive name, or "after_include"
    at_bol = True

    while pos < limit:
        if directive == "include":
            # Immediately after '#include' a header-name may appear; it
            # would not lex correctly as ordinary pp-tokens.
            mo = HEADER_NAME_RE.match(file_contents, pos)
            if mo:
                kind = "HEADER_NAME"
                directive = "after_include"
            else:
                mo = PP_TOKEN_RE.match(file_contents, pos)
                kind = mo.lastgroup
                # Whitespace may separate 'include' from its operand;
                # anything else ends the window for a header-name.
                if kind != "WHITESPACE":
                    directive = "after_include"
        else:
            mo = PP_TOKEN_RE.match(file_contents, pos)
            kind = mo.lastgroup

        text = mo.group()
        line = line_num
        column = mo.start() - line_start    # 0-based; +1 when reported
        adj_line_start = 0
        # only these kinds can contain a newline
        if kind in ("WHITESPACE", "BLOCK_COMMENT", "LINE_COMMENT",
                    "STRING", "CHARCONST", "BAD_BLOCK_COM", "ESCNL"):
            for tmo in ENDLINE_RE.finditer(text):
                line_num += 1
                adj_line_start = tmo.end()
            if adj_line_start:
                line_start = mo.start() + adj_line_start

        # Track whether or not we are scanning a preprocessing directive.
        if kind == "LINE_COMMENT" or (kind == "WHITESPACE" and adj_line_start):
            at_bol = True
            directive = None
        else:
            if kind == "PUNCTUATOR" and text == "#" and at_bol:
                directive = "<null>"
            elif kind == "IDENT" and directive == "<null>":
                directive = text
            at_bol = False

        # Report ill-formed tokens and rewrite them as their well-formed
        # equivalents, so downstream processing doesn't have to know about
        # them.  (Rewriting instead of discarding provides better error
        # recovery.)
        repair = repairs.get(kind)
        if repair is not None:
            message, closer, good_kind = repair
            reporter.error(Token(kind, "", line, column + 1, ""), message)
            text += closer
            kind = good_kind

        tok = Token(kind, text, line, column + 1,
                    "include" if directive == "after_include" else directive)

        # Do not complain about OTHER tokens inside macro definitions.
        # $ and @ appear in macros defined by headers intended to be
        # included from assembly language, e.g. sysdeps/mips/sys/asm.h.
        if kind == "OTHER" and directive != "define":
            # This is a plain function, so errors go through REPORTER;
            # there is no 'self' here.
            reporter.error(tok, "stray {!r} in program")

        yield tok
        pos = mo.end()
#
# Base and generic classes for individual checks.
#
class ConstructChecker:
    """Base class for checks that consume a stream of C preprocessing
    tokens and may report problems with them.

    The REPORTER handed to the constructor provides a single method,
    reporter.error(token, message), to be called when a problem is
    found at the position of TOKEN.  A '{!r}' sequence in MESSAGE
    will be replaced with a textual representation of TOKEN.
    """

    def __init__(self, reporter):
        # Kept so that subclasses can call self.reporter.error(...).
        self.reporter = reporter

    def examine(self, tok):
        """Process one token of a header file; invoked once per token.

        Subclasses must override this and call self.reporter.error
        when they detect a problem.
        """
        raise NotImplementedError()

    def eof(self):
        """Invoked once after the last token.  The default does nothing;
        subclasses override it only if they have work left to do."""
        return None
class NoCheck(ConstructChecker):
    """Checker that accepts every token without complaint.  Use it in
    place of a real checker when a file is exempt from that check."""

    def examine(self, tok):
        # Intentionally ignores TOK.
        return None
#
# Check for obsolete type names.
#
# The obsolete type names we're looking for:
# The pattern is anchored with \A and \Z, so it only matches when the
# whole string is one of the names; use it on the text of a single token.
# Group 1 is "__" when the name carries the implementation-private
# prefix and None otherwise; group 2 is the name without that prefix.
# Accepted unprefixed names: quad_t, ushort, uint, ulong, u_char,
# u_short, u_int, u_int<digits>_t, u_long, u_quad_t.
OBSOLETE_TYPE_RE_ = re.compile(r"""\A
(__)?
( quad_t
| u(?: short | int | long
| _(?: char | short | int(?:[0-9]+_t)? | long | quad_t )))
\Z""", re.VERBOSE)
class ObsoleteNotAllowed(ConstructChecker):
    """Strictest policy: every occurrence of an obsolete typedef name
    (with or without the '__' prefix) is reported."""

    def examine(self, tok):
        # The pattern is anchored at both ends, so only a token whose
        # entire text is an obsolete name can match.
        if OBSOLETE_TYPE_RE_.match(tok.text) is None:
            return
        self.reporter.error(tok, "use of {!r}")
class ObsoletePrivateDefinitionsAllowed(ConstructChecker):
    """Permit the '__'-prefixed obsolete typedefs to be defined, i.e.
    'typedef [anything] __obsolete;', and report every other
    occurrence of an obsolete type name.
    """

    def __init__(self, reporter):
        super().__init__(reporter)
        # True between a typedef introducer and the ';' that ends it.
        self.in_typedef = False
        # The token seen by the previous examine() call (of any kind).
        self.prev_token = None

    def examine(self, tok):
        """Check the previously seen token now that TOK, its successor,
        is known, then remember TOK for the next call."""
        # bits/types.h hides 'typedef' in a macro sometimes.
        starts_typedef = (tok.kind == "IDENT"
                          and tok.context is None
                          and tok.text in ("typedef", "__STD_TYPE"))
        if starts_typedef:
            self.in_typedef = True
        elif self.in_typedef and tok.kind == "PUNCTUATOR" and tok.text == ";":
            self.in_typedef = False
            # The token directly before ';' is the name being defined;
            # only an unprefixed obsolete name is an error there.
            # NOTE(review): prev_token tracks every token, including
            # whitespace and comments, so this exemption only applies
            # when the name is immediately followed by ';' -- confirm
            # that is intended.
            declared = self.prev_token
            if declared.kind == "IDENT":
                found = OBSOLETE_TYPE_RE_.match(declared.text)
                if found and found.group(1) != "__":
                    self.reporter.error(declared, "use of {!r}")
        else:
            self._check_prev()
        self.prev_token = tok

    def eof(self):
        """Check the final token, which has no successor."""
        self._check_prev()

    def _check_prev(self):
        """Report the remembered token if it is an identifier naming an
        obsolete type, prefixed or not."""
        prev = self.prev_token
        if prev is None or prev.kind != "IDENT":
            return
        if OBSOLETE_TYPE_RE_.match(prev.text):
            self.reporter.error(prev, "use of {!r}")
class ObsoletePublicDefinitionsAllowed(ConstructChecker):
    """Permit the unprefixed obsolete typedefs to be defined, but only
    in these specific forms:
        typedef __obsolete obsolete;      // identifiers must agree
        typedef __uintN_t u_intN_t;       // N must agree
        typedef unsigned long int ulong;
        typedef unsigned short int ushort;
        typedef unsigned int uint;
    Tokens are only collected, and therefore only checked, between a
    'typedef' keyword and the next ';' (or end of input).
    """

    def __init__(self, reporter):
        super().__init__(reporter)
        # Significant tokens of the typedef currently being collected,
        # starting with the 'typedef' keyword itself; empty when idle.
        self.typedef_tokens = []

    def examine(self, tok):
        """Collect TOK into the current typedef, or finish the typedef
        when TOK is ';'."""
        # Whitespace and comments never take part in the comparison.
        if tok.kind in ("WHITESPACE", "BLOCK_COMMENT",
                        "LINE_COMMENT", "NL", "ESCNL"):
            return

        if (tok.kind == "IDENT" and tok.text == "typedef"
                and tok.context is None):
            if self.typedef_tokens:
                self.reporter.error(tok, "typedef inside typedef")
                self._reset()
            self.typedef_tokens.append(tok)
            return

        if tok.kind == "PUNCTUATOR" and tok.text == ";":
            self._finish()
            return

        # Anything else matters only while a typedef is open.
        if self.typedef_tokens:
            self.typedef_tokens.append(tok)

    def eof(self):
        """Check whatever is left of an unterminated typedef."""
        self._reset()

    def _reset(self):
        """Report every obsolete type name among the collected tokens,
        in order, and empty the collection."""
        collected = self.typedef_tokens
        for tok in collected:
            if tok.kind == "IDENT" and OBSOLETE_TYPE_RE_.match(tok.text):
                self.reporter.error(tok, "use of {!r}")
        collected.clear()

    def _finish(self):
        """Handle the ';' that ends a typedef: discard the collected
        tokens silently if they form a permitted definition, otherwise
        check them all."""
        collected = self.typedef_tokens
        if not collected:
            return
        declared = collected[-1]
        if declared.kind == "IDENT":
            found = OBSOLETE_TYPE_RE_.match(declared.text)
            if found and self._permissible_public_definition(found):
                collected.clear()
        self._reset()

    def _permissible_public_definition(self, m):
        """Return True if the collected tokens are one of the permitted
        definitions.  M is the OBSOLETE_TYPE_RE_ match for the last
        collected token, i.e. the name being defined."""
        # Only the unprefixed names may be defined here.
        if m.group(1) == "__":
            return False
        name = m.group(2)
        toks = self.typedef_tokens
        count = len(toks)

        # 'typedef SOURCE name'
        if count == 3 and toks[1].kind == "IDENT":
            source = toks[1].text
            src = OBSOLETE_TYPE_RE_.match(source)
            if src and src.group(1) == "__" and src.group(2) == name:
                return True
            # u_intN_t may be defined from __uintN_t with the same N.
            return (name.startswith("u_int") and name.endswith("_t")
                    and source.startswith("__uint") and source.endswith("_t")
                    and name[5:-2] == source[6:-2])

        # 'typedef <built-in type keywords> name'
        spellings = {
            "ulong": ("unsigned", "long", "int"),
            "ushort": ("unsigned", "short", "int"),
            "uint": ("unsigned", "int"),
        }
        expected = spellings.get(name)
        if expected is None or count != len(expected) + 2:
            return False
        return all(tok.kind == "IDENT" and tok.text == word
                   for tok, word in zip(toks[1:-1], expected))
def ObsoleteTypedefChecker(reporter, fname):
    """Factory: return the obsolete-typedef checker instance whose
    policy applies to the header named FNAME."""

    def is_header(installed_name):
        # FNAME may be the installed name itself or a longer path
        # ending in it.
        return fname == installed_name or fname.endswith("/" + installed_name)

    # The obsolete rpc/ and rpcsvc/ headers are allowed to use the
    # obsolete types, because it would be more trouble than it's
    # worth to remove them from headers that we intend to stop
    # installing eventually anyway.
    in_rpc_tree = (fname.startswith(("rpc/", "rpcsvc/"))
                   or "/rpc/" in fname
                   or "/rpcsvc/" in fname)
    if in_rpc_tree:
        return NoCheck(reporter)

    # bits/types.h is allowed to define the __-versions of the
    # obsolete types.
    if is_header("bits/types.h"):
        return ObsoletePrivateDefinitionsAllowed(reporter)

    # sys/types.h is allowed to use the __-versions of the
    # obsolete types, but only to define the unprefixed versions.
    if is_header("sys/types.h"):
        return ObsoletePublicDefinitionsAllowed(reporter)

    return ObsoleteNotAllowed(reporter)
#
# Master control
#
class HeaderChecker:
    """Perform all of the checks on each header.  This is also the
    "reporter" object expected by tokenize_c and ConstructChecker.

    'status' is the process exit status accumulated so far: 0 until
    the first error is reported, 1 afterwards.
    """

    def __init__(self):
        # Name of the header currently being checked; used as the
        # prefix of every diagnostic.
        self.fname = None
        self.status = 0

    def error(self, tok, message):
        """Write a diagnostic for TOK to stderr and record the failure.

        If MESSAGE contains '{!r}', it is replaced by repr(tok.text).
        """
        self.status = 1
        if '{!r}' in message:
            message = message.format(tok.text)
        sys.stderr.write("{}:{}:{}: error: {}\n".format(
            self.fname, tok.line, tok.column, message))

    def check(self, fname):
        """Read the header FNAME and run every check over its tokens.

        A file that cannot be read, or that is not valid UTF-8, is
        reported on stderr and counted as a failure; no exception is
        propagated for either case.
        """
        self.fname = fname
        try:
            with open(fname, "rt", encoding="utf-8") as fp:
                contents = fp.read()
        except OSError as e:
            sys.stderr.write("{}: {}\n".format(fname, e.strerror))
            self.status = 1
            return
        except UnicodeDecodeError as e:
            # Not an OSError; without this an undecodable header would
            # abort the whole run with a traceback.
            sys.stderr.write("{}: {}\n".format(fname, e))
            self.status = 1
            return

        typedef_checker = ObsoleteTypedefChecker(self, self.fname)
        for tok in tokenize_c(contents, self):
            typedef_checker.examine(tok)
        # Checkers may hold back tokens until they see what follows;
        # give them the chance to examine what is still pending.
        typedef_checker.eof()
def main():
    """Command-line entry point: check every header named on the
    command line and exit with the accumulated status."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "headers", metavar="header", nargs="+",
        help="one or more headers to scan for obsolete constructs")
    options = parser.parse_args()

    checker = HeaderChecker()
    for header in options.headers:
        # Headers whose installed name begins with "finclude/" contain
        # Fortran, not C, and this program should completely ignore them.
        if header.startswith("finclude/") or "/finclude/" in header:
            continue
        checker.check(header)
    sys.exit(checker.status)


main()
|