1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
|
from dataclasses import dataclass
from mypy.nodes import CallExpr, RefExpr, Var
from refurb.error import Error
@dataclass
class ErrorInfo(Error):
"""
If you are passing a compiled regular expression to a regex function,
consider calling the regex method on the pattern itself: It is faster, and
can improve readability.
Bad:
```
import re
COMMENT = re.compile(".*(#.*)")
found_comment = re.match(COMMENT, "this is a # comment")
```
Good:
```
import re
COMMENT = re.compile(".*(#.*)")
found_comment = COMMENT.match("this is a # comment")
```
"""
name = "use-regex-pattern-methods"
code = 170
categories = ("readability", "regex")
# This table represents the function calls that we will emit errors for. The
# ellipsis are positional args, and the strings are optional args/kwargs.
# The number of required/optional args must match, and if an optional arg
# is used, it must either be unnamed (positional), or named (kwarg), and if
# so, must match the string name.
REGEX_FUNC_ARGS = {
"re.search": (..., ...),
"re.match": (..., ...),
"re.fullmatch": (..., ...),
"re.split": (..., ..., "maxsplit"),
"re.findall": (..., ...),
"re.finditer": (..., ...),
"re.sub": (..., ..., ..., "count"),
"re.subn": (..., ..., ..., "count"),
}
def build_args(arg_names: list[str | None]) -> str:
args = ["..." if arg is None else f"{arg}=..." for arg in arg_names]
return ", ".join(args)
def check(node: CallExpr, errors: list[Error]) -> None:
match node:
case CallExpr(
callee=RefExpr(fullname=fullname, name=name), # type: ignore
args=[pattern, *_] as args,
arg_names=arg_names,
):
arg_format = REGEX_FUNC_ARGS.get(fullname)
if not arg_format:
return
match pattern:
case RefExpr(node=Var(type=ty)) if (str(ty).startswith("re.Pattern[")):
pass
case _:
return
min_len = len([arg for arg in arg_format if arg is ...])
if len(args) < min_len or len(args) > len(arg_format):
return
if isinstance(arg_format[-1], str):
if arg_names[-1] and arg_names[-1] != arg_format[-1]:
return
params = build_args(arg_names[1:])
msg = f"Replace `{fullname}(x, {params})` with `x.{name}({params})`" # noqa: E501
errors.append(ErrorInfo.from_node(node, msg))
|