1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877
|
package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
use 5.010001;
use strict;
use warnings;
use List::SomeUtils qw(none);
use Readonly;
use Scalar::Util qw(refaddr);
use Perl::Critic::Utils qw{
:booleans :characters :severities hashify precedence_of
split_nodes_on_comma
};
use parent 'Perl::Critic::Policy';
our $VERSION = '1.156';
#-----------------------------------------------------------------------------
Readonly::Scalar my $SPLIT => q{split};
Readonly::Scalar my $WHILE => q{while};
Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
hashify( qw< ${^CAPTURE} > );
# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
# as a use of capture variable n.
Readonly::Hash my %CAPTURE_REFERENCE => (
hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
%ZERO_BASED_CAPTURE_REFERENCE );
Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
%CAPTURE_REFERENCE );
Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
%CAPTURE_ARRAY );
Readonly::Hash my %CAPTURE_HASH => hashify( qw< %- %+ %{^CAPTURE} >);
Readonly::Hash my %CAPTURE_HASH_ENGLISH => (
hashify( qw< %LAST_PAREN_MATCH > ),
%CAPTURE_HASH );
Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
Readonly::Scalar my $EXPL => [252];
#-----------------------------------------------------------------------------
sub supported_parameters { return qw() }
sub default_severity { return $SEVERITY_MEDIUM }
sub default_themes { return qw( core pbp maintenance ) }
sub applies_to {
return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >;
}
#-----------------------------------------------------------------------------
Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
sub violates {
my ( $self, $elem, $doc ) = @_;
# optimization: don't bother parsing the regexp if there are no parens
return if 0 > index $elem->content(), '(';
my $re = $doc->ppix_regexp_from_element( $elem ) or return;
$re->failures() and return;
my $ncaptures = $re->max_capture_number() or return;
my @captures = ( undef ) x $ncaptures; # List of expected captures
my %named_captures; # List of expected named captures.
# Unlike the numbered capture logic, %named_captures
# entries are made undefined when a use of the name is
# found. Otherwise two hashes would be needed, one to
# become defined when a use is found, and one to hold
# the mapping of name to number.
foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
) || [] } ) {
# There can be more than one capture with the same name, so we need to
# record all of them. There will be duplications if the 'branch reset'
# "(?| ... )" pattern is used, but this is benign given how numbered
# captures are recorded.
push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number();
}
# Look for references to the capture in the regex itself
return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
if ( $re->modifier_asserted( 'g' )
and not _check_if_in_while_condition_or_block( $elem ) ) {
$ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
$#captures = $ncaptures - 1;
}
if ( !%named_captures ) {
return if _enough_assignments($elem, \@captures);
return if _is_in_slurpy_array_context($elem);
}
return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
return $self->violation( $DESC, $EXPL, $elem );
}
# Find uses of both numbered and named capture variables in the regexp itself.
# Return true if all are used.
sub _enough_uses_in_regexp {
my ( $re, $captures, $named_captures, $doc ) = @_;
# Look for references to the capture in the regex itself. Note that this
# will also find backreferences in the replacement string of s///.
foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
|| [] } ) {
if ( $token->is_named() ) {
_record_named_capture( $token->name(), $captures, $named_captures );
} else {
_record_numbered_capture( $token->absolute(), $captures );
}
}
foreach my $token ( @{ $re->find(
'PPIx::Regexp::Token::Code' ) || [] } ) {
my $ppi = $token->ppi() or next;
_check_node_children( $ppi, {
regexp => $re,
numbered_captures => $captures,
named_captures => $named_captures,
document => $doc,
}, _make_regexp_checker() );
}
return ( none {not defined} @{$captures} )
&& ( !%{$named_captures} ||
none {defined} values %{$named_captures} );
}
sub _enough_assignments {
my ($elem, $captures) = @_;
# look backward for the assignment operator
my $psib = $elem->sprevious_sibling;
SIBLING:
while (1) {
return if !$psib;
if ($psib->isa('PPI::Token::Operator')) {
last SIBLING if q{=} eq $psib->content;
return if q{!~} eq $psib->content;
}
$psib = $psib->sprevious_sibling;
}
$psib = $psib->sprevious_sibling;
return if !$psib; # syntax error: '=' at the beginning of a statement???
if ($psib->isa('PPI::Token::Symbol')) {
# @foo = m/(foo)/
# @$foo = m/(foo)/
# %foo = m/(foo)/
# %$foo = m/(foo)/
return $TRUE if _symbol_is_slurpy($psib);
} elsif ($psib->isa('PPI::Structure::Block')) {
# @{$foo} = m/(foo)/
# %{$foo} = m/(foo)/
return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
} elsif ($psib->isa('PPI::Structure::List')) {
# () = m/(foo)/
# ($foo) = m/(foo)/
# ($foo,$bar) = m/(foo)(bar)/
# (@foo) = m/(foo)(bar)/
# ($foo,@foo) = m/(foo)(bar)/
# ($foo,@$foo) = m/(foo)(bar)/
# ($foo,@{$foo}) = m/(foo)(bar)/
my @args = $psib->schildren;
return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
# Forward looking: PPI might change in v1.200 so schild(0) is a
# PPI::Statement::Expression.
if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
@args = $args[0]->schildren;
}
my @parts = split_nodes_on_comma(@args);
PART:
for my $i (0 .. $#parts) {
if (1 == @{$parts[$i]}) {
my $var = $parts[$i]->[0];
if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
return $TRUE if _has_array_sigil($var);
}
}
_record_numbered_capture( $i + 1, $captures );
# ith variable capture
}
}
return none {not defined} @{$captures};
}
sub _symbol_is_slurpy {
my ($symbol) = @_;
return $TRUE if _has_array_sigil($symbol);
return $TRUE if _has_hash_sigil($symbol);
return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
return;
}
sub _has_array_sigil {
my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
return q{@} eq substr $elem->content, 0, 1;
}
sub _has_hash_sigil {
my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
return q{%} eq substr $elem->content, 0, 1;
}
sub _is_preceded_by_array_or_hash_cast {
my ($elem) = @_;
my $psib = $elem->sprevious_sibling;
my $cast;
while ($psib && $psib->isa('PPI::Token::Cast')) {
$cast = $psib;
$psib = $psib->sprevious_sibling;
}
return if !$cast;
my $sigil = substr $cast->content, 0, 1;
return q{@} eq $sigil || q{%} eq $sigil;
}
sub _is_in_slurpy_array_context {
my ($elem) = @_;
# return true is the result of the regexp is passed to a subroutine.
# doesn't check for array context due to assignment.
# look backward for explicit regex operator
my $psib = $elem->sprevious_sibling;
if ($psib && $psib->content eq q{=~}) {
# Track back through value
$psib = _skip_lhs($psib);
}
if (!$psib) {
my $parent = $elem->parent;
return if !$parent;
if ($parent->isa('PPI::Statement')) {
$parent = $parent->parent;
return if !$parent;
}
# Return true if we have a list that isn't part of a foreach loop.
# TECHNICAL DEBT: This code is basically shared with
# RequireCheckingReturnValueOfEval. I don't want to put this code
# into Perl::Critic::Utils::*, but I don't have time to sort out
# PPIx::Utilities::Structure::List yet.
if ( $parent->isa('PPI::Structure::List') ) {
my $parent_statement = $parent->statement() or return $TRUE;
return $TRUE if not
$parent_statement->isa('PPI::Statement::Compound');
return $TRUE if $parent_statement->type() ne 'foreach';
}
return $TRUE if $parent->isa('PPI::Structure::Constructor');
if ($parent->isa('PPI::Structure::Block')) {
return $TRUE
if
refaddr($elem->statement)
eq refaddr([$parent->schildren]->[-1]);
}
return;
}
if ($psib->isa('PPI::Token::Operator')) {
# Most operators kill slurpiness (except assignment, which is handled elsewhere).
return q{,} eq $psib->content;
}
return $TRUE;
}
sub _skip_lhs {
my ($elem) = @_;
# TODO: better implementation to handle casts, expressions, subcalls, etc.
$elem = $elem->sprevious_sibling();
return $elem;
}
sub _enough_magic {
my ($elem, $re, $captures, $named_captures, $doc) = @_;
_check_for_magic($elem, $re, $captures, $named_captures, $doc);
return ( none {not defined} @{$captures} )
&& ( !%{$named_captures} ||
none {defined} values %{$named_captures} );
}
# void return
sub _check_for_magic {
my ($elem, $re, $captures, $named_captures, $doc) = @_;
# Search for $1..$9 in :
# * the rest of this statement
# * subsequent sibling statements
# * if this is in a conditional boolean, the if/else bodies of the conditional
# * if this is in a while/for condition, the loop body
# But NO intervening regexps!
# Package up the usual arguments for _check_rest_of_statement().
my $arg = {
regexp => $re,
numbered_captures => $captures,
named_captures => $named_captures,
document => $doc,
};
# Capture whether or not the regular expression is negated -- that
# is, whether it is preceded by the '!~' binding operator.
if ( my $prior_token = $elem->sprevious_sibling() ) {
$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
q<!~> eq $prior_token->content();
}
return if ! _check_rest_of_statement( $elem, $arg );
my $parent = $elem->parent();
while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
return if ! _check_rest_of_statement( $parent, $arg );
$parent = $parent->parent();
}
return;
}
# Check if we are in the condition or block of a 'while'
sub _check_if_in_while_condition_or_block {
my ( $elem ) = @_;
$elem or return;
my $parent = $elem->parent() or return;
$parent->isa( 'PPI::Statement' ) or return;
my $item = $parent = $parent->parent() or return;
if ( $item->isa( 'PPI::Structure::Block' ) ) {
$item = $item->sprevious_sibling() or return;
}
$item->isa( 'PPI::Structure::Condition' ) or return;
$item = $item->sprevious_sibling() or return;
$item->isa( 'PPI::Token::Word' ) or return;
return $WHILE eq $item->content();
}
{
# Shortcut operators '||', '//', and 'or' can cause everything after
# them to be skipped. 'and' trumps '||' and '//', and causes things
# to be evaluated again. The value is true to skip, false to cancel
# skipping.
Readonly::Hash my %SHORTCUT_OPERATOR => (
q<||> => $FALSE,
q<//> => $FALSE,
and => $TRUE,
or => $FALSE,
);
# RT #38942
# The issue in the ticket is that in something like
# if ( /(a)/ || /(b)/ ) {
# say $1
# }
# the capture variable can come from either /(a)/ or /(b)/. If we
# don't take into account the short-cutting nature of the '||' we
# erroneously conclude that the capture in /(a)/ is not used. So we
# need to skip every regular expression after an alternation.
#
# The trick is that we want to still mark magic variables, because
# of code like
# my $foo = $1 || $2;
# so we can't just ignore everything after an alternation.
#
# To do all this correctly, we have to track precedence, and start
# paying attention again if an 'and' is found after a '||'.
# Subroutine _make_regexp_checker() manufactures a snippet of code
# which is used to track regular expressions. It takes one optional
# argument, which is the snippet used to track the parent object's
# regular expressions.
#
# The snippet is passed each token encountered, and returns true if
# the scan for capture variables is to be stopped. This will happen
# if the token is a regular expression which is _not_ to the right
# of an alternation operator ('||', '//', or 'or'), or it _is_ to
# the right of an 'and', without an intervening alternation
# operator.
#
# If _make_regexp_checker() was passed a snippet which
# returns false on encountering a regular expression, the returned
# snippet always returns false, for the benefit of code like
# /(a)/ || ( /(b)/ || /(c)/ ).
sub _make_regexp_checker {
my ( $parent ) = @_;
$parent
and not $parent->()
and return sub { return $FALSE };
my $check = $TRUE;
my $precedence = 0;
return sub {
my ( $elem ) = @_;
$elem or return $check;
if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
return _regexp_is_in_split( $elem ) ? $FALSE : $check;
}
if ( $elem->isa( 'PPI::Token::Structure' )
&& q<;> eq $elem->content() ) {
$check = $TRUE;
$precedence = 0;
return $FALSE;
}
$elem->isa( 'PPI::Token::Operator' )
or return $FALSE;
my $content = $elem->content();
defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
or return $FALSE;
my $oper_precedence = precedence_of( $content );
$oper_precedence >= $precedence
or return $FALSE;
$precedence = $oper_precedence;
$check = $oper_check;
return $FALSE;
};
}
}
# Argument is regexp.
# True if it is the regexp in a split()
sub _regexp_is_in_split {
my ( $elem ) = @_;
my $prev;
if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
# Maybe we have split( /.../, ... )
my $stmt = $elem->statement()
or return $FALSE;
$stmt->parent()
or return $FALSE;
$prev = $elem->sprevious_sibling()
or return $FALSE;
}
return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
}
# false if we hit another regexp
# The arguments are:
# $elem - The PPI::Element whose siblings are to be checked;
# $arg - A hash reference containing the following keys:
# regexp => the relevant PPIx::Regexp object;
# numbered_captures => a reference to the array used to track the
# use of numbered captures;
# named_captures => a reference to the hash used to track the
# use of named captures;
# negated => true if the regexp was bound to its target with the
# '!~' operator;
# document => a reference to the Perl::Critic::Document;
# Converted to passing the arguments everyone gets in a hash because of
# the need to add the 'negated' argument, which would put us at six
# arguments.
sub _check_rest_of_statement {
my ( $elem, $arg ) = @_;
my $checker = _make_regexp_checker();
my $nsib = $elem->snext_sibling;
# If we are an if (or elsif) and the result of the regexp is
# negated, we skip the first block found. RT #69867
if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
$nsib = $nsib->snext_sibling();
}
$nsib and $nsib = $nsib->snext_sibling();
}
while ($nsib) {
return if $checker->($nsib);
if ($nsib->isa('PPI::Node')) {
return if ! _check_node_children($nsib, $arg, $checker );
} else {
_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
$arg->{named_captures}, $arg->{document} );
}
$nsib = $nsib->snext_sibling;
}
return $TRUE;
}
{
Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
# Return true if the argument is the condition of an if or elsif
# statement, otherwise return false.
sub _is_condition_of_if_statement {
my ( $elem ) = @_;
$elem
and $elem->isa( 'PPI::Structure::Condition' )
or return $FALSE;
my $psib = $elem->sprevious_sibling()
or return $FALSE;
$psib->isa( 'PPI::Token::Word' )
or return $FALSE;
return $IS_IF_STATEMENT{ $psib->content() };
}
}
# false if we hit another regexp
# The arguments are:
# $elem - The PPI::Node whose children are to be checked;
# $arg - A hash reference containing the following keys:
# regexp => the relevant PPIx::Regexp object;
# numbered_captures => a reference to the array used to track the
# use of numbered captures;
# named_captures => a reference to the hash used to track the
# use of named captures;
# document => a reference to the Perl::Critic::Document;
# $parent_checker - The parent's regexp checking code snippet,
# manufactured by _make_regexp_checker(). This argument is not in
# the $arg hash because that hash is shared among levels of the
# parse tree, whereas the regexp checker is not.
# TODO the things in the $arg hash are widely shared among the various
# pieces/parts of this policy; maybe more subroutines should use this
# hash rather than passing all this stuff around as individual
# arguments. This particular subroutine got the hash-reference treatment
# because Subroutines::ProhibitManyArgs started complaining when the
# checker argument was added.
sub _check_node_children {
my ($elem, $arg, $parent_checker) = @_;
# caveat: this will descend into subroutine definitions...
my $checker = _make_regexp_checker($parent_checker);
for my $child ($elem->schildren) {
return if $checker->($child);
if ($child->isa('PPI::Node')) {
return if ! _check_node_children($child, $arg, $checker);
} else {
_mark_magic($child, $arg->{regexp},
$arg->{numbered_captures}, $arg->{named_captures},
$arg->{document});
}
}
return $TRUE;
}
sub _mark_magic {
my ($elem, $re, $captures, $named_captures, $doc) = @_;
# If we're a double-quotish element, we need to grub through its
# content. RT #38942
if ( _is_double_quotish_element( $elem ) ) {
_mark_magic_in_content(
$elem->content(), $re, $captures, $named_captures, $doc );
return;
}
# Ditto a here document, though the logic is different. RT #38942
if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
$elem->content() =~ m/ \A << ~? \s* ' /sxm
or _mark_magic_in_content(
join( $EMPTY, $elem->heredoc() ), $re, $captures,
$named_captures, $doc );
return;
}
# Only interested in magic, or known English equivalent.
my $content = $elem->content();
my ( $capture_ref, $capture_array, $capture_hash ) = $doc->uses_module( 'English' ) ?
( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH, \%CAPTURE_HASH_ENGLISH ) :
( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY, \%CAPTURE_HASH );
$elem->isa( 'PPI::Token::Magic' )
or $capture_ref->{$content}
or $capture_array->{$content}
or $capture_hash->{$content}
or return;
if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
# Record if we see $1, $2, $3, ...
my $num = $1;
if (0 < $num) { # don't mark $0
# Only mark the captures we really need -- don't mark superfluous magic vars
if ($num <= @{$captures}) {
_record_numbered_capture( $num, $captures );
}
}
} elsif ( $capture_array->{$content} ) { # GitHub #778
foreach my $num ( 1 .. @{$captures} ) {
_record_numbered_capture( $num, $captures );
}
} elsif ( $capture_hash->{$content} ) {
foreach my $name ( keys %{$named_captures} ) {
_record_named_capture( $name, $captures, $named_captures );
}
} elsif ( $capture_ref->{$content} ) {
_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
}
return;
}
# Record a named capture referenced by a hash or array found in code.
# The arguments are:
# $elem - The element that represents a subscripted capture variable;
# $re - The PPIx::Regexp object;
# $captures - A reference to the numbered capture array;
# $named_captures - A reference to the named capture hash.
sub _mark_magic_subscripted_code {
my ( $elem, $re, $captures, $named_captures ) = @_;
my $subscr = $elem->snext_sibling() or return;
$subscr->isa( 'PPI::Structure::Subscript' ) or return;
my $subval = $subscr->content();
_record_subscripted_capture(
$elem->content(), $subval, $re, $captures, $named_captures );
return;
}
# Find capture variables in the content of a double-quotish thing, and
# record their use. RT #38942. The arguments are:
# $content - The content() ( or heredoc() in the case of a here
# document) to be analyzed;
# $re - The PPIx::Regexp object;
# $captures - A reference to the numbered capture array;
# $named_captures - A reference to the named capture hash.
sub _mark_magic_in_content {
my ( $content, $re, $captures, $named_captures, $doc ) = @_;
my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
while ( $content =~ m< ( [\$\@] (?:
[{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) {
my $name = $1;
$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
and $name =~ s/ [}] \z //sxm;
if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
my $num = $1;
0 < $num
and $num <= @{ $captures }
and _record_numbered_capture( $num, $captures );
} elsif ( $capture_array->{$name} ) { # GitHub #778
foreach my $num ( 1 .. @{$captures} ) {
_record_numbered_capture( $num, $captures );
}
} elsif ( $capture_ref->{$name} &&
$content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc )
{
_record_subscripted_capture(
$name, $1, $re, $captures, $named_captures );
}
}
return;
}
# Return true if the given element is double-quotish. Always returns
# false for a PPI::Token::HereDoc, since they're a different beast.
# RT #38942.
sub _is_double_quotish_element {
my ( $elem ) = @_;
$elem or return;
my $content = $elem->content();
if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
return $content !~ m/ \A qx \s* ' /sxm;
}
foreach my $class ( qw{
PPI::Token::Quote::Double
PPI::Token::Quote::Interpolate
PPI::Token::QuoteLike::Backtick
PPI::Token::QuoteLike::Readline
} ) {
$elem->isa( $class ) and return $TRUE;
}
return $FALSE;
}
# Record a subscripted capture, either hash dereference or array
# dereference. We assume that an array represents a numbered capture and
# a hash represents a named capture, since we have to handle (e.g.) both
# @+ and %+.
sub _record_subscripted_capture {
my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
_record_named_capture( $name, $captures, $named_captures );
} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
# GitHub #778
# Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
# But @{^CAPTURE} is zero-based, so we need to tweak the subscript
# before we record the capture number.
my $num = $1 + 0;
$num >= 0
and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
and $num++;
_record_numbered_capture( $num, $captures, $re );
}
return;
}
# Because a named capture is also one or more numbered captures, the recording
# of the use of a named capture seemed complex enough to wrap in a subroutine.
sub _record_named_capture {
my ( $name, $captures, $named_captures ) = @_;
defined ( my $numbers = $named_captures->{$name} ) or return;
foreach my $capnum ( @{ $numbers } ) {
_record_numbered_capture( $capnum, $captures );
}
$named_captures->{$name} = undef;
return;
}
sub _record_numbered_capture {
my ( $number, $captures, $re ) = @_;
$re and $number < 0
and $number = $re->max_capture_number() + $number + 1;
return if $number <= 0;
$captures->[ $number - 1 ] = 1;
return;
}
1;
__END__
#-----------------------------------------------------------------------------
=pod
=for stopwords refactored
=head1 NAME
Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
=head1 AFFILIATION
This Policy is part of the core L<Perl::Critic|Perl::Critic>
distribution.
=head1 DESCRIPTION
Perl regular expressions have multiple types of grouping syntax. The
basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/>) are useful because
they have better runtime performance and do not copy strings to the
magic global capture variables.
It's also easier on the maintenance programmer if you consistently use
capturing vs. non-capturing groups, because that programmer can tell
more easily which regexps can be refactored without breaking
surrounding code which may use the captured values.
=head1 CONFIGURATION
This Policy is not configurable except for the standard options.
=head1 CAVEATS
=head2 C<qr//> interpolation
This policy can be confused by interpolation of C<qr//> elements, but
those are always false negatives. For example:
my $foo_re = qr/(foo)/;
my ($foo) = m/$foo_re (bar)/x;
A human can tell that this should be a violation because there are two
captures but only the first capture is used, not the second. The
policy only notices that there is one capture in the regexp and
remains happy.
=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
This policy will only recognize capture groups referred to by these
variables if the use is subscripted by a literal integer.
=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
This policy will not recognize capture groups referred to only by these
variables, because there is in general no way by static analysis to
determine which capture group is referred to. For example,
m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx
makes use of the first capture group if it matches, or the second
capture group if the first does not match but the second does.
=head2 split()
Normally, this policy thinks that if a capture is used at all it must be
used before the next regular expression in the same scope. The regular
expression in a C<split()> needs to be exempted because it does not
affect the caller's capture variables.
At present, this policy recognizes and exempts the regular expressions
in
split /.../, ...
and
split( /.../, ... )
but more exotic syntax may produce false positives.
=head1 CREDITS
Initial development of this policy was supported by a grant from the
Perl Foundation.
=head1 AUTHOR
Chris Dolan <cdolan@cpan.org>
=head1 COPYRIGHT
Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. The full text of this license
can be found in the LICENSE file included with this module
=cut
# Local Variables:
# mode: cperl
# cperl-indent-level: 4
# fill-column: 78
# indent-tabs-mode: nil
# c-indentation-style: bsd
# End:
# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :
|