File: ProhibitCaptureWithoutTest.pm

package info (click to toggle)
libperl-critic-perl 1.156-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,544 kB
  • sloc: perl: 24,092; lisp: 341; makefile: 7
file content (407 lines) | stat: -rw-r--r-- 14,397 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
package Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest;

use 5.010001;
use strict;
use warnings;
use Readonly;

use Perl::Critic::Utils qw{ :booleans :data_conversion :severities };
use parent 'Perl::Critic::Policy';

our $VERSION = '1.156';

#-----------------------------------------------------------------------------

Readonly::Hash my %CONDITIONAL_OPERATOR => hashify( qw{ && || ? and or xor } );
Readonly::Hash my %UNAMBIGUOUS_CONTROL_TRANSFER => hashify(
    qw< next last redo return > );

Readonly::Scalar my $DESC => q{Capture variable used outside conditional};
Readonly::Scalar my $EXPL => [ 253 ];

#-----------------------------------------------------------------------------

sub supported_parameters { return (
        {
            name        => 'exception_source',
            description => 'Names of ways to generate exceptions',
            behavior    => 'string list',
            list_always_present_values => [ qw{ die croak confess } ],
        }
    );
}
sub default_severity     { return $SEVERITY_MEDIUM         }
sub default_themes       { return qw(core pbp maintenance certrule ) }
sub applies_to           { return 'PPI::Token::Magic'      }

#-----------------------------------------------------------------------------

sub violates {
    my ($self, $elem, undef) = @_;
    # TODO named capture variables
    return if $elem !~ m/\A \$[1-9] \z/xms;
    return if _is_in_conditional_expression($elem);
    return if $self->_is_in_conditional_structure($elem);
    return $self->violation( $DESC, $EXPL, $elem );
}

sub _is_in_conditional_expression {
    my $elem = shift;

    # simplistic check: is there a conditional operator between a match and
    # the capture var?
    my $psib = $elem->sprevious_sibling;
    while ($psib) {
        if ($psib->isa('PPI::Token::Operator')) {
            my $op = $psib->content;
            if ( $CONDITIONAL_OPERATOR{ $op } ) {
                $psib = $psib->sprevious_sibling;
                while ($psib) {
                    return 1 if ($psib->isa('PPI::Token::Regexp::Match'));
                    return 1 if ($psib->isa('PPI::Token::Regexp::Substitute'));
                    $psib = $psib->sprevious_sibling;
                }
                return; # false
            }
        }
        $psib = $psib->sprevious_sibling;
    }

    return; # false
}

sub _is_in_conditional_structure {
    my ( $self, $elem ) = @_;

    my $stmt = $elem->statement();
    while ($stmt && $elem->isa('PPI::Statement::Expression')) {
       #return if _is_in_conditional_expression($stmt);
       $stmt = $stmt->statement();
    }
    return if !$stmt;

    # Check if any previous statements in the same scope have regexp matches
    my $psib = $stmt->sprevious_sibling;
    while ($psib) {
        if ( $psib->isa( 'PPI::Node' ) and
            my $match = _find_exposed_match_or_substitute( $psib ) ) {
            return _is_control_transfer_to_left( $self, $match, $elem ) ||
                _is_control_transfer_to_right( $self, $match, $elem );
        }
        $psib = $psib->sprevious_sibling;
    }

    # Check for an enclosing 'if', 'unless', 'elsif', 'else', or 'when'
    my $parent = $stmt->parent;
    while ($parent) { # never false as long as we're inside a PPI::Document
        if ($parent->isa('PPI::Statement::Compound') ||
            $parent->isa('PPI::Statement::When' )
        ) {
            return 1;
        }
        elsif ($parent->isa('PPI::Structure')) {
           return 1 if _is_in_conditional_expression($parent);
           return 1 if $self->_is_in_conditional_structure($parent);
           $parent = $parent->parent;
        }
        else {
           last;
        }
    }

    return; # fail
}

# This subroutine returns true if there is a control transfer to the left of
# the match operation which would bypass the capture variable. The arguments
# are the match operation and the capture variable.
sub _is_control_transfer_to_left {
    my ( $self, $match, $elem ) = @_;
    # If a regexp match is found, we succeed if a match failure
    # appears to throw an exception, and fail otherwise. RT 36081
    my $prev = $match->sprevious_sibling() or return;
    while ( not ( $prev->isa( 'PPI::Token::Word' ) &&
            q<unless> eq $prev->content() ) ) {
        $prev = $prev->sprevious_sibling() or return;
    }
    # In this case we analyze the first thing to appear in the parent of the
    # 'unless'. This is the simplest case, and it will not be hard to dream up
    # cases where this is insufficient (e.g. do {something(); die} unless ...)
    my $parent = $prev->parent() or return;
    my $first = $parent->schild( 0 ) or return;
    if ( my $method = _get_method_name( $first ) ) {
        # Methods can also be exception sources.
        return $self->{_exception_source}{ $method->content() };
    }
    return $self->{_exception_source}{ $first->content() } ||
        _unambiguous_control_transfer( $first, $elem );
}

# This subroutine returns true if there is a control transfer to the right of
# the match operation which would bypass the capture variable. The arguments
# are the match operation and the capture variable.
sub _is_control_transfer_to_right {
    my ( $self, $match, $elem ) = @_;
    # If a regexp match is found, we succeed if a match failure
    # appears to throw an exception, and fail otherwise. RT 36081
    my $oper = $match->snext_sibling() or return;   # fail
    my $oper_content = $oper->content();
    # We do not check '//' because a match failure does not
    # return an undefined value.
    q{or} eq $oper_content
        or q{||} eq $oper_content
        or return;                                  # fail
    my $next = $oper->snext_sibling() or return;    # fail
    if ( my $method = _get_method_name( $next ) ) {
        # Methods can also be exception sources.
        return $self->{_exception_source}{ $method->content() };
    }
    return $self->{_exception_source}{ $next->content() } ||
        _unambiguous_control_transfer( $next, $elem );
}

# Given a PPI::Node, find the last regexp match or substitution that is
# in-scope to the node's next sibling.
sub _find_exposed_match_or_substitute { # RT 36081
    my $elem = shift;
FIND_REGEXP_NOT_IN_BLOCK:
    foreach my $regexp ( reverse @{ $elem->find(
            sub {
                return $_[1]->isa( 'PPI::Token::Regexp::Substitute' )
                    || $_[1]->isa( 'PPI::Token::Regexp::Match' );
            }
        ) || [] } ) {
        my $parent = $regexp->parent();
        while ( $parent != $elem ) {
            $parent->isa( 'PPI::Structure::Block' )
                and next FIND_REGEXP_NOT_IN_BLOCK;
            $parent = $parent->parent()
                or next FIND_REGEXP_NOT_IN_BLOCK;
        }
        return $regexp;
    }
    return;
}

# If the argument introduces a method call, return the method name;
# otherwise just return.
sub _get_method_name {
    my ( $elem ) = @_;
    # We fail unless the element we were given looks like it might be an
    # object or a class name.
    $elem or return;
    (
        $elem->isa( 'PPI::Token::Symbol' ) &&
        q<$> eq $elem->raw_type() ||
        $elem->isa( 'PPI::Token::Word' ) &&
        $elem->content() =~ m/ \A [\w:]+ \z /smx
    ) or return;
    # We skip over all the subscripts and '->' operators to the right of
    # the original element, failing if we run out of objects.
    my $prior;
    my $next = $elem->snext_sibling() or return;
    while ( $next->isa( 'PPI::Token::Subscript' ) ||
        $next->isa( 'PPI::Token::Operator' ) &&
        q{->} eq $next->content() ) {
        $prior = $next;
        $next = $next->snext_sibling or return; # fail
    }
    # A method call must have a '->' operator before it.
    ( $prior &&
        $prior->isa( 'PPI::Token::Operator' ) &&
        q{->} eq $prior->content()
    ) or return;
    # Anything other than a PPI::Token::Word can not be statically
    # recognized as a method name.
    $next->isa( 'PPI::Token::Word' ) or return;
    # Whatever we have left at this point looks very like a method name.
    return $next;
}

# Determine whether the given element represents an unambiguous transfer of
# control around anything that follows it in the same block. The arguments are
# the element to check, and the capture variable that is the subject of this
# call to the policy.
sub _unambiguous_control_transfer { # RT 36081.
    my ( $xfer, $elem ) = @_;

    my $content = $xfer->content();

    # Anything in the hash is always a transfer of control.
    return $TRUE if $UNAMBIGUOUS_CONTROL_TRANSFER{ $content };

    # A goto is not unambiguous on the face of it, but at least some forms of
    # it can be accepted.
    q<goto> eq $content
        and return _unambiguous_goto( $xfer, $elem );

    # Anything left at this point is _not_ an unambiguous transfer of control
    # around whatever follows it.
    return;
}

# Determine whether the given goto represents an unambiguous transfer of
# control around anything that follows it in the same block. The arguments are
# the element to check, and the capture variable that is the subject of this
# call to the policy.
sub _unambiguous_goto {
    my ( $xfer, $elem ) = @_;

    # A goto without a target?
    my $target = $xfer->snext_sibling() or return;

    # The co-routine form of goto is an unambiguous transfer of control.
    $target->isa( 'PPI::Token::Symbol' )
        and q<&> eq $target->raw_type()
        and return $TRUE;

    # The label form of goto is an unambiguous transfer of control,
    # provided the label does not occur between the goto and the capture
    # variable.
    if ( $target->isa( 'PPI::Token::Word' ) ) {

        # We need to search in our most-local block, or the document if
        # there is no enclosing block.
        my $container = $target;
        while ( my $parent = $container->parent() ) {
            $container = $parent;
            $container->isa( 'PPI::Structure::Block' ) and last;
        }

        # We search the container for our label. If we find it, we return
        # true if it occurs before the goto or after the capture variable,
        # otherwise we return false. If we do not find it we return true.
        # Note that perl does not seem to consider duplicate labels an
        # error, but also seems to take the first one in the relevant
        # scope when this happens.
        my $looking_for = qr/ \A @{[ $target->content() ]} \s* : \z /smx;
        my ($start_line, $start_char) = @{ $xfer->location() || [] };
        defined $start_line or return;  # document not indexed.
        my ($end_line,   $end_char)   = @{ $elem->location() || [] };
        foreach my $label (
            @{ $container->find( 'PPI::Token::Label' ) || [] } )
        {
            $label->content() =~ m/$looking_for/smx or next;
            my ( $line, $char ) = @{ $label->location() || [] };
            return $TRUE
                if $line < $start_line ||
                    $line == $start_line && $char < $start_char;
            return $TRUE
                if $line > $end_line ||
                    $line == $end_line && $char > $end_char;
            return;
        }
        return $TRUE;
    }

    # Any other form of goto can not be statically analyzed, and so is not
    # an unambiguous transfer of control around the capture variable.
    return;
}

1;

#-----------------------------------------------------------------------------

__END__

=pod

=head1 NAME

Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest - Capture variable used outside conditional.

=head1 AFFILIATION

This Policy is part of the core L<Perl::Critic|Perl::Critic>
distribution.

=head1 DESCRIPTION

If a regexp match fails, then any capture variables (C<$1>, C<$2>,
...) will be unaffected.  They will retain whatever old values they may
have had.  Therefore it's important to check the return value of a match
before using those variables.

    '12312123' =~ /(2)/;
    print $1;    # Prints 2
    '123123123' =~ /(X)/;
    print $1;    # Prints 2, because $1 has not changed.

Note that because the values of C<$1> etc will be unaffected, you cannot
determine if a match succeeded by checking to see if the capture variables
have values.

    # WRONG
    $str =~ /foo(.+)/;
    if ( $1 ) {
        print "I found $1 after 'foo'";
    }

This policy checks that the previous regexp for which the capture
variable is in-scope is either in a conditional or causes an exception
or other control transfer (i.e. C<next>, C<last>, C<redo>, C<return>, or
sometimes C<goto>) if the match fails.

A C<goto> is only accepted by this policy if it is a co-routine call
(i.e.  C<goto &foo>) or a C<goto LABEL> where the label does not fall
between the C<goto> and the capture variable in the scope of the
C<goto>. A computed C<goto> (i.e. something like C<goto (qw{foo bar
baz})[$i]>) is not accepted by this policy because its target can not be
statically determined.

This policy does not check whether that conditional is actually
testing a regexp result, nor does it check whether a regexp actually
has a capture in it.  Those checks are too hard.

This policy also does not check arbitrarily complex conditionals guarding
regexp results, for pretty much the same reason.  Simple things like

 m/(foo)/ or die "No foo!";
 die "No foo!" unless m/(foo)/;

will be handled, but something like

 m/(foo)/ or do {
   ... lots of complicated calculations here ...
   die "No foo!";
 };

are beyond its scope.


=head1 CONFIGURATION

By default, this policy considers C<die>, C<croak>, and C<confess> to
throw exceptions. If you have additional subroutines or methods that may
be used in lieu of one of these, you can configure them in your
perlcriticrc as follows:

 [RegularExpressions::ProhibitCaptureWithoutTest]
 exception_source = my_exception_generator

=head1 BUGS

This policy does not recognize named capture variables. Yet.

=head1 AUTHOR

Chris Dolan <cdolan@cpan.org>

=head1 COPYRIGHT

Copyright (c) 2006-2017 Chris Dolan.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 4
#   fill-column: 78
#   indent-tabs-mode: nil
#   c-indentation-style: bsd
# End:
# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :