File: find-variable-length-lookarounds

package info (click to toggle)
libppix-regexp-perl 0.090-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,524 kB
  • sloc: perl: 8,022; makefile: 8
file content (231 lines) | stat: -rwxr-xr-x 5,525 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env perl

use 5.006;

use strict;
use warnings;

use File::Find;
use File::Spec;
use Getopt::Long 2.33 qw{ :config auto_version };
use Pod::Usage;
use PPI::Document;
use PPIx::Regexp;

our $VERSION = '0.090';

# Option values collected by GetOptions(). $opt{_break} is not a
# command-line option; it is derived from $opt{break} further down.
my %opt;

# Names of the directories that are pruned from the traversal. The
# --ignore-directory and --no-ignore-directory options add and remove
# entries, and --dump lists whatever is present when it is encountered.
my %ignore_dir;
foreach my $dir_name ( qw{
	.bzr
	.cabal-sandbox
	.cdv
	.git
	.hg
	.metadata
	.pc
	.svn
	CMakeFiles
	CVS
	RCS
	SCCS
	_MTN
	_build
	_darcs
	_sgbak
	autom4te.cache
	blib
	cover_db
	node_modules
	~.dep
	~.dot
	~.nib
	~.plst
    } ) {
    $ignore_dir{$dir_name} = 1;
}

# Process the command line. The options with code references act as
# soon as they are seen: the two directory options edit %ignore_dir,
# --dump prints the ignore list and exits, and --help shows the full
# documentation. Getopt::Long hands each callback the option name
# followed by the option value.
my $options_ok = GetOptions( \%opt,
    'break!',
    'files_with_matches|files-with-matches|l!',
    'ignore_directory|ignore-directory=s' => sub {
	my $dir_name = $_[1];
	$ignore_dir{$dir_name} = 1;
    },
    'no_ignore_directory|no-ignore-directory=s' => sub {
	my $dir_name = $_[1];
	delete $ignore_dir{$dir_name};
    },
    dump => sub {
	print map { "  --ignore-directory=$_\n" } sort keys %ignore_dir;
	exit;
    },
    help => sub { pod2usage( { -verbose => 2 } ) },
);
$options_ok
    or pod2usage( { -verbose => 0 } );

# With nothing named on the command line, analyze the current directory.
@ARGV = ( File::Spec->curdir() )
    unless @ARGV;

# Unless --break or --no-break was given explicitly, break is the
# inverse of files_with_matches.
$opt{break} = ! $opt{files_with_matches}
    unless defined $opt{break};

# Text emitted ahead of each file name: a newline when break is in
# effect, otherwise nothing.
$opt{_break} = $opt{break} ? "\n" : '';

find( \&handler, @ARGV );

# File::Find callback, called for each entry found with the entry named
# by $_ and its path available in $File::Find::name.
#
# A directory is never analyzed; if its name is a key of %ignore_dir it
# is also pruned. Any other entry is analyzed only if is_perl() accepts
# it. The file is parsed into a PPI::Document, and each regular
# expression extracted from it is searched for a lookaround assertion
# that contains a quantifier. The file name is printed once, preceded by
# $opt{_break}, ahead of the first hit in the file. Unless
# $opt{files_with_matches} is set, each regular expression with a hit is
# also printed as "line:content", at most once per regular expression.
# The return value is not meaningful.
sub handler {
    if ( -d $_ ) {
	# A true value here stops File::Find descending into the
	# directory. This file test also fills the stat buffer that
	# is_perl() consults for non-directories.
	$File::Find::prune = $ignore_dir{$_};
	return;
    }

    return unless is_perl();

    my $doc = PPI::Document->new( $_ );
    unless ( $doc ) {
	warn "Unable to make PPI::Document from $File::Find::name: @{[
	PPI::Document->errstr()
	]}\n";
	return;
    }

    # Counts the hits in this file; zero (false) until the file name
    # has been printed.
    my $name_printed;

    REGEXP:
    foreach my $re ( PPIx::Regexp->extract_regexps( $doc ) ) {

	my $assertions = $re->find(
	    'PPIx::Regexp::Structure::Assertion' ) || [];

	foreach my $lookaround ( @{ $assertions } ) {

	    # NOTE(review): only the (?= (?! (?<= and (?<! spellings
	    # pass this test; confirm whether assertions spelled any
	    # other way ought to be reported as well.
	    next unless $lookaround->type()->content() =~
		m/ \A [?] <? [=!] /smx;

	    # A quantifier may be either a single token or a bracketed
	    # structure; one of either kind is enough.
	    next unless $lookaround->find(
		    'PPIx::Regexp::Token::Quantifier' )
		|| $lookaround->find(
		    'PPIx::Regexp::Structure::Quantifier' );

	    my $loc = $re->source()->location();

	    print "$opt{_break}$File::Find::name\n"
		unless $name_printed++;

	    print "$loc->[0]:@{[ $re->content() ]}\n"
		unless $opt{files_with_matches};

	    # One hit is enough to report this regular expression.
	    next REGEXP;
	}
    }
}

# Report whether the file named by $_ appears to be Perl source.
#
# The text-file test is made against the special _ filehandle, i.e. it
# re-uses the result of the most recent file test or stat. The caller
# must therefore have just tested the same file (handler() does so with
# -d $_).
#
# Returns true if the file is text and either its name ends in .pm, .t,
# or .pl (the latter in any mix of case), or its first line is a shebang
# line that mentions perl. Returns false otherwise, including when the
# file cannot be opened or is empty.
sub is_perl {
    return unless -T _;

    # A recognized file name suffix settles the question without
    # reading the file.
    return 1 if m/ [.] (?: pm | t | (?i: pl ) ) \z /smx;

    # Otherwise the decision rests on the first line of the file.
    open my $handle, '<', $_
	or return;
    my $first_line = <$handle>;
    return unless defined $first_line;
    close $handle;

    return $first_line =~ m/ \A \#! .* perl /smx;
}

__END__

=head1 TITLE

find-variable-length-lookarounds - Find regular expressions that use variable-length lookarounds

=head1 SYNOPSIS

 find-variable-length-lookarounds
 find-variable-length-lookarounds --help
 find-variable-length-lookarounds --version

=head1 OPTIONS

Option names have been chosen to be compatible with (or at least close
to) C<ack>.

=head2 --break

Asserting this Boolean option causes a blank line to be inserted before
the file name in the output.

The default is the inverse of the value of
L<--files-with-matches|/--files-with-matches>.

=head2 --dump

This option causes the configuration to be dumped. The script then
exits.

The configuration consists of the list of ignored directories in effect
as of the time the C<--dump> option was encountered.

=head2 --files-with-matches

Asserting this option suppresses the listing of individual regular
expressions.

The default is C<--no-files-with-matches>, which causes both file name
and regular expressions to be listed.

=head2 --help

This option displays the documentation for this script. The script then
exits.

=head2 --ignore-directory

 --ignore-directory=fubar

This option adds a directory to the list of directories to be ignored.
It may be specified multiple times. The initial list was cribbed from
C<ack>.

=head2 -l

This Boolean option is a synonym for
L<--files-with-matches|/--files-with-matches>.

=head2 --no-ignore-directory

 --no-ignore-directory=fubar

This option removes a directory from the list of directories to be
ignored. It is not an error to remove a directory that was not on the
list.

=head2 --version

This option displays the version of this script. The script then exits.

=head1 DETAILS

This Perl script reads Perl files, and finds and displays regular
expressions that contain variable-length lookarounds, either lookaheads
or lookbehinds. These are defined as lookarounds that contain at least
one quantifier. Note that this definition does B<not> catch lookarounds
that are variable-length due to current case-folding rules (which, among
other things, require ligatures to match non-ligatures, and a German
sharp s to match a double s).

The files to analyze are specified on the command line. Directories are
traversed recursively, with directories not likely to be of interest
being skipped. Only files that appear to be Perl are actually analyzed.
These are text files whose names end in F<.pm>, F<.t>, or F<.pl> (the
last in any combination of upper and lower case), or whose first line is
a shebang line containing C<'perl'>.

If no files are specified on the command line, the current directory is
analyzed.

For each file containing at least one variable-length lookaround, the
name of the file is listed. Each regular expression containing a
variable-length lookaround is listed, preceded by its line number in the
file.

=head1 AUTHOR

Thomas R. Wyant, III F<wyant at cpan dot org>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2019-2023, 2025 by Thomas R. Wyant, III

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl 5.10.0. For more details, see the full text
of the licenses in the directory LICENSES.

This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.

=cut

# ex: set textwidth=72 :