1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
|
#!/usr/local/bin/perl
use 5.006;
use strict;
use warnings;
# At compile time, apply the 'open' pragma with the :std and
# :encoding(utf-8) arguments, but only when the running Perl's version
# ($]) compares (as a string) at or above '5.008'. The file itself only
# requires 5.006 (see the 'use 5.006' above), so presumably this is spelled
# as a run-time require/import instead of 'use open' to keep older Perls
# from attempting it -- TODO confirm.
BEGIN {
if ( '5.008' le $] ) {
# Have to prevent Perl from parsing 'open' as 'CORE::open'.
# Hence the quoted file name for require and the quoted class name
# for the import() call.
require 'open.pm';
'open'->import( qw{ :std :encoding(utf-8) } );
}
}
use Getopt::Long 2.33 qw{ :config auto_version };
use Pod::Usage;
use PPI::Document;
use PPIx::Regexp::Dumper;
use Scalar::Util qw{ refaddr };
use vars qw{ $VERSION };
$VERSION = '0.091';
# Option defaults. default_modifiers is collected into an array
# reference (it may be given more than once); verbose starts at zero so
# that each --verbose on the command line can count upward from it.
my %opt = (
    default_modifiers => [],
    verbose           => 0,
);

# Parse the command line into %opt. --help prints the full POD to
# standard out and exits 0. If option parsing fails, or if no arguments
# remain afterwards, print the brief usage to standard error and exit 2.
GetOptions( \%opt,
    help => sub { pod2usage ( {
                -exitval => 0,
                -verbose => 2,
                -output  => \*STDOUT,
            } )
    },
    # NOTE: the incremental marker must be attached to its option name
    # ('verbose+', like 'trace+'); a free-standing '+' is taken as a
    # separate option specification of its own.
    qw{
        default_modifiers|default-modifiers=s@
        encoding=s explain! files!
        indent=i locations! margin=i objectify! ordinal!
        perl_version|perl-version!
        ppi! short! significant! strict!
        test! tokens! trace+ unescape! verbose+ width!
    } )
    and @ARGV
    or pod2usage( {
        -exitval => 2,
        -verbose => 1,
        -output  => \*STDERR,
    } );

# Each --default-modifiers value may itself be a comma-separated list;
# flatten everything into one list of individual modifiers.
$opt{default_modifiers} = [
    map { split qr{ \s* , \s* }smx } @{ $opt{default_modifiers} }
];

# process_args() turns the remaining arguments into the things to dump
# and removes the options it consumes (files, objectify, unescape) from
# %opt, so they are not handed on to the dumper below.
foreach my $re ( process_args( \%opt, @ARGV ) ) {

    # Unless generating tests, precede each dump with a header line
    # (the item itself plus any default modifiers) and, when
    # item_info() knows where the item came from, a second line giving
    # that information.
    if ( ! $opt{test} ) {
        my @output = ( "\n$re" );
        @{ $opt{default_modifiers} }
            and push @output, q{default_modifiers => '} . join( ',', @{
                $opt{default_modifiers} } ) . q{'};
        print join( "\t", @output ), "\n";
        @output = item_info( $re )
            and print join( "\t", @output ), "\n";
    }

    # All remaining options are passed straight through to the dumper.
    PPIx::Regexp::Dumper->new( $re, %opt )->print();
}
{
    # Every PPI::Document built by process_args() is kept here so that it
    # stays referenced after process_args() returns.
    my @docs;

    # Information about documents that were read from files, keyed by the
    # refaddr() of the PPI::Document. Each value is a hash reference whose
    # 'name' entry is the file name the document was built from.
    my %file;

    # process_args( \%opt, @args )
    #
    # Convert the command arguments into the list of items to be dumped.
    #   * With $opt->{files} true, each argument is a file name; the file
    #     is parsed by PPI and extract_res() supplies the items.
    #   * Otherwise each argument is the text itself. If $opt->{unescape}
    #     is true every doubled back slash is reduced to a single one. If
    #     $opt->{objectify} is true the text is parsed by PPI and
    #     extract_res() supplies the items; if not, the text is returned
    #     as-is.
    # Dies if PPI can not build a document. Before returning, the
    # 'files', 'objectify', and 'unescape' keys are deleted from the
    # caller's option hash.
    sub process_args {
        my ( $opt, @args ) = @_;
        my @items;

        for my $datum ( @args ) {

            if ( $opt->{files} ) {
                my $ppi = PPI::Document->new( $datum, readonly => 1 );
                $ppi
                    or die "Can not make PPI::Document from file '$datum'\n";
                push @docs, $ppi;
                # Remember the file name so item_info() can report it.
                $file{ refaddr( $ppi ) } = { name => $datum };
                push @items, extract_res( $ppi, $opt );
                next;
            }

            if ( $opt->{unescape} ) {
                $datum =~ s/ \\\\ /\\/smxg;
            }

            unless ( $opt->{objectify} ) {
                push @items, $datum;
                next;
            }

            my $ppi = PPI::Document->new( \$datum );
            $ppi
                or die "Can not make PPI::Document from '$datum'\n";
            push @docs, $ppi;
            push @items, extract_res( $ppi, $opt );
        }

        # These options have been consumed here; remove them from the
        # caller's hash.
        delete @{ $opt }{ qw{ files objectify unescape } };

        return @items;
    }

    # item_info( $obj )
    #
    # Report where an item came from. Returns nothing unless $obj is a
    # PPI::Element that belongs to a document recorded in %file. For such
    # an element, list context yields the file name followed by elements
    # 0 and 2 of the array $obj->location() returns; scalar context
    # yields just the file name.
    #
    # NOTE(review): if extract_res() hands back objects that are not
    # PPI::Element instances, the isa() check below rejects them and no
    # file information is ever reported -- confirm this is intended.
    sub item_info {
        my ( $obj ) = @_;

        return unless ref $obj;

        # The eval guards against $obj being an unblessed reference.
        return unless eval { $obj->isa( 'PPI::Element' ) };

        my $owner = $obj->document();
        return unless $owner;

        my $record = $file{ refaddr $owner };
        return unless $record;

        wantarray
            or return $record->{name};

        my $where = $obj->location() || [];
        return ( $record->{name}, @{ $where }[0, 2] );
    }
}
{
    # Maps a parse type to a reference to the list of PPI class names
    # searched for that type. Populated at compile time.
    my %class_list_for;

    BEGIN {
        my @regex_classes = qw{
            PPI::Token::QuoteLike::Regexp
            PPI::Token::Regexp::Match
            PPI::Token::Regexp::Substitute
        };
        my @string_classes = qw{
            PPI::Token::Quote
            PPI::Token::QuoteLike::Command
            PPI::Token::QuoteLike::BackTick
            PPI::Token::HereDoc
        };
        %class_list_for = (
            guess  => [ @regex_classes, @string_classes ],
            regex  => \@regex_classes,
            string => \@string_classes,
        );
    }

    # extract_res( $doc, \%opt )
    #
    # Return the items of interest found in document $doc. The parse
    # type comes from $opt->{parse} and defaults to 'regex', in which
    # case the work is delegated to PPIx::Regexp->extract_regexps().
    # For any other parse type $doc->find() is called once for each
    # class listed for that type (falling back to the 'regex' list when
    # the type is not recognized) and everything found is returned, in
    # class-list order.
    sub extract_res {
        my ( $doc, $opt ) = @_;

        my $parse = $opt->{parse} || 'regex';

        if ( $parse eq 'regex' ) {
            return PPIx::Regexp->extract_regexps( $doc );
        }

        # TODO get rid of this whole mess in favor of the above return
        # once the string functionality goes away.
        my $wanted = $class_list_for{$parse} || $class_list_for{regex};
        my @found;
        foreach my $class ( @{ $wanted } ) {
            # find() may return a false value rather than an array
            # reference; treat that as nothing found.
            push @found, @{ $doc->find( $class ) || [] };
        }
        return @found;
    }
}
__END__
=head1 NAME
predump - Dump a regular expression
=head1 SYNOPSIS
predump 'qr{foo}smx'
predump --ordinal 'm/foo/x'
You can use
predump --help
for full documentation on usage.
=head1 DESCRIPTION
This Perl script parses the regular expression given on its command line
and dumps the results of the parse to standard out. Options are accepted
with leading single dashes as well as double dashes. The following
options are recognized:
=over
=item --default-modifiers text
This option specifies default modifiers for the regular expression. You
can specify more than one, either as a comma-separated list or by
specifying the option multiple times, or both. It is simply passed
through to L<< PPIx::Regexp->new()|PPIx::Regexp/new >>.
This option can also be expressed as C<--default_modifiers>.
=item --encoding name
This option specifies the encoding of the regular expression. It is
simply passed through to L<< PPIx::Regexp->new()|PPIx::Regexp/new >>.
=item --files
If true, this option specifies that the arguments are files whose
regular expressions are to be analyzed. If this option is asserted,
C<--objectify> and C<--unescape> are ignored.
=item --help
This option displays the documentation for this script. The script then
exits.
=item --indent number
This option specifies the number of spaces to indent each level of the
parse hierarchy. It is simply passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --margin number
This option specifies the width of the left margin of the dump output.
It is simply passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --objectify
If true, this option specifies that the arguments should be made into
L<PPI::Token|PPI::Token> objects before being passed to PPIx::Regexp.
This option is ignored if C<--files> is asserted.
=item --ordinal
If true, this option specifies that the ordinal value of all
L<PPIx::Regexp::Token::Literal|PPIx::Regexp::Token::Literal> objects be
displayed as part of the dump. The default is false. This is simply
passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --perl-version
If true, this option specifies that the dump include the perl version
applicable to each dumped item. The default is false. This is simply
passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
This option can also be expressed as C<--perl_version>.
=item --significant
If true, this option specifies that the dump include only significant
syntax elements. That is, no comments or non-significant white space.
The default is false. This is simply passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --short
If true, this option specifies that class names in the dump will have
the leading C<'PPIx::Regexp::'> removed. This is simply passed through
to L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --test
If true, this option specifies that the dump take the form of a
predefined set of tests generated for the regular expression. This
option is unsupported in the sense that the author makes no commitment
to what it will do, and reserves the right to change it without notice.
This is simply passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --tokens
If true, this option specifies that only tokenization be done on the
regular expression, and the output tokens dumped to standard out.
This is simply passed through to
L<< PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --trace
If true, this option specifies the generation of trace output from the
parse. It is unsupported in the sense that the author makes no
commitment to what it will do, and reserves the right to change it
without notice. This is simply passed through to
L<< PPIx::Regexp->new()|PPIx::Regexp/new >>.
=item --unescape
If true, this option causes the argument to be unescaped before
processing. You would use it if the argument is a Perl single-quotish
string, since Perl's single-quoted syntax differs from that of the usual
Unix shell. This option is ignored if C<--files> is asserted.
=item --verbose
If true, this option causes more information to be dumped about each
object produced by the parse. It is unsupported in the sense that the
author makes no commitment to what it will do, and reserves the right to
change it without notice. This is simply passed through to L<<
PPIx::Regexp::Dumper->new()|PPIx::Regexp::Dumper/new >>.
=item --version
This option displays the version of this script. The script then exits.
=item --width
If true, this option causes the width of the object to be dumped.
=back
=head1 SUPPORT
Support is by the author. Please file bug reports at
L<https://rt.cpan.org/Public/Dist/Display.html?Name=PPIx-Regexp>,
L<https://github.com/trwyant/perl-PPIx-Regexp/issues>, or in
electronic mail to the author.
=head1 AUTHOR
Thomas R. Wyant, III F<wyant at cpan dot org>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2009-2023, 2025 by Thomas R. Wyant, III
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl 5.10.0. For more details, see the full text
of the licenses in the directory LICENSES.
This program is distributed in the hope that it will be useful, but
without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.
=cut
# ex: set textwidth=72 :
|