1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
package PPI::Token::_QuoteEngine;
# The PPI::Token::_QuoteEngine package is designed to hold functionality
# for processing quotes and quote-like operators, including regexes.
# These have special requirements in parsing.
#
# The PPI::Token::_QuoteEngine package itself provides various parsing
# methods, which the PPI::Token::Quote::*, PPI::Token::QuoteLike::* and
# PPI::Token::Regexp::* can inherit from. In this sense, it serves
# as a base class.
#
# This file also contains the token classes for all the quotes, and
# quote like operators.
#
# To use these, you should initialize them as normal 'Class->new',
# and then call the 'fill' method, which will cause the specialised
# parser to parse the quote to its end point.
#
# If ->fill returns true, finalise the token.
use strict;
use Carp ();
use vars qw{$VERSION};
BEGIN {
$VERSION = '0.903';
}
# Hook for the _on_char token call.
#
# Called with the class and the tokenizer object. If the tokenizer has
# no current token ( $t->{token} is false ) nothing is done and undef
# is returned. Otherwise the current token's _fill method is asked to
# consume the quote.
#
# Returns undef if there is no token or if _fill returns undef.
# Returns 0 after finalizing the token, which tells the tokenizer
# to go to the next character.
sub _on_char {
    my $class = shift;

    # Guard: bail out unless the tokenizer currently holds a token
    return undef unless $_[0]->{token};
    my $t = shift;

    # Let the token parse itself through to the end of the quote
    my $filled = $t->{token}->_fill( $t );
    defined $filled or return undef;

    # The quote is complete, so close off the token
    $t->_finalize_token;
    return 0;
}
#####################################################################
# Optimised character processors, used for quotes
# and quote like stuff, and accessible to the child classes
# An outright scan, raw and fast.
# Searches for a particular character, loading in new lines as needed.
#
# Arguments: the class, the tokenizer object $t, and the single
# character to search for. The scan starts at $t->{line_cursor} within
# $t->{line}, and calls $t->_fill_line whenever the current line is
# exhausted.
#
# Returns:
#   - the text scanned, up to and including the character, when found.
#     $t->{line_cursor} is left ON the character, NOT the one after it.
#   - a reference to the text scanned so far when $t->{line} no longer
#     exists (a reference indicates EOF).
#   - undef when the character argument is not exactly one character
#     long, or when $t->_fill_line returns undef.
sub _scan_for_character {
    my $class = shift;
    my $t     = shift;
    my $char  = (length $_[0] == 1) ? quotemeta shift : return undef;

    # Create the search regex
    my $search = qr/^(.*?$char)/;

    my $string = '';
    while ( exists $t->{line} ) {
        # Get the search area for the current line.
        # A lexical is used instead of the global $_ so that the
        # caller's $_ (possibly an alias into its own data) is left alone.
        my $rest = $t->{line_cursor}
            ? substr( $t->{line}, $t->{line_cursor} )
            : $t->{line};

        # Can we find a match on this line
        if ( $rest =~ /$search/ ) {
            # Found the character on this line
            $t->{line_cursor} += length($1) - 1;
            return $string . $1;
        }

        # Not here: keep the remainder and load in the next line
        $string .= $rest;
        return undef unless defined $t->_fill_line;
        $t->{line_cursor} = 0;
    }

    # Returning the string as a reference indicates EOF
    \$string;
}
# Scan for a character, but not if it is escaped.
#
# Arguments: the class, the tokenizer object $t, and the single
# character to search for. An occurrence counts only when it is
# preceded by an even number (including zero) of backslashes within
# the text being searched.
#
# Returns:
#   - the text scanned, up to and including the unescaped character.
#     $t->{line_cursor} is left ON the character.
#   - a reference to the text scanned so far when $t->{line} no longer
#     exists (a reference indicates EOF).
#   - undef when the character argument is not exactly one character
#     long, or when $t->_fill_line returns undef.
sub _scan_for_unescaped_character {
    my $class = shift;
    my $t     = shift;
    my $char  = (length $_[0] == 1) ? quotemeta shift : return undef;

    # Create the search regex.
    # The negative look-behind plus the run of backslash pairs makes
    # sure the character itself is not escaped.
    my $search = qr/^(.*?(?<!\\)(?:\\\\)*$char)/;

    my $string = '';
    while ( exists $t->{line} ) {
        # Get the search area for the current line.
        # A lexical is used instead of the global $_ so that the
        # caller's $_ (possibly an alias into its own data) is left alone.
        my $rest = $t->{line_cursor}
            ? substr( $t->{line}, $t->{line_cursor} )
            : $t->{line};

        # Can we find a match on this line
        if ( $rest =~ /$search/ ) {
            # Found the character on this line
            $t->{line_cursor} += length($1) - 1;
            return $string . $1;
        }

        # Not here: keep the remainder and load in the next line
        $string .= $rest;
        return undef unless defined $t->_fill_line;
        $t->{line_cursor} = 0;
    }

    # Returning the string as a reference indicates EOF
    \$string;
}
# Scan for a closing brace, taking into account both escaping and
# nested open/close brace pairs in the string.
#
# Arguments: the class, the tokenizer object $t, and the closing brace
# character, which must be one of  >  )  }  ]  . The matching opening
# brace is derived from it. The scan starts at nesting depth 1 and
# stops at the unescaped closing brace that brings the depth to zero.
#
# Returns:
#   - the text scanned, up to and including the final closing brace.
#     $t->{line_cursor} is left on the LAST character found.
#   - a reference to the text scanned so far when $t->{line} no longer
#     exists (a reference indicates EOF).
#   - undef when $t->_fill_line returns undef.
# Dies (via Carp::confess) when the brace argument is not a closing brace.
sub _scan_for_brace_character {
    my $class = shift;
    my $t     = shift;
    my $close_brace = ( defined $_[0] && $_[0] =~ /^(?:\>|\)|\}|\])$/ )
        ? shift
        : Carp::confess('Expected a closing brace character, one of: > ) } ]');
    my $open_brace = $close_brace;
    $open_brace =~ tr/\>\)\}\]/\<\(\{\[/;

    # Create the search regex: the shortest run of text that ends in
    # an unescaped opening or closing brace
    $close_brace = quotemeta $close_brace;
    $open_brace  = quotemeta $open_brace;
    my $search = qr/^(.*?(?<!\\)(?:\\\\)*(?:$open_brace|$close_brace))/;

    # Loop as long as we can get new lines
    my $string = '';
    my $depth  = 1;
    while ( exists $t->{line} ) {
        # Get the search area.
        # A lexical is used instead of the global $_ so that the
        # caller's $_ (possibly an alias into its own data) is left alone.
        my $rest = $t->{line_cursor}
            ? substr( $t->{line}, $t->{line_cursor} )
            : $t->{line};

        # Look for a match
        unless ( $rest =~ /$search/ ) {
            # No brace on the rest of this line, load in the next one
            $string .= $rest;
            defined $t->_fill_line or return undef;
            $t->{line_cursor} = 0;
            next;
        }

        # Copy the capture before any further regex is run
        my $found = $1;

        # Add to the string, and step past the brace
        $string .= $found;
        $t->{line_cursor} += length $found;

        # Alter the depth and continue if we aren't at the end
        $depth += ( $found =~ /$open_brace$/ ) ? 1 : -1;
        next if $depth;

        # Rewind the cursor by one character so that it sits on the
        # closing brace rather than after it
        $t->{line_cursor} -= 1;
        return $string;
    }

    # Returning the string as a reference indicates EOF
    \$string;
}
# Find all spaces and comments, up to, but not including
# the first non-whitespace character.
#
# Arguments: the class (unused) and the tokenizer object $t. The scan
# starts at $t->{line_cursor} within $t->{line} and calls
# $t->_fill_line each time a whole line turns out to be gap.
#
# Returns:
#   - the gap text (possibly the empty string) when a character that
#     is not part of the gap is found. $t->{line_cursor} is left on
#     that character, i.e. the one following the gap.
#   - a reference to the gap text collected so far when $t->{line} no
#     longer exists (a reference indicates EOF).
#   - undef when $t->_fill_line returns undef.
sub _scan_quote_like_operator_gap {
    my $t = $_[1];

    my $string = '';
    while ( exists $t->{line} ) {
        # Get the search area for the current line.
        # A lexical is used instead of the global $_ so that the
        # caller's $_ (possibly an alias into its own data) is left alone.
        my $rest = $t->{line_cursor}
            ? substr( $t->{line}, $t->{line_cursor} )
            : $t->{line};

        # Since this regex can match zero characters, it should always match
        $rest =~ /^(\s*(?:\#.*)?)/s or return undef;
        my $gap = $1;

        # Add the chars found to the string
        $string .= $gap;

        # Did we match the entire line?
        unless ( length($gap) == length($rest) ) {
            # Partial line match, which means we are at
            # the end of the gap. Fix the cursor and return
            # the string.
            $t->{line_cursor} += length $gap;
            return $string;
        }

        # Load in the next line.
        # If we reach the EOF, $t->{line} gets deleted,
        # which is caught by the while.
        return undef unless defined $t->_fill_line;

        # Set the cursor to the first character
        $t->{line_cursor} = 0;
    }

    # Returning the string as a reference indicates EOF
    \$string;
}
1;
|