1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
package PPI::Token::Unknown;
# This large, separate class is used when we have a limited
# number of characters that could yet mean a variety of
# different things.
#
# All the unknown cases are character by character problems,
# so the only tokenizer hook this class implements is _on_char()
# (supported by the private helper _is_an_attribute()).
use strict;
use base 'PPI::Token';
use vars qw{$VERSION};
# Assign the version inside BEGIN so that it is already set at compile time.
BEGIN {
$VERSION = '0.903';
}
# Decide what a one-character "Unknown" token really is, based on the
# character that follows it on the current line.
#
# Called as a class method: $_[0] is the class, $_[1] is the tokenizer.
#
# Returns:
#   1      - the token was reclassified in place (Symbol, Magic, Number,
#            DashedWord or Word)
#   undef  - the tokenizer's _set_token_class() reported failure
#   other  - whatever the _on_char() of the class we hand over to returns
#
# Dies if the current token content is not one of: * $ @ % & - :
#
# The current character is held in a lexical rather than in the global $_,
# so that the caller's $_ (or an element aliased by an enclosing for/map)
# is never overwritten.
sub _on_char {
    my $t    = $_[1];                                      # Tokenizer object
    my $c    = $t->{token}->{content};                     # Current token contents
    my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character

    # Now, we split on the different values of the current content
    if ( $c eq '*' ) {
        if ( $char =~ /(?:[^\W\d]|\:)/ ) {
            # Symbol
            return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
        }

        if ( $char eq '{' or $char eq '$' ) {
            # GLOB cast
            $t->_set_token_class( 'Cast' ) or return undef;
            return $t->_finalize_token->_on_char( $t );
        }

        # NOTE(review): unlike the '%', '&' and '-' branches, the operator is
        # finalized straight away here instead of being offered the current
        # character. That may split '**' and '*=' - confirm whether intended.
        $t->_set_token_class( 'Operator' ) or return undef;
        return $t->_finalize_token->_on_char( $t );

    } elsif ( $c eq '$' ) {
        if ( $char =~ /[a-z_]/i ) {
            # Symbol
            return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
        }

        if ( $PPI::Token::Magic::magic{ $c . $char } ) {
            # Magic variable
            return $t->_set_token_class( 'Magic' ) ? 1 : undef;
        }

        # Must be a cast
        $t->_set_token_class( 'Cast' ) or return undef;
        return $t->_finalize_token->_on_char( $t );

    } elsif ( $c eq '@' ) {
        if ( $char =~ /[\w:]/ ) {
            # Symbol
            return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
        }

        if ( $char =~ /[\-\+\*]/ ) {
            # Magic variable
            return $t->_set_token_class( 'Magic' ) ? 1 : undef;
        }

        # Must be a cast
        $t->_set_token_class( 'Cast' ) or return undef;
        return $t->_finalize_token->_on_char( $t );

    } elsif ( $c eq '%' ) {
        # Is it a number? This test must come before the symbol test,
        # because \w also matches digits.
        if ( $char =~ /\d/ ) {
            # This is %2 (modulus number)
            $t->_set_token_class( 'Operator' ) or return undef;
            return $t->_finalize_token->_on_char( $t );
        }

        # Is it a symbol?
        if ( $char =~ /[\w:]/ ) {
            return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
        }

        if ( $char =~ /[\$@%{]/ ) {
            # It's a cast
            $t->_set_token_class( 'Cast' ) or return undef;
            return $t->_finalize_token->_on_char( $t );
        }

        # Probably the mod operator. Let the new class look at the
        # current character itself.
        $t->_set_token_class( 'Operator' ) or return undef;
        return $t->{class}->_on_char( $t );

    } elsif ( $c eq '&' ) {
        # Is it a number? This test must come before the symbol test,
        # because \w also matches digits.
        if ( $char =~ /\d/ ) {
            # This is &2 (bitwise-and number)
            $t->_set_token_class( 'Operator' ) or return undef;
            return $t->_finalize_token->_on_char( $t );
        }

        # Is it a symbol
        if ( $char =~ /[\w:]/ ) {
            return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
        }

        if ( $char =~ /[\$@%{]/ ) {
            # The ampersand is a cast
            $t->_set_token_class( 'Cast' ) or return undef;
            return $t->_finalize_token->_on_char( $t );
        }

        # Probably the binary and operator. Let the new class look at the
        # current character itself.
        $t->_set_token_class( 'Operator' ) or return undef;
        return $t->{class}->_on_char( $t );

    } elsif ( $c eq '-' ) {
        if ( $char =~ /\d/ ) {
            # Number
            return $t->_set_token_class( 'Number' ) ? 1 : undef;
        }

        if ( $char =~ /[a-zA-Z]/ ) {
            return $t->_set_token_class( 'DashedWord' ) ? 1 : undef;
        }

        # The numeric negative operator. Let the new class look at the
        # current character itself.
        $t->_set_token_class( 'Operator' ) or return undef;
        return $t->{class}->_on_char( $t );

    } elsif ( $c eq ':' ) {
        if ( $char eq ':' ) {
            # ::foo style bareword
            return $t->_set_token_class( 'Word' ) ? 1 : undef;
        }

        # Now, : acts very very differently in different contexts.
        # Mainly, we need to find out if this is a subroutine attribute.
        # We'll leave a hint in the token to indicate that, if it is.
        if ( $_[0]->_is_an_attribute( $t ) ) {
            # This : is an attribute indicator
            $t->_set_token_class( 'Operator' ) or return undef;
            $t->{token}->{_attribute} = 1;
            return $t->_finalize_token->_on_char( $t );
        }

        # It MIGHT be a label, but it's probably the ?: trinary operator
        $t->_set_token_class( 'Operator' ) or return undef;
        return $t->{class}->_on_char( $t );
    }

    ### erm... the token content is none of the characters handled above
    die 'Unknown value in PPI::Token::Unknown token';
}
# Decide whether a ':' at the tokenizer's current position would introduce
# a subroutine attribute.
#
# Called as a class method: $_[1] is the tokenizer. Looks at the three
# tokens handed back by _previous_significant_tokens( 3 ), the first of
# which is treated as the token immediately before the ':'.
#
# Returns 1 if the ':' is an attribute marker, '' if it is not, and undef
# if the tokenizer returned no token list.
sub _is_an_attribute {
    my $tokenizer = $_[1];
    my $recent    = $tokenizer->_previous_significant_tokens( 3 ) or return undef;
    my ( $last, $second, $third ) = @$recent[ 0 .. 2 ];

    # Straight after another attribute, or after a prototype
    return 1 if $last->_isa('Attribute');
    return 1 if $last->_isa('Prototype');

    # In every remaining case the ':' has to follow a bareword
    return '' unless $last->_isa('Word');

    # Anonymous subroutine: sub : attr
    return 1 if $last->_isa('Word', 'sub');

    # Named subroutine: "sub name : attr", where "sub" itself follows
    # a structure token or an empty whitespace token
    my $after_named_sub = $second->_isa('Word', 'sub')
        && ( $third->_isa('Structure') || $third->_isa('Whitespace','') );

    # Anything else is not an attribute
    return $after_named_sub ? 1 : '';
}
1;
|