File: Unknown.pm

package info (click to toggle)
libppi-perl 0.903-2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 840 kB
  • ctags: 429
  • sloc: perl: 5,551; makefile: 45
file content (212 lines) | stat: -rwxr-xr-x 4,884 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
package PPI::Token::Unknown;

# This large, seperate class is used when we have a limited
# number of characters that could yet mean a variety of
# different things.
#
# All the unknown cases are character by character problems,
# so this class only needs to implement _on_char()

use strict;
use base 'PPI::Token';

use vars qw{$VERSION};
BEGIN {
	$VERSION = '0.903';
}





sub _on_char {
	my $t = $_[1];                                    # Tokenizer object
	my $c = $t->{token}->{content};                   # Current token contents
	$_ = substr( $t->{line}, $t->{line_cursor}, 1 );  # Current character


	# Now, we split on the different values of the current content


	if ( $c eq '*' ) {
		if ( /(?:[^\W\d]|\:)/ ) {
			# Symbol
			return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
		}

		if ( $_ eq '{' or $_ eq '$' ) {
			# GLOB cast
			$t->_set_token_class( 'Cast' ) or return undef;
			return $t->_finalize_token->_on_char( $t );
		}

		$t->_set_token_class( 'Operator' ) or return undef;
		return $t->_finalize_token->_on_char( $t );



	} elsif ( $c eq '$' ) {
		if ( /[a-z_]/i ) {
			# Symbol
			return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
		}

		if ( $PPI::Token::Magic::magic{ $c . $_ } ) {
			# Magic variable
			return $t->_set_token_class( 'Magic' ) ? 1 : undef;
		}

		# Must be a cast
		$t->_set_token_class( 'Cast' ) or return undef;
		return $t->_finalize_token->_on_char( $t );



	} elsif ( $c eq '@' ) {
		if ( /[\w:]/ ) {
			# Symbol
			return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
		}

		if ( /[\-\+\*]/ ) {
			# Magic variable
			return $t->_set_token_class( 'Magic' ) ? 1 : undef;
		}

		# Must be a cast
		$t->_set_token_class( 'Cast' ) or return undef;
		return $t->_finalize_token->_on_char( $t );



	} elsif ( $c eq '%' ) {
		# Is it a number?
		if ( /\d/ ) {
			# This is %2 (modulus number)
			$t->_set_token_class( 'Operator' ) or return undef;
			return $t->_finalize_token->_on_char( $t );
		}

		# Is it a symbol?
		if ( /[\w:]/ ) {
			return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
		}

		if ( /[\$@%{]/ ) {
			# It's a cast
			$t->_set_token_class( 'Cast' ) or return undef;
			return $t->_finalize_token->_on_char( $t );

		}

		# Probably the mod operator
		$t->_set_token_class( 'Operator' ) or return undef;
		return $t->{class}->_on_char( $t );



	} elsif ( $c eq '&' ) {
		# Is it a number?
		if ( /\d/ ) {
			# This is &2 (bitwise-and number)
			$t->_set_token_class( 'Operator' ) or return undef;
			return $t->_finalize_token->_on_char( $t );
		}

		# Is it a symbol
		if ( /[\w:]/ ) {
			return $t->_set_token_class( 'Symbol' ) ? 1 : undef;
		}

		if ( /[\$@%{]/ ) {
			# The ampersand is a cast
			$t->_set_token_class( 'Cast' ) or return undef;
			return $t->_finalize_token->_on_char( $t );
		}

		# Probably the binary and operator
		$t->_set_token_class( 'Operator' ) or return undef;
		return $t->{class}->_on_char( $t );



	} elsif ( $c eq '-' ) {
		if ( /\d/o ) {
			# Number
			return $t->_set_token_class( 'Number' ) ? 1 : undef;
		}

		if ( /[a-zA-Z]/ ) {
			return $t->_set_token_class( 'DashedWord' ) ? 1 : undef;
		}

		# The numeric negative operator
		$t->_set_token_class( 'Operator' ) or return undef;
		return $t->{class}->_on_char( $t );



	} elsif ( $c eq ':' ) {
		if ( $_ eq ':' ) {
			# ::foo style bareword
			return $t->_set_token_class( 'Word' ) ? 1 : undef;
		}

		# Now, : acts very very differently in different contexts.
		# Mainly, we need to find out if this is a subroutine attribute.
		# We'll leave a hint in the token to indicate that, if it is.
		if ( $_[0]->_is_an_attribute( $t ) ) {
			# This : is an attribute indicator
			$t->_set_token_class( 'Operator' ) or return undef;
			$t->{token}->{_attribute} = 1;
			return $t->_finalize_token->_on_char( $t );
		}

		# It MIGHT be a label, but its probably the ?: trinary operator
		$t->_set_token_class( 'Operator' ) or return undef;
		return $t->{class}->_on_char( $t );
	}

	### erm...
	die 'Unknown value in PPI::Token::Unknown token';
}

# Are we at a location where a ':' would indicate a subroutine attribute
sub _is_an_attribute {
	my $t = $_[1]; # Tokenizer object
	my $tokens = $t->_previous_significant_tokens( 3 ) or return undef;

	# If we just had another attribute, we are also an attribute
	if ( $tokens->[0]->_isa('Attribute') ) {
		return 1;
	}

	# If we just had a prototype, then we are an attribute
	if ( $tokens->[0]->_isa('Prototype') ) {
		return 1;
	}

	# Other than that, we would need to have had a bareword
	unless ( $tokens->[0]->_isa('Word') ) {
		return '';
	}

	# We could be an anonymous subroutine
	if ( $tokens->[0]->_isa('Word', 'sub') ) {
		return 1;
	}

	# Or, we could be a named subroutine
	if ( $tokens->[1]->_isa('Word', 'sub')
		and ( $tokens->[2]->_isa('Structure')
			or $tokens->[2]->_isa('Whitespace','')
		)
	) {
		return 1;
	}

	# We arn't an attribute
	'';	
}

1;