1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
# /=====================================================================\ #
# | LaTeXML::Core::Tokens | #
# | A list of Token(s) | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Core::Tokens;
use strict;
use warnings;
use LaTeXML::Global;
use LaTeXML::Common::Object;
use LaTeXML::Common::Error;
use LaTeXML::Core::Token;
use base qw(LaTeXML::Common::Object);
use base qw(Exporter);
our @EXPORT = ( # Names exported by default: the Tokens and TokensI constructor functions
qw(&Tokens &TokensI),
);
#======================================================================
# Token List constructors.
# Tokens(@items): build a LaTeXML::Core::Tokens from the arguments.
#   * a LaTeXML::Core::Token is kept as-is;
#   * a LaTeXML::Core::Tokens is flattened into its member tokens;
#   * anything else is reported through Error('misdefined', ...) and, when
#     that call yields a false value, replaced by T_OTHER of its Stringify form.
# Returns a freshly blessed array; the arguments are not modified.
sub Tokens {
  my (@items) = @_;
  # A single map pass is used here for speed (rather than a foreach loop).
  my @flattened = map {
    my $class = ref $_;
    ($class eq 'LaTeXML::Core::Token') ? $_
      : ($class eq 'LaTeXML::Core::Tokens') ? @$_
      : (Error('misdefined', $class, undef, "Expected a Token, got " . Stringify($_))
        || T_OTHER(Stringify($_)));
  } @items;
  return bless [@flattened], 'LaTeXML::Core::Tokens'; }
# TokensI(@tokens): wrap the arguments, exactly as given, in a new
# LaTeXML::Core::Tokens. Unlike Tokens, no type checking is done and
# nested Tokens objects are not flattened.
sub TokensI {
  return bless [@_], 'LaTeXML::Core::Tokens'; }
#======================================================================
# Return the tokens held by this Tokens as a plain Perl list
# (in scalar context this evaluates to the number of tokens).
sub unlist {
  my $self = shift;
  return @{$self}; }
# Return a shallow copy: a new array, blessed into the same class as $self,
# holding the very same token objects.
sub clone {
  my $self = shift;
  my @copy = @{$self};
  return bless \@copy, ref($self); }
# Revert this Tokens: the result is simply the list of its own tokens
# (note: a list of token objects, NOT a string; in scalar context the count).
sub revert {
  my $self = shift;
  return @{$self}; }
# toString is called frequently and serves keyword-like purposes;
# it is NOT meant to produce valid TeX (use revert or UnTeX for that!).
# Result: the concatenation of each token's toString, leaving out every
# token whose catcode slot equals CC_COMMENT.
sub toString {
  my ($self) = @_;
  my @visible = grep { $$_[1] != CC_COMMENT } @$self;
  return join('', map { $_->toString } @visible); }
# Methods for overloaded ops.
# Compare two Tokens lists, ignoring comments & markers.
# Arguments (positional): this Tokens and the object to compare it with.
# Returns 1 when both have the same class and, after skipping tokens whose
# catcode is CC_COMMENT or CC_MARKER, the remaining tokens are pairwise
# equal according to each token's own equals method; returns 0 otherwise.
sub equals {
  # Deliberately not named $a/$b: lexicals with those names mask the
  # package variables that sort comparators rely on.
  my ($self, $other) = @_;
  return 0 unless defined $other && (ref $self) eq (ref $other);
  my @mine   = @$self;
  my @theirs = @$other;
  while (@mine || @theirs) {
    # Drop an ignorable token from the head of either list, then re-examine.
    if (@mine && (($mine[0]->[1] == CC_COMMENT) || ($mine[0]->[1] == CC_MARKER))) { shift(@mine); next; }
    if (@theirs && (($theirs[0]->[1] == CC_COMMENT) || ($theirs[0]->[1] == CC_MARKER))) { shift(@theirs); next; }
    # Unequal if one list ran out first or the two head tokens differ.
    # An explicit 0 (not a bare return) keeps the failure value identical
    # to the class-mismatch case above, also in list context.
    return 0 unless @mine && @theirs && shift(@mine)->equals(shift(@theirs)); }
  return 1; }
# Return "Tokens[...]" where ... is the comma-separated toString of every
# token in the list (no tokens are filtered out).
sub stringify {
  my ($self) = @_;
  my @parts = map { $_->toString } @$self;
  return "Tokens[" . join(',', @parts) . "]"; }
# Digest this token list by handing it to the given stomach's digest method;
# whatever digest returns is returned unchanged.
sub beDigested {
  # Deep-recursion warnings are silenced within this sub
  # (presumably because digestion can nest deeply -- TODO confirm).
  no warnings 'recursion';
  my $self    = shift;
  my $stomach = shift;
  return $stomach->digest($self); }
# Call neutralize(@extraspecials) on every token and collect the results
# into a new Tokens (built with the Tokens constructor).
sub neutralize {
  my ($self, @extraspecials) = @_;
  my @neutralized = map { $_->neutralize(@extraspecials) } @$self;
  return Tokens(@neutralized); }
# Return true when the CC_BEGIN and CC_END tokens of this list nest properly:
# every CC_END closes an earlier CC_BEGIN, and none is left open at the end.
sub isBalanced {
  my ($self) = @_;
  my $depth = 0;
  for my $token (@$self) {
    my $catcode = $token->[1];    # read the catcode slot directly (inlined accessor)
    if ($catcode == CC_BEGIN) {
      $depth += 1; }
    if ($catcode == CC_END) {
      $depth -= 1;
      # A close with nothing open: stop right away, so that '{ }} {' is
      # reported as unbalanced even though the brace counts agree.
      last if $depth < 0; } }
  return $depth == 0; }
# Return a new Tokens in which every CC_ARG token is replaced by the
# corresponding entry of @args; all other tokens are copied unchanged.
# The slot is taken from the first character of the CC_ARG token's string
# ('1' selects $args[0], '2' selects $args[1], ...).
# NOTE: Assumes each arg is either undef (or otherwise false), a single
# Token, or a Tokens; a false arg contributes nothing to the result.
# Inline slot accessors are used on those assumptions.
sub substituteParameters {
  my ($self, @args) = @_;
  my @in     = @{$self};    # ->unlist
  my @result = ();
  while (my $token = shift(@in)) {
    if ($$token[1] != CC_ARG) {    # Non-match; copy it
      push(@result, $token); }
    else {
      my $index = ord($$token[0]) - ord("0") - 1;
      # A slot character below '1' gives a negative index, which Perl would
      # resolve from the END of @args; treat such a slot as having no argument.
      my $arg = ($index >= 0 ? $args[$index] : undef);
      if ($arg) {
        push(@result, (ref $arg eq 'LaTeXML::Core::Token' ? $arg : @$arg)); } } }    # ->unlist
  return bless [@result], 'LaTeXML::Core::Tokens'; }
# Packs CC_PARAM token pairs into CC_ARG tokens, for use as a macro body
# (and other token lists). Scanning left to right, whenever a CC_PARAM token
# is followed by another token, the pair is handled according to the follower:
#   * CC_OTHER: the pair is replaced by T_ARG(<follower>);
#   * CC_PARAM: the pair collapses to the single (first) CC_PARAM token;
#   * anything else: an Error is reported and neither token is copied.
# A CC_PARAM that is the very last token, and every other token, is copied as-is.
# Returns a new Tokens if any such pair was encountered, otherwise $self itself.
# NOTE(review): this sub has been described as also unwrapping \noexpand
# tokens; nothing in the code below does so -- TODO confirm where that happens.
sub packParameters {
  my ($self) = @_;
  my @pending = @$self;
  my @packed  = ();
  my $changed = 0;
  while (my $token = shift @pending) {
    # Ordinary token, or a trailing CC_PARAM with nothing after it: keep it.
    if (($$token[1] != CC_PARAM) || !@pending) {
      push(@packed, $token);
      next; }
    $changed = 1;
    my $follower    = shift @pending;
    my $follower_cc = $follower && $$follower[1];
    if ($follower_cc == CC_OTHER) {
      # only group clear match token cases
      push(@packed, T_ARG($follower)); }
    elsif ($follower_cc == CC_PARAM) {
      push(@packed, $token); }
    else {
      # e.g. \detokenize{#,} is legal, while \textbf{#,} is not.
      # NOTE(review): the intent may have been to preserve such a pair and let
      # a higher-level caller decide; as written, both tokens are dropped
      # after the Error is reported -- TODO confirm.
      Error('misdefined', 'expansion', undef, "Parameter has a malformed arg, should be #1-#9 or ##. ",
        "In expansion " . ToString($self)); } }
  return ($changed ? bless([@packed], 'LaTeXML::Core::Tokens') : $self); }
# Trims one pair of outer braces (if they balance each other).
# Leading and trailing CC_SPACE tokens are skipped first; if the CC_BEGIN/CC_END
# tokens in between do not balance, $self is returned untouched. Otherwise,
# when a single matching pair encloses the whole space-trimmed range, that
# pair is removed as well. A new Tokens is returned only if something was
# trimmed AND at least one token remains; in every other case $self is returned.
# (Open question kept from earlier: should whitespace be trimmed only when
# there are braces?)
sub stripBraces {
  my ($self) = @_;
  my $total = scalar(@$self);
  my ($start, $stop) = (0, $total);    # tokens kept are those at $start .. $stop-1
  # skip past spaces at ends.
  $start++ while (($start < $total) && ($$self[$start]->getCatcode == CC_SPACE));
  $stop--  while (($stop > 0) && ($$self[$stop - 1]->getCatcode == CC_SPACE));
  my @unclosed  = ();    # positions of CC_BEGIN tokens still waiting for their CC_END
  my @last_pair = ();    # (open, close) positions of the most recently completed pair
  foreach my $pos ($start .. $stop - 1) {
    my $catcode = $$self[$pos]->getCatcode;
    if ($catcode == CC_BEGIN) {
      push(@unclosed, $pos); }
    elsif ($catcode == CC_END) {
      return $self unless @unclosed;    # Unbalanced: Too many }
      @last_pair = (pop(@unclosed), $pos); } }
  return $self if @unclosed;            # Unbalanced: Too many {
  ## COULD strip multiple pairs of braces by remembering more than the last pair
  if (@last_pair) {
    my ($open, $close) = @last_pair;
    # Only a pair spanning the whole trimmed range counts as "outer" braces.
    if (($open == $start) && ($close == $stop - 1)) {
      $start++; $stop--; } }
  if (($start < $stop) && (($start > 0) || ($stop < $total))) {
    return bless [@$self[$start .. $stop - 1]], 'LaTeXML::Core::Tokens'; }
  return $self; }
#======================================================================
1;
__END__
=pod
=head1 NAME
C<LaTeXML::Core::Tokens> - represents lists of L<LaTeXML::Core::Token>'s;
extends L<LaTeXML::Common::Object>.
=head2 Exported functions
=over 4
=item C<< $tokens = Tokens(@token); >>
Creates a L<LaTeXML::Core::Tokens> from a list of L<LaTeXML::Core::Token>s;
any L<LaTeXML::Core::Tokens> among the arguments is flattened into its member tokens.
=back
=head2 Tokens methods
The following method is specific to C<LaTeXML::Core::Tokens>.
=over 4
=item C<< $tokenscopy = $tokens->clone; >>
Return a shallow copy of the $tokens. This is useful before reading from a C<LaTeXML::Core::Tokens>.
=back
=head1 AUTHOR
Bruce Miller <bruce.miller@nist.gov>
=head1 COPYRIGHT
Public domain software, produced as part of work done by the
United States Government & not subject to copyright in the US.
=cut
|