File: Tokens.pm

package info (click to toggle)
latexml 0.8.8-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 31,920 kB
  • sloc: xml: 109,048; perl: 30,224; sh: 179; javascript: 28; makefile: 13
file content (221 lines) | stat: -rw-r--r-- 7,184 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# /=====================================================================\ #
# |  LaTeXML::Core::Tokens                                              | #
# | A list of Token(s)                                                  | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Core::Tokens;
use strict;
use warnings;
use LaTeXML::Global;
use LaTeXML::Common::Object;
use LaTeXML::Common::Error;
use LaTeXML::Core::Token;
use base qw(LaTeXML::Common::Object);
use base qw(Exporter);
our @EXPORT = (    # Global STATE; This gets bound by LaTeXML.pm
  qw(&Tokens &TokensI),
);

#======================================================================
# Token List constructors.

# Build a LaTeXML::Core::Tokens from the arguments.
#  * an argument blessed exactly as LaTeXML::Core::Token is kept as is;
#  * an argument blessed exactly as LaTeXML::Core::Tokens is flattened
#    into its elements;
#  * anything else is reported through Error('misdefined',...); should that
#    call return a false value, T_OTHER of the Stringify'd argument is
#    used in its place.
sub Tokens {
  my (@args) = @_;
  my $class;
  # Kept as a single map pass (noted as being faster than foreach).
  my @flat = map {
    $class = ref $_;
    ($class eq 'LaTeXML::Core::Token') ? $_
      : ($class eq 'LaTeXML::Core::Tokens') ? @$_
      : (Error('misdefined', $class, undef, "Expected a Token, got " . Stringify($_))
        || T_OTHER(Stringify($_)))
  } @args;
  return bless [@flat], 'LaTeXML::Core::Tokens'; }

# Build a LaTeXML::Core::Tokens directly from the arguments:
# the arguments are copied into a new list, with no type checking
# and no flattening of nested lists.
sub TokensI {
  return bless [@_], 'LaTeXML::Core::Tokens'; }

#======================================================================
# Return the elements of this Tokens as a plain Perl list
# (in scalar context this yields the number of elements).
sub unlist {
  my $self = shift;
  return @{$self}; }

# Return a shallow copy: a new list object, blessed into the same class
# as the original, holding the very same elements.
sub clone {
  my ($self) = @_;
  my @elements = @{$self};
  return bless(\@elements, ref $self); }

# Revert this Tokens: returns the list of its elements
# (the elements themselves are returned, not a string).
sub revert {
  my $self = shift;
  return @{$self}; }

# toString is called frequently, mostly for keyword-like purposes;
# it is NOT meant to produce valid TeX (use revert or UnTeX for that!).
# Concatenates the toString of every element, leaving out those
# whose catcode (slot [1]) is CC_COMMENT.
sub toString {
  my ($self) = @_;
  my @visible = grep { $$_[1] != CC_COMMENT } @$self;
  return join('', map { $_->toString } @visible); }

# Methods for overloaded ops.

# Compare two Tokens lists element by element, skipping over any element
# whose catcode (slot [1]) is CC_COMMENT or CC_MARKER on either side.
# Returns 0 when the other operand is undefined or of a different class,
# 1 when the remaining elements all match via their own equals method,
# and a bare (empty/undef) return on the first mismatch.
sub equals {
  my ($self, $other) = @_;
  return 0 unless defined($other) && ((ref $self) eq (ref $other));
  my @left      = @$self;
  my @right     = @$other;
  my $ignorable = sub {
    my ($token) = @_;
    return ($token->[1] == CC_COMMENT) || ($token->[1] == CC_MARKER); };
  while (@left || @right) {
    if (@left && $ignorable->($left[0])) {
      shift(@left); }
    elsif (@right && $ignorable->($right[0])) {
      shift(@right); }
    elsif (!@left || !@right) {    # one side ran out before the other
      return; }
    else {
      return unless shift(@left)->equals(shift(@right)); } }
  return 1; }

# Debugging-style string form: "Tokens[...]" wrapping the comma-separated
# toString of every element (nothing is filtered out here).
sub stringify {
  my ($self) = @_;
  my @parts = map { $_->toString } @$self;
  return "Tokens[" . join(',', @parts) . "]"; }

# Digest this Tokens by handing it to the given stomach's digest method,
# returning whatever that returns.
# Deep-recursion warnings are silenced within this sub.
sub beDigested {
  no warnings 'recursion';
  my $self    = shift;
  my $stomach = shift;
  return $stomach->digest($self); }

# Ask every element to neutralize itself (passing @extraspecials along
# to each call) and gather the results into a new list via Tokens.
sub neutralize {
  my ($self, @extraspecials) = @_;
  my @neutral = map { $_->neutralize(@extraspecials) } @$self;
  return Tokens(@neutral); }

# Return true if the CC_BEGIN and CC_END elements of this list are balanced:
# the nesting depth never drops below zero and ends at exactly zero.
sub isBalanced {
  my ($self) = @_;
  my $depth = 0;
  foreach my $token (@$self) {
    my $catcode = $token->[1];    # INLINE
    if ($catcode == CC_BEGIN) {
      $depth++; }
    if ($catcode == CC_END) {
      # Stop as soon as a close has no matching open: '{ }} {' is unbalanced
      # even though its open and close braces are equal in number.
      last if --$depth < 0; } }
  return $depth == 0; }

# Return a new Tokens in which every CC_ARG element has been replaced by
# the corresponding argument; all other elements are copied unchanged.
# The argument is chosen by the character stored in the element's slot [0]:
# '1' selects the first of @args, '2' the second, and so on.
# An argument that is false (e.g. undef) contributes nothing.
# NOTE: Assumes each arg is either undef, a Token or a Tokens;
# inline accessors are used on that assumption.
sub substituteParameters {
  my ($self, @args) = @_;
  my @pending  = @{$self};    # ->unlist
  my @expanded = ();
  while (my $token = shift(@pending)) {
    if ($$token[1] == CC_ARG) {
      my $index = ord($$token[0]) - ord("0") - 1;
      my $arg   = $args[$index];
      next unless $arg;
      # A single Token is inserted as is; a Tokens is spliced in (->unlist)
      push(@expanded, ((ref $arg eq 'LaTeXML::Core::Token') ? $arg : @$arg)); }
    else {    # Non-match; copy it
      push(@expanded, $token); } }
  return bless [@expanded], 'LaTeXML::Core::Tokens'; }

# Packs parameter markers for use as a macro body (and other token lists).
# Scanning left to right, a CC_PARAM element that has something after it
# is combined with the element that follows:
#   * follower is CC_OTHER : the pair becomes T_ARG(follower);
#   * follower is CC_PARAM : the pair collapses to the first CC_PARAM;
#   * anything else        : an Error is reported and neither element
#                            is copied to the result.
# Every other element (including a CC_PARAM at the very end) is copied as is.
# Returns $self itself when no CC_PARAM pair was encountered,
# otherwise a new LaTeXML::Core::Tokens.
# NOTE(review): earlier comments here said that \noexpand tokens are also
# unwrapped, and that malformed cases are preserved as-is; neither happens
# in this code -- confirm which behaviour is intended.
sub packParameters {
  my ($self)  = @_;
  my @pending = @$self;
  my @packed  = ();
  my $changed = 0;
  while (my $token = shift @pending) {
    if (($$token[1] != CC_PARAM) || !@pending) {
      push(@packed, $token);
      next; }
    $changed = 1;
    my $follower    = shift @pending;
    my $follower_cc = $follower && $$follower[1];
    if ($follower_cc == CC_OTHER) {
      # only group clear match token cases
      push(@packed, T_ARG($follower)); }
    elsif ($follower_cc == CC_PARAM) {
      push(@packed, $token); }
    else {
      # e.g. \detokenize{#,} is legal, while \textbf{#,} is not
      Error('misdefined', 'expansion', undef, "Parameter has a malformed arg, should be #1-#9 or ##. ",
        "In expansion " . ToString($self)); } }
  return $self unless $changed;
  return bless [@packed], 'LaTeXML::Core::Tokens'; }

# Trims CC_SPACE elements from both ends and, when what remains is wrapped
# in a single matching CC_BEGIN ... CC_END pair, that outer pair as well.
# Returns $self unchanged when the braces within the trimmed range are
# unbalanced, when nothing would be trimmed, or when trimming would leave
# nothing at all (so a lone '{}' or an all-space list comes back as is);
# otherwise returns a new LaTeXML::Core::Tokens holding the trimmed range.
# Note that outer spaces are trimmed whether or not there are braces.
sub stripBraces {
  my ($self) = @_;
  my $count = scalar(@$self);
  my ($start, $end) = (0, $count);
  # skip past spaces at ends.
  $start++ while (($start < $count) && ($$self[$start]->getCatcode == CC_SPACE));
  $end--   while (($end > 0)        && ($$self[$end - 1]->getCatcode == CC_SPACE));
  my @open      = ();    # positions of the braces still open
  my @last_pair = ();    # (open, close) positions of the pair closed most recently
  foreach my $i ($start .. $end - 1) {
    my $cc = $$self[$i]->getCatcode;
    if ($cc == CC_BEGIN) {
      push(@open, $i); }
    elsif ($cc == CC_END) {
      return $self unless @open;    # Unbalanced: Too many }
      @last_pair = (pop(@open), $i); } }
  return $self if @open;            # Unbalanced: Too many {
  ## COULD strip multiple pairs of braces by remembering more pairs
  if (@last_pair && ($last_pair[0] == $start) && ($last_pair[1] == $end - 1)) {
    $start++;
    $end--; }
  my $trimmed = ($start < $end) && (($start > 0) || ($end < $count));
  return $self unless $trimmed;
  return bless [@$self[$start .. $end - 1]], 'LaTeXML::Core::Tokens'; }

#======================================================================

1;

__END__

=pod

=head1 NAME

C<LaTeXML::Core::Tokens> - represents lists of L<LaTeXML::Core::Token>'s;
extends L<LaTeXML::Common::Object>.

=head2 Exported functions

=over 4

=item C<< $tokens = Tokens(@token); >>

Creates a L<LaTeXML::Core::Tokens> from a list of L<LaTeXML::Core::Token>'s
and/or L<LaTeXML::Core::Tokens> (whose tokens are flattened into the result).

=back

=head2 Tokens methods

The following method is specific to C<LaTeXML::Core::Tokens>.

=over 4

=item C<< $tokenscopy = $tokens->clone; >>

Return a shallow copy of the $tokens.  This is useful before reading from a C<LaTeXML::Core::Tokens>.

=back

=head1 AUTHOR

Bruce Miller <bruce.miller@nist.gov>

=head1 COPYRIGHT

Public domain software, produced as part of work done by the
United States Government & not subject to copyright in the US.

=cut