File: Tokenizer.py

package info (click to toggle)
plastex 3.1-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,132 kB
  • sloc: python: 23,341; xml: 18,076; javascript: 7,755; ansic: 46; makefile: 40; sh: 26
file content (110 lines) | stat: -rwxr-xr-x 4,355 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import unittest
from unittest import TestCase
from plasTeX import Macro
from plasTeX.TeX import *
from plasTeX.Tokenizer import *

class Tokenizing(TestCase):
    """Check the token stream produced for small snippets of TeX source.

    Each test feeds a source string to a fresh ``TeX`` instance and compares
    the tokens yielded by ``itertokens()`` with a hand-written expected list.
    """

    @staticmethod
    def _tokenize(source):
        """Return the list of tokens a fresh ``TeX`` instance yields for *source*."""
        return list(TeX().input(source).itertokens())

    def _check(self, source, expected):
        """Tokenize *source* and fail unless the result equals *expected*.

        ``assertEqual`` is used rather than a bare ``assert`` so the check is
        still performed when Python runs with ``-O`` and so a failure reports
        a diff of the two token lists.
        """
        self.assertEqual(self._tokenize(source), expected)

    def testTokens(self):
        """A braced group holding a control word, digits, a space and letters."""
        self._check(
            r'{\hskip 36 pt}',
            [
                BeginGroup('{'),
                EscapeSequence('hskip'),
                Other('3'),
                Other('6'),
                Space(' '),
                Letter('p'),
                Letter('t'),
                EndGroup('}'),
            ],
        )

    def testComment(self):
        """Text from ``%`` onwards is dropped; the space before it is kept."""
        self._check(
            'line % comment',
            [
                Letter('l'),
                Letter('i'),
                Letter('n'),
                Letter('e'),
                Space(' '),
            ],
        )

    def testSymbols(self):
        """Special characters, first bare and then escaped with a backslash."""
        # Bare specials: each maps to its own token class, '~' becomes the
        # 'active::~' escape sequence, and the trailing '%' yields no token.
        self._check(
            '\\ { } $ & # ^ _ ~ %',
            [
                EscapeSequence(' '),
                BeginGroup('{'), Space(' '),
                EndGroup('}'), Space(' '),
                MathShift('$'), Space(' '),
                Alignment('&'), Space(' '),
                Parameter('#'), Space(' '),
                Superscript('^'), Space(' '),
                Subscript('_'), Space(' '),
                EscapeSequence('active::~'), Space(' '),
            ],
        )

        # Escaped specials: each becomes a single-character escape sequence.
        self._check(
            r'\\ \{ \} \$ \& \# \^ \_ \~ \%',
            [
                EscapeSequence('\\'), Space(' '),
                EscapeSequence('{'), Space(' '),
                EscapeSequence('}'), Space(' '),
                EscapeSequence('$'), Space(' '),
                EscapeSequence('&'), Space(' '),
                EscapeSequence('#'), Space(' '),
                EscapeSequence('^'), Space(' '),
                EscapeSequence('_'), Space(' '),
                EscapeSequence('~'), Space(' '),
                EscapeSequence('%'),
            ],
        )

    def testDoubleSuper(self):
        """``^^X`` notation: only ``^^A`` and one following space yield tokens."""
        self._check(
            '^^I ^^A ^^@ ^^M',
            [Other('\x01'), Space(' ')],
        )

    def testParagraph(self):
        """A whitespace-only line between text lines yields a ``par`` token."""
        self._check(
            '1\n   2\n   \n   3\n',
            [
                Other('1'), Space(' '),
                Other('2'), Space(' '),
                EscapeSequence('par'),
                Other('3'), Space(' '),
            ],
        )

    def testExercises(self):
        """ Exercises in the TeX book """
        # 8.4
        self._check(
            r' $x^2$~  \TeX  ^^C',
            [
                MathShift('$'),
                Letter('x'),
                Superscript('^'),
                Other('2'),
                MathShift('$'),
                EscapeSequence('active::~'),
                Space(' '),
                EscapeSequence('TeX'),
                Other('\x03'),
            ],
        )

        # 8.5
        self._check(
            'Hi!\n\n\n',
            [
                Letter('H'),
                Letter('i'),
                Other('!'),
                Space(' '),
                EscapeSequence('par'),
            ],
        )

        # 8.6
        self._check(
            r'^^B^^BM^^A^^B^^C^^M^^@\M ',
            [
                Other('\x02'),
                Other('\x02'),
                Letter('M'),
                Other('\x01'),
                Other('\x02'),
                Other('\x03'),
                Space(' '),
                EscapeSequence('M'),
            ],
        )

    def testParameters(self):
        """A ``\\def`` with a delimited parameter text is tokenized verbatim."""
        # NOTE(review): '[' and ']' are expected as Other by analogy with '!'
        # in exercise 8.5 -- confirm against the default category codes.
        self._check(
            r'\def\foo#1[#2]{hi}',
            [
                EscapeSequence('def'),
                EscapeSequence('foo'),
                Parameter('#'),
                Other('1'),
                Other('['),
                Parameter('#'),
                Other('2'),
                Other(']'),
                BeginGroup('{'),
                Letter('h'),
                Letter('i'),
                EndGroup('}'),
            ],
        )

# Hand control to unittest's runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()