File: Lex.om

Package: omake 0.10.7-1
#
# Lexer for LaTeX code.
# Actually, LaTeX parsing is not context-free,
# so this is really only an approximation.
#
open parse/LaTeX/Macros
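# Macros provides the MACROS object used below to recognize verbatim environments.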

#
# Special characters
#
public.operators. =
    extends $(Map)

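    # Token names for the special characters; $$ is display math, $ is inline math.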
    $|{|  = lbrace
    $|}|  = rbrace
    $|[|  = lbrack
    $|]|  = rbrack
    $|$$| = math2
    $|$|  = math1

#
# Utilities
#
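# arg1 extracts the brace-delimited argument from a string such as
# \begin{verbatim}, returning the text between the braces.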
private.arg1(s) =
   match $s with
   case $'[{]\([^}]+\)[}]'
      value $"$1"

########################################################################
# The contents of verbatim blocks are not interpreted.
#
private.verbatim_lexer. =
    extends $(Lexer)

    declare name
    declare text

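    # Scan a verbatim block: record the environment name and the opening
    # \begin{...} text, then lex until the matching \end{...} is found.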
    lex-block(n, t) =
        name = $n
        text[] = $t
        this.lex()

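    # Copy through any run of characters that contains no backslash.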
    skip1: $'[^\\]+'
        text[] += $0
        this.lex()

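    # A lone backslash is copied through as well.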
    skip2: $'\\'
        text[] += $0
        this.lex()

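    # An \end{...} marker terminates the block only if it closes the environment
    # that was opened; otherwise it is treated as part of the verbatim text.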
    term: $'\\end[{][[:alpha:]]+[*]?[}]'
        text[] += $0
        name2 = $(arg1 $0)
        if $(equal $(name2), $(name))
            Token.pair(text, $(concat $(EMPTY), $(text)))
        else
            this.lex()

########################################################################
# The lexer
#
public.latex_lexer. =
    extends $(Lexer)

    #
    # If all else fails, it's a syntax error
    #
    other: .
        loc = $(parse-loc)
        eprintln($(loc.to-string): illegal character: $0)
        this.lex()

    #
    # Macros.
    #
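    # A control sequence: a name made of letters (and @), optionally starred,
    # or a single-character control symbol.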
    macro: $'''\\([[:alpha:]@]+[*]?|[ \t\n%_'=>#$^&*}{"[:digit:]]|\\|\[|\])'''
        Token.pair(macro, $0)

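    # \begin{...}: verbatim environments are handed to the verbatim lexer;
    # anything else becomes a begin token carrying the environment name.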
    begin: $'''\\begin[{][[:alpha:]]+[*]?[}]'''
        name = $(arg1 $0)
        if $(MACROS.is-verbatim $(name))
            verbatim_lexer.lex-block($(name), $0)
        else
            Token.pair(begin, $(name))

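    # \end{...} produces an end token carrying the environment name.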
    end: $'''\\end[{][[:alpha:]]+[*]?[}]'''
        Token.pair(end, $(arg1 $0))

    #
    # Operators
    #
    op: $'[}{$]|\[|\]|[$][$]'
        Token.pair($(operators.find $0), $0)

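    #
    # Words, whitespace, and plain text
    #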
    word: $'[[:alpha:]]+'
        Token.pair(word, $0)

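    # A paragraph break: whitespace containing at least two newlines.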
    nl: $'[[:space:]]*\n([[:space:]]*\n)+'
        Token.pair(nl, $0)

    white: $'[[:space:]]+'
        Token.pair(white, $0)

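    # Ordinary text: any run of characters containing no brackets, braces,
    # %, backslash, $, whitespace, or letters.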
    text: $'[^][{}%\\$[:space:][:alpha:]]+'
        Token.pair(text, $0)

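    # \verb and \lstinline take their argument between a pair of identical
    # delimiter characters; the whole construct is passed through as a text token.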
    lstinline: $'\\(lstinline|verb)(/[^/]*/|[$][^$]*[$]|[+][^+]*[+]|![^!]*!|[]][^]]*[]]|~[^~]*~|`[^`]*`)'
        Token.pair(text, $0)

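    # A LaTeX comment runs from % to the end of the line.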
    comment: $'%[^\n]*\n'
        Token.pair(comment, $0)

    #
    # End of file
    #
    eof: $"\'"
        Token.unit(eof)