File: sml.lua

package info (click to toggle)
vis 0.9-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 12,624 kB
  • sloc: ansic: 23,195; sh: 981; makefile: 363; python: 47
file content (93 lines) | stat: -rw-r--r-- 3,274 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
-- Copyright 2017-2024 Murray Calavera. See LICENSE.
-- Standard ML LPeg lexer.

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local P, S = lpeg.P, lpeg.S

-- Lexer object for Standard ML; the rules in this file are registered on it.
local lex = lexer.new('sml')

-- Whitespace.
-- A run of one or more `lexer.space` characters forms a single token. The token pattern is kept
-- in `ws` because other rules in this file embed it between their own tokens.
local blank_run = lexer.space^1
local ws = token(lexer.WHITESPACE, blank_run)
lex:add_rule('whitespace', ws)

-- Structures.
-- `id` matches the tail of an alphanumeric identifier: letters, digits, primes and underscores.
-- `aid` is such an identifier that starts with a letter.
-- `longid` is a chain of `aid`s joined by dots (e.g. `A.B.c`).
local id = (lexer.alnum + "'" + '_')^0
local aid = lexer.alpha * id
local longid = (aid * '.')^0 * aid
-- Common prefix of a structure binding: `structure <Name> = `, tokenized piece by piece.
local struct_dec = token(lexer.KEYWORD, 'structure') * ws * token(lexer.CLASS, aid) * ws *
  token(lexer.OPERATOR, '=') * ws
-- Accept `struct` only as a whole word. Without the lookahead, an alias target that merely starts
-- with those letters (e.g. `structure S = structFoo`) is split into the keyword `struct` plus a
-- leftover identifier and never reaches the 'struct_alias' rule.
local struct_kw = P('struct') * -(lexer.alnum + "'" + '_')
lex:add_rule('struct_new', struct_dec * token(lexer.KEYWORD, struct_kw))
lex:add_rule('struct_alias', struct_dec * token(lexer.CLASS, longid))
-- A structure qualifier in front of a member: an identifier immediately followed by a dot.
lex:add_rule('structure', token(lexer.CLASS, aid * '.'))

-- Open.
-- One of the words below, then whitespace, then a (possibly dot-qualified) structure name.
local open_words = word_match{'open', 'structure', 'functor'}
lex:add_rule('open', token(lexer.KEYWORD, open_words) * ws * token(lexer.CLASS, longid))

-- Keywords.
local sml_keywords = {
  -- Core-language words.
  'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else', 'end', 'exception',
  'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', 'local', 'nonfix', 'of', 'op',
  'orelse', 'raise', 'rec', 'then', 'type', 'val', 'while', 'with', 'withtype',
  -- Words used with signatures, structures and functors.
  'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', 'struct', 'structure'
}
lex:add_rule('keyword', token(lexer.KEYWORD, word_match(sml_keywords)))

-- Types.
-- Names of built-in types that get the TYPE token.
local type_names = {
  'array', 'bool', 'char', 'exn', 'int', 'list', 'option', 'order', 'real', 'ref', 'string',
  'substring', 'unit', 'vector', 'word'
}
lex:add_rule('type', token(lexer.TYPE, word_match(type_names)))

-- Functions.
-- `real`, `vector` and `substring` are a problem: the same words are also listed in the 'type'
-- rule, which is registered earlier in this file, so that rule presumably claims them first.
local function_names = {
  'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', 'explode', 'floor',
  'foldl', 'foldr', 'getOpt', 'hd', 'ignore', 'implode', 'isSome', 'length', 'map', 'not',
  'null', 'ord', 'print', 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc',
  'valOf', 'vector', --
  'o', 'abs', 'mod', 'div'
}
lex:add_rule('function', token(lexer.FUNCTION, word_match(function_names)))

-- Constants.
-- Either one of the literal words below, or any identifier that begins with an upper-case letter.
local literal_words = word_match{'true', 'false', 'nil'}
local capitalized = lexer.upper * id
lex:add_rule('constant', token(lexer.CONSTANT, literal_words + capitalized))

-- Identifiers (non-symbolic).
-- A lower-case letter followed by any number of identifier characters.
local plain_ident = lexer.lower * id
lex:add_rule('identifier', token(lexer.IDENTIFIER, plain_ident))

-- Strings.
-- A double-quoted range, optionally preceded by `#` (as in `#"a"`). The `true` argument is
-- presumably the single-line flag of `lexer.range` -- confirm against the Scintillua API.
local hash_prefix = P('#')^-1
local quoted = lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, hash_prefix * quoted))

-- Comments.
-- Two forms are recognized: text from `(*)` to the end of the line, and a `(*` ... `*)` range.
-- The line form is tried first.
local to_line_end = lexer.to_eol('(*)')
local delimited = lexer.range('(*', '*)', false, false, true)
lex:add_rule('comment', token(lexer.COMMENT, to_line_end + delimited))

-- Numbers.

-- Builds a digit-sequence pattern from the single-digit pattern `digit`: either two or more
-- digits that may contain underscores between them (never at the end), or exactly one digit.
local function num(digit)
  local grouped = digit * (digit^0 * '_')^0 * digit^1
  return grouped + digit
end

-- Digit sequences for each supported base.
local int = num(lexer.digit)
local hex = num(lexer.xdigit)
local bin = num(S('01'))

-- Optional `~` sign.
local minus = P('~')^-1

-- Reals need an exponent, a fractional part, or both.
local frac = '.' * int
local exp = S('eE') * minus * int
local real = int * frac^-1 * exp + int * frac * exp^-1

-- Literals introduced by a `0w`-style prefix; these never take a sign.
local word_literal = '0w' * int + (P('0wx') + '0xw') * hex + (P('0wb') + '0bw') * bin
-- Optionally signed literals: hexadecimal, binary, real, then plain integer.
local signed_literal = minus * ('0x' * hex + '0b' * bin + real + int)

lex:add_rule('number', token(lexer.NUMBER, word_literal + signed_literal))

-- Type variables.
-- An apostrophe followed by any number of identifier characters, e.g. `'a`.
local tyvar = "'" * id
lex:add_rule('typevar', token(lexer.VARIABLE, tyvar))

-- Operators.
-- Any single character from the set below is an operator token.
local operator_chars = S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')
lex:add_rule('operator', token(lexer.OPERATOR, operator_chars))

-- Store `(*)` under the 'scintillua.comment' property. NOTE(review): presumably this is the
-- prefix an editor uses when toggling line comments -- confirm against the Scintillua docs.
lexer.property['scintillua.comment'] = '(*)'

-- Hand the configured lexer object back to whoever loaded this file.
return lex