1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
-- Copyright 2017 Murray Calavera. See LICENSE.
-- Pony LPeg lexer.
local l = require('lexer')
local token = l.token
local P, S = lpeg.P, lpeg.S
local function pword(words)
return l.word_match(words, "'")
end
local ws = token(l.WHITESPACE, l.space^1)
local comment_line = '//' * l.nonnewline^0
local comment_block = l.nested_pair('/*', '*/')
local comment = token(l.COMMENT, comment_line + comment_block)
local annotation = token(l.PREPROCESSOR, l.delimited_range('\\', false, true))
local lit_bool = token(l.CONSTANT, pword{'true', 'false'})
local nq = l.any - P'"'
local lit_str = token(l.STRING,
P'"""' * (nq + (P'"' * #(nq + (P'"' * nq))))^0 * P'"""'
+ l.delimited_range('"')
+ l.delimited_range("'")
)
local function num(digit)
return digit * (digit^0 * P'_')^0 * digit^1 + digit
end
local int = num(l.digit)
local frac = P('.') * int
local exp = S('eE') * (P('-') + P('+'))^-1 * int
local lit_num = token(l.NUMBER,
P('0x') * num(l.xdigit)
+ P('0b') * num(S('01'))
+ int * frac^-1 * exp^-1
)
local keyword = token(l.KEYWORD, pword{
'actor', 'as', 'be', 'break', 'class', 'compile_error', 'compile_intrinsic',
'continue', 'consume', 'do', 'else', 'elseif', 'embed', 'end', 'error',
'for', 'fun', 'if', 'ifdef', 'iftype', 'in', 'interface', 'is', 'isnt',
'lambda', 'let', 'match', 'new', 'object', 'primitive', 'recover', 'repeat',
'return', 'struct', 'then', 'this', 'trait', 'try', 'type', 'until', 'use',
'var', 'where', 'while', 'with'})
local capability = token(l.LABEL, pword{
'box', 'iso', 'ref', 'tag', 'trn', 'val'})
local qualifier = token(l.LABEL,
P'#' * pword{'read', 'send', 'share', 'any', 'alias'})
local operator = token(l.OPERATOR,
pword{'and', 'or', 'xor', 'not', 'addressof', 'digestof'}
+ lpeg.Cmt(S('+-*/%<>=!~')^1, function(input, index, op)
local ops = {
['+'] = true, ['-'] = true, ['*'] = true, ['/'] = true, ['%'] = true,
['+~'] = true, ['-~'] = true, ['*~'] = true, ['/~'] = true,
['%~'] = true, ['<<'] = true, ['>>'] = true, ['<<~'] = true,
['>>~'] = true, ['=='] = true, ['!='] = true, ['<'] = true,
['<='] = true, ['>='] = true, ['>'] = true, ['==~'] = true,
['!=~'] = true, ['<~'] = true, ['<=~'] = true, ['>=~'] = true,
['>~'] = true
}
return ops[op] and index or nil
end)
)
-- there is no suitable token name for this, change this if ever one is added
local punctuation = token(l.OPERATOR,
P'=>' + P'.>' + P'<:' + P'->'
+ S('=.,:;()[]{}!?~^&|_@'))
-- highlight functions with syntax sugar at declaration
local func
= token(l.KEYWORD, pword{'fun', 'new', 'be'}) * ws^-1
* annotation^-1 * ws^-1
* capability^-1 * ws^-1
* token(l.FUNCTION, pword{
'create', 'dispose', '_final', 'apply', 'update',
'add', 'sub', 'mul', 'div', 'mod', 'add_unsafe', 'sub_unsafe',
'mul_unsafe', 'div_unsafe', 'mod_unsafe', 'shl', 'shr', 'shl_unsafe',
'shr_unsafe', 'op_and', 'op_or', 'op_xor', 'eq', 'ne', 'lt', 'le', 'ge',
'gt', 'eq_unsafe', 'ne_unsafe', 'lt_unsafe', 'le_unsafe', 'ge_unsafe',
'gt_unsafe', 'neg', 'neg_unsafe', 'op_not',
'has_next', 'next',
'_serialise_space', '_serialise', '_deserialise'})
local id_suffix = (l.alnum + P("'") + P('_'))^0
local type = token(l.TYPE, P('_')^-1 * l.upper * id_suffix)
local identifier = token(l.IDENTIFIER, P('_')^-1 * l.lower * id_suffix)
local tuple_lookup = token(l.IDENTIFIER, P('_') * l.digit^1)
local M = {_NAME = 'pony'}
M._rules = {
{'whitespace', ws},
{'comment', comment},
{'annotation', annotation},
{'boolean', lit_bool},
{'number', lit_num},
{'string', lit_str},
{'function', func},
{'keyword', keyword},
{'capability', capability},
{'qualifier', qualifier},
{'operator', operator},
{'type', type},
{'identifier', identifier},
{'lookup', tuple_lookup},
{'punctuation', punctuation}
}
return M
|