1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
-- Copyright 2023-2024 Mitchell. See LICENSE.
-- troff/man LPeg lexer.
-- Based on original Man lexer by David B. Lamkins and modified by Eolien55.
-- Bind the global `lexer` module and the LPeg pattern constructors to locals.
local lexer = lexer
local P, R, S = lpeg.P, lpeg.R, lpeg.S
-- Create the lexer object that the rules in this file are attached to.
-- The chunk's own varargs are forwarded to `lexer.new` unchanged.
local lex = lexer.new(...)
-- Start of a request/macro line: a '.' at the beginning of a line, optionally followed by
-- whitespace other than a newline. Shared by every rule below that matches a request name.
local request_start = lexer.starts_line('.') * (lexer.space - '\n')^0

-- Comments: `.\"` at the start of a line, or `\"` / `\#` anywhere, through the end of the line.
-- This rule must be added before the others. Rules are tried in the order they are added, and
-- 'escape_sequences' (a backslash followed by any character) and 'any_macro' (a leading '.'
-- followed by any run of non-space characters) both match these introducers; when they were
-- registered ahead of this rule, no comment was ever tagged as one.
lex:add_rule('comment', lex:tag(lexer.COMMENT,
  (lexer.starts_line('.\\"') + '\\"' + '\\#') * lexer.nonnewline^0))

-- Registers and groff's structured programming.
-- `\{` is tagged wherever it appears; the other keywords (including `\}`) only as a request name.
lex:add_rule('keywords', lex:tag(lexer.KEYWORD, (request_start *
  (P('while') + 'break' + 'continue' + 'nr' + 'rr' + 'rnn' + 'aln' + '\\}')) + '\\{'))

-- Markup.
-- A backslash, an optional `s` (with optional sign) or one of `*fgmnYV`, then the name: `(` plus
-- two characters, a `[...]` range, or any single character.
lex:add_rule('escape_sequences', lex:tag(lexer.VARIABLE,
  '\\' * (('s' * S('+-')^-1) + S('*fgmnYV'))^-1 * (P('(') * 2 + lexer.range('[', ']') + 1)))

-- Headings (`.SH`, `.TH`, `.NH`): the tag covers the request and the rest of its line.
lex:add_rule('headings', lex:tag(lexer.NUMBER,
  request_start * (S('STN') * 'H') * (lexer.space - '\n') * lexer.nonnewline^0))

-- Layout requests (`.br`, `.DS`, `.RS`, `.RE`, `.PD`, `.PP`) followed by one whitespace character.
lex:add_rule('man_alignment', lex:tag(lexer.KEYWORD,
  request_start * (P('br') + 'DS' + 'RS' + 'RE' + 'PD' + 'PP') * lexer.space))

-- `.B`, `.BR`, `.I`, `.IP`, and `.IR`, followed by one whitespace character.
lex:add_rule('font', lex:tag(lexer.VARIABLE,
  request_start * ('B' * P('R')^-1 + 'I' * S('PR')^-1) * lexer.space))

-- Lowercase troff macros are plain macros (like .so or .nr).
lex:add_rule('troff_plain_macros', lex:tag(lexer.VARIABLE, request_start * lexer.lower^1))

-- Fallback for any other request line: the dot and the non-space run that follows it.
lex:add_rule('any_macro', lex:tag(lexer.PREPROCESSOR,
  request_start * (lexer.any - lexer.space)^0))
-- String literals: a range opened and closed by a double quote.
local double_quoted = lexer.range('"', true)
lex:add_rule('string', lex:tag(lexer.STRING, double_quoted))
-- Dollar-delimited spans. Usually used by eqn, and mandoc in some way.
local dollar_span = lexer.range('$', false, false)
lex:add_rule('in_dollars', lex:tag(lexer.EMBEDDED, dollar_span))

-- TODO: a lexer for each preprocessor?

return lex
|