-- Copyright 2019-2024 Julien L. See LICENSE.
-- txt2tags LPeg lexer.
-- (developed and tested with Txt2tags Markup Rules
-- [https://txt2tags.org/doc/english/rules.t2t])
-- Contributed by Julien L.
local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
-- NOTE(review): `lpeg` is not required here, so it is presumably provided as a
-- global by the lexer environment (legacy Scintillua style) — confirm before
-- running this file standalone.
local P, S = lpeg.P, lpeg.S
-- Any single character that is not whitespace.
local nonspace = lexer.any - lexer.space
local lex = lexer.new('txt2tags')
-- Whitespace.
-- Horizontal whitespace only: newlines are excluded, presumably so that
-- line-anchored rules (headers, comments, tables) still see line starts.
local ws = token(lexer.WHITESPACE, (lexer.space - lexer.newline)^1)
-- Titles
-- Characters allowed in a title's anchor label.
local alphanumeric = lexer.alnum + S('_-')
-- Optional "[label]" anchor that may follow a title, e.g. "= Title =[id]".
local header_label = token('header_label_start', '[') * token(lexer.LABEL, alphanumeric^1) *
token('header_label_end', ']')
-- Build the pattern for a level-`level` title: title text surrounded on both
-- sides by `level` repetitions of '=' (numbered) or '+' (unnumbered), with an
-- optional trailing [label] anchor. Produces token 'h<level>'.
local function h(level)
  -- Pattern for one delimiter character `ch` repeated `level` times on each
  -- side of a run of non-`ch` characters.
  local function surrounded(ch)
    local edge = string.rep(ch, level)
    return edge * (lexer.nonnewline - ch)^1 * edge
  end
  return token('h' .. level, surrounded('=') + surrounded('+')) * header_label^-1
end
-- A title at any level (1-5); each level yields its own token 'h1'..'h5'.
local header = h(5) + h(4) + h(3) + h(2) + h(1)
-- Comments.
-- A line beginning with '%%%' opens/closes a block comment; a single leading
-- '%' comments one line. The block form is the first alternative so a '%%%'
-- line is not consumed as a mere line comment.
local comment = token(lexer.COMMENT, lexer.range(lexer.starts_line('%%%')) +
  lexer.to_eol(lexer.starts_line('%')))
-- Inline.
-- Build a pattern for an inline span delimited by a two-character marker
-- (e.g. **bold**): either a single non-space character between markers, or a
-- longer run whose first and last characters are non-space. Extra trailing
-- marker characters are absorbed into the span.
local function span(name, delimiter)
  local single = delimiter * nonspace * delimiter * S(delimiter)^0
  local multi = delimiter * nonspace *
    (lexer.nonnewline - nonspace * delimiter)^0 * nonspace * delimiter *
    S(delimiter)^0
  return token(name, single + multi)
end
-- One span per txt2tags two-character inline marker.
local bold = span(lexer.BOLD, '**')
local italic = span(lexer.ITALIC, '//')
local underline = span(lexer.UNDERLINE, '__')
local strike = span('strike', '--')
local mono = span(lexer.CODE, '``')
local raw = span(lexer.DEFAULT, '""')
local tagged = span('tagged', "''")
-- Any one inline span; the markers are distinct two-character sequences.
local inline = bold + italic + underline + strike + mono + raw + tagged
-- Link.
-- E-mail address: local-part@domain with one or more dotted components and an
-- optional '?query' suffix.
local email = token(lexer.LINK,
(nonspace - '@')^1 * '@' * (nonspace - '.')^1 * ('.' * (nonspace - S('.?'))^1)^1 *
('?' * nonspace^1)^-1)
-- Bare host name beginning with "www" or "ftp" (case-insensitive), with at
-- least two dotted components.
local host = token(lexer.LINK,
word_match('www ftp', true) * (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' *
(nonspace - S(',.'))^1)
-- Full URL: scheme://host[.tld...][/path][?query][#fragment].
local url = token(lexer.LINK,
(nonspace - '://')^1 * '://' * (nonspace - ',' - '.')^1 * ('.' * (nonspace - S(',./?#'))^1)^1 *
('/' * (nonspace - S('./?#'))^0 * ('.' * (nonspace - S(',.?#'))^1)^0)^0 *
('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1)
-- Labeled link "[label words address]": everything up to the last
-- space-separated word is the label, the last word is the address.
local label_with_address = token(lexer.LABEL, '[') * lexer.space^0 *
token(lexer.LABEL, ((nonspace - ']')^1 * lexer.space^1)^1) * token(lexer.LINK, (nonspace - ']')^1) *
token(lexer.LABEL, ']')
-- Most specific form first so a labeled link is not consumed as a bare URL.
local link = label_with_address + url + host + email
-- Line.
-- Horizontal rule: 20 or more '-', '=' or '_' characters.
local line = token('line', S('-=_')^20)
-- Image.
-- "[image]" alone, or "[[image] link]" for an image that links somewhere.
local image_only = token('image_start', '[') * token('image', (nonspace - ']')^1) *
token('image_end', ']')
local image_link = token('image_link_start', '[') * image_only *
token('image_link_sep', lexer.space^1) * token(lexer.LINK, (nonspace - ']')^1) *
token('image_link_end', ']')
-- Linked form first: a plain image is a prefix of the linked form.
local image = image_link + image_only
-- Macro.
-- "%%name" with an optional parenthesized argument, e.g. %%date(%Y-%m-%d).
local macro = token(lexer.PREPROCESSOR, '%%' * (nonspace - '(')^1 * lexer.range('(', ')', true)^-1)
-- Verbatim.
-- A ```-delimited block, or a single line starting with the marker followed
-- by a space or tab. The block form is the first alternative.
local verbatim_area = token(lexer.CODE, lexer.range(lexer.starts_line('```')) +
  lexer.to_eol(lexer.starts_line('```') * S(' \t')))
-- Raw.
-- A """-delimited block, or a single line starting with the marker followed
-- by a space or tab. The block form is the first alternative.
local raw_area = token(lexer.DEFAULT, lexer.range(lexer.starts_line('"""')) +
  lexer.to_eol(lexer.starts_line('"""') * S(' \t')))
-- Tagged.
-- A '''-delimited block, or a single line starting with the marker followed
-- by a space or tab. The block form is the first alternative.
local tagged_area = token('tagged_area', lexer.range(lexer.starts_line("'''")) +
  lexer.to_eol(lexer.starts_line("'''") * S(' \t')))
-- Table.
local table_sep = token('table_sep', '|')
-- A data cell may contain inline markup, links, images or macros; any other
-- character that does not start the ' |' field separator is plain content.
local cell_content = inline + link + image + macro +
  token('cell_content', lexer.nonnewline - ' |')
local header_cell_content = token('header_cell_content', lexer.nonnewline - ' |')
local field_sep = ' ' * table_sep^1 * ' '
local table_row_end = P(' ')^0 * table_sep^0
-- A row starts (after optional spaces) with '|'; a header row with '||'.
local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 *
  (field_sep * cell_content^0)^0 * table_row_end
local table_row_header =
  lexer.starts_line(P(' ')^0 * table_sep * table_sep) * header_cell_content^0 *
  (field_sep * header_cell_content^0)^0 * table_row_end
-- Header rows first ('||' is a prefix match for '|').
-- Named table_pat (was `table`) so the standard `table` library is not
-- shadowed for the rest of the file.
local table_pat = table_row_header + table_row
lex:add_rule('table', table_pat)
-- Rule order is significant: rules added earlier are tried first.
lex:add_rule('link', link)
lex:add_rule('line', line)
lex:add_rule('header', header)
lex:add_rule('comment', comment)
lex:add_rule('whitespace', ws)
lex:add_rule('image', image)
lex:add_rule('macro', macro)
lex:add_rule('inline', inline)
lex:add_rule('verbatim_area', verbatim_area)
lex:add_rule('raw_area', raw_area)
lex:add_rule('tagged_area', tagged_area)
-- Custom styles for the non-standard token names used above.
lex:add_style('line', {bold = true})
-- Base header sizes on the default style's font size (fallback 10 when the
-- 'style.default' property has no size component).
local font_size = tonumber(lexer.property_expanded['style.default']:match('size:(%d+)')) or 10
-- h1 is largest: default size + 5 down to default size + 1 for h5.
for n = 5, 1, -1 do
lex:add_style('h' .. n, {fore = lexer.colors.red, size = font_size + (6 - n)})
end
lex:add_style('image', {fore = lexer.colors.green})
lex:add_style('strike', {italics = true}) -- a strike style is not available
lex:add_style('tagged', lexer.styles.embedded)
lex:add_style('tagged_area', lexer.styles.embedded) -- in consistency with tagged
lex:add_style('table_sep', {fore = lexer.colors.green})
lex:add_style('header_cell_content', {fore = lexer.colors.green})
return lex