1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
module Puppet::Pops
module Parser
module HeredocSupport
include LexerSupport
# Pattern for the heredoc spec `@(endtag[:syntax][/escapes])`.
# Produces groups for endtag (group 1), syntax (group 2), and escapes (group 3):
# * endtag  - one or more characters other than `:`, `/`, `)`, CR and LF; the pattern does not trim
#             it, so surrounding blanks (and any enclosing double quotes) are part of the group
# * syntax  - follows a `:` (blanks are allowed on both sides of the name); a lower case letter
#             followed by at least one letter, digit, `_` or `+`; nil when no syntax is given
# * escapes - follows a `/`; zero or more word characters or `$` (blanks may follow);
#             nil when there is no `/`, and an empty string when nothing follows the `/`
#
PATTERN_HEREDOC = %r{@\(([^:/\r\n\)]+)(?::[[:blank:]]*([a-z][a-zA-Z0-9_+]+)[[:blank:]]*)?(?:/((?:\w|[$])*)[[:blank:]]*)?\)}
# Lexes a heredoc whose opening `@(` is at the current scanner position.
#
# In order, this method:
# 1. scans the heredoc spec `@(endtag[:syntax][/escapes])` and enqueues a completed :HEREDOC token
#    carrying the syntax (an empty string when no syntax was given),
# 2. moves to the start of the heredoc text: the line following the spec, or - for a second or
#    subsequent heredoc on the same line - the position stored in the lexing context's :newline_jump,
# 3. collects text lines up to the end-tag line and stores the position after that line in :newline_jump,
# 4. enqueues a :SUBLOCATE token and has a new lexer instance lex the text and interpolate the
#    result into this lexer,
# 5. repositions the scanner right after the spec so that scanning continues after `@(...)`.
#
# A line only terminates the heredoc when the optional `|` and `-` markers and the end tag are the
# only non blank content on it. A text line that merely ends with the end tag is heredoc text.
#
# Problems in the spec are reported via `lex_error`. When no end-tag line is found, the error created
# for Issues::HEREDOC_WITHOUT_END_TAGGED_LINE is raised.
#
def heredoc
  scn = @scanner
  ctx = @lexing_context
  locator = @locator
  before = scn.pos

  # scanner is at position before @(
  # find end of the heredoc spec
  spec = scn.scan_until(/\)/) || lex_error(Issues::HEREDOC_UNCLOSED_PARENTHESIS, :followed_by => followed_by)
  pos_after_heredoc = scn.pos

  # Note: allows '+' as separator in syntax, but this needs validation as empty segments are not allowed
  spec_md = spec.match(PATTERN_HEREDOC)
  lex_error(Issues::HEREDOC_INVALID_SYNTAX) unless spec_md
  endtag = spec_md[1]
  syntax = spec_md[2] || ''
  escapes = spec_md[3]

  endtag.strip!

  # Is this a dq string style heredoc? (endtag enclosed in "")
  # (dqstring_style is deliberately left as nil when it is not)
  if endtag =~ /^"(.*)"$/
    dqstring_style = true
    endtag = $1.strip
  end

  lex_error(Issues::HEREDOC_EMPTY_ENDTAG) unless endtag.length >= 1

  resulting_escapes = []
  if escapes
    # A lone '/' (no flags) turns on all escapes
    escapes = "trnsuL$" if escapes.length < 1

    escapes = escapes.split('')
    unless escapes.length == escapes.uniq.length
      lex_error(Issues::HEREDOC_MULTIPLE_AT_ESCAPES, :escapes => escapes)
    end
    # The backslash itself is added as soon as any escape is turned on
    resulting_escapes = ["\\"]
    escapes.each do |e|
      case e
      when "t", "r", "n", "s", "u", "$"
        resulting_escapes << e
      when "L"
        # 'L' stands for an escaped line break, in both its \n and \r\n forms
        resulting_escapes += ["\n", "\r\n"]
      else
        lex_error(Issues::HEREDOC_INVALID_ESCAPE, :actual => e)
      end
    end
  end

  # Produce a heredoc token to make the syntax available to the grammar
  enqueue_completed([:HEREDOC, syntax, pos_after_heredoc - before], before)

  # If this is the second or subsequent heredoc on the line, the lexing context's :newline_jump contains
  # the position after the \n where the next heredoc text should scan. If not set, this is the first
  # and it should start scanning after the first found \n (or if not found == error).
  if ctx[:newline_jump]
    scn.pos = ctx[:newline_jump]
  else
    scn.scan_until(/\n/) || lex_error(Issues::HEREDOC_WITHOUT_TEXT)
  end

  # offset 0 for the heredoc, and its line number
  heredoc_offset = scn.pos
  heredoc_line = locator.line_for_offset(heredoc_offset) - 1

  # Compute message to emit if there is no end (to make it refer to the opening heredoc position).
  eof_error = create_lex_error(Issues::HEREDOC_WITHOUT_END_TAGGED_LINE)

  # Text from this position (+ lexing contexts offset for any preceding heredoc) is heredoc until a line
  # that terminates the heredoc is found.
  # (Endline in EBNF form): WS* ('|' WS*)? ('-' WS*)? endtag WS* \r? (\n|$)
  # The pattern is anchored with \A since each scanned line starts at the beginning of a source line;
  # without the anchor a text line that merely ends with the end tag would terminate the heredoc (and the
  # text in front of the tag would be lost).
  endline_pattern = /\A([[:blank:]]*)(?:([|])[[:blank:]]*)?(?:(\-)[[:blank:]]*)?#{Regexp.escape(endtag)}[[:blank:]]*\r?(?:\n|\z)/

  lines = []
  until scn.eos?
    one_line = scn.scan_until(/(?:\n|\z)/)
    raise eof_error unless one_line

    end_md = one_line.match(endline_pattern)
    unless end_md
      lines << one_line
      next
    end

    leading = end_md[1]
    has_margin = end_md[2] == '|'
    remove_break = end_md[3] == '-'

    # Record position where next heredoc (from same line as current @()) should start scanning for content
    ctx[:newline_jump] = scn.pos

    # Process captured lines - remove leading, and trailing newline
    # get processed string and index of removed margin/leading size per line
    text, margin_per_line = heredoc_text(lines, leading, has_margin, remove_break)

    # Use a new lexer instance configured with a sub-locator to enable correct positioning
    sublexer = self.class.new()
    sub_locator = Locator::SubLocator.new(locator, text, heredoc_line, heredoc_offset, has_margin, margin_per_line)

    # Emit a token that provides the grammar with location information about the lines on which the heredoc
    # content is based.
    enqueue([:SUBLOCATE,
      LexerSupport::TokenValue.new([:SUBLOCATE,
        lines, lines.reduce(0) {|size, s| size + s.length} ],
        heredoc_offset,
        sub_locator)])

    sublexer.lex_unquoted_string(text, sub_locator, resulting_escapes, dqstring_style)
    sublexer.interpolate_uq_to(self)

    # Continue scan after @(...)
    scn.pos = pos_after_heredoc
    return
  end
  raise eof_error
end
# Produces the heredoc text string given the individual (unprocessed) lines as an array and array with margin sizes per line
# @param lines [Array<String>] unprocessed lines of text in the heredoc w/o terminating line
# @param leading [String] the leading text (up to the pipe or other terminating char)
# @param has_margin [Boolean] if the left margin should be adjusted as indicated by `leading`
# @param remove_break [Boolean] if the line break (\r?\n) at the end of the last line should be removed or not
# @return [Array] - a tuple with resulting string, and an array with margin size per line
#
def heredoc_text(lines, leading, has_margin, remove_break)
  if has_margin && !leading.empty?
    # TODO: Not according to the specification, but kept to be bug compatible: the specification says
    # that leading space up to the margin marker should be removed, whereas a line with text in the
    # margin does not start with `leading` and is therefore left untouched here.
    margin = /^#{Regexp.escape(leading)}/
    stripped = lines.map { |line| line.gsub(margin, '') }
    # What was removed from each line is the difference in length before/after stripping
    margin_per_line = lines.zip(stripped).map { |raw, cut| raw.length - cut.length }
  else
    # Nothing is stripped; the margin is 0 for every line
    stripped = lines
    margin_per_line = [0] * lines.length
  end

  text = stripped.join('')
  # Only the very last line break (\n or \r\n) is dropped
  text = text.sub(/\r?\n\z/m, '') if remove_break
  [text, margin_per_line]
end
end
end
end
|