1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
|
module Puppet::Pops
module Parser
# This module is an integral part of the Lexer.
# It handles scanning of EPP (Embedded Puppet), a form of string/expression interpolation similar to ERB.
#
require 'strscan'
module EppSupport
# Pre-built token arrays shared by every scan. Judging from how the dynamic
# RENDER_STRING tokens are built in interpolate_epp, the layout is
# [token symbol, text, length]. They are frozen because the same instances are
# handed out repeatedly and must never be modified in place.
TOKEN_RENDER_STRING = [:RENDER_STRING, nil, 0].freeze
TOKEN_RENDER_EXPR = [:RENDER_EXPR, nil, 0].freeze
# Runs a complete EPP scan and gathers everything it emits.
#
# @return [Array<Array>] one [token, value] pair per token yielded by scan_epp, in order;
#   the last pair is the [false, false] end-of-input marker.
#
def fullscan_epp
  [].tap do |collected|
    scan_epp { |symbol, payload| collected << [symbol, payload] }
  end
end
# Lexes EPP content and yields every token to the given block.
#
# A block must be passed. Each yield hands over a two element array that a block declared with
# two parameters receives as (token, value): a symbol for the token, and an instance of
# LexerSupport::TokenValue. The end of input is signalled by a final yield of [false, false].
#
# The scan starts in `:text` mode: an `:EPP_START` token is enqueued and `interpolate_epp`
# processes the leading template text. After that, tokens are taken from the token queue, or
# lexed on demand with `lex_token` when the queue is empty, until both the queue and the
# scanner are exhausted.
#
# Errors are reported through `lex_error`:
# * `Issues::EPP_INTERNAL_ERROR` if no scanner has been set up.
# * `Issues::EPP_UNBALANCED_TAG` if the input ends while an EPP tag is still open. The position
#   passed along is the one recorded under `:epp_open_position` (the key `interpolate_epp`
#   maintains), so the error points at the tag that was left open.
#
# PERFORMANCE NOTE: The TokenValue is designed to reduce the amount of garbage / temporary data
# and to only convert the lexer's internal tokens on demand. It is slightly more costly to create an
# instance of a class defined in Ruby than an Array or Hash, but the gain is much bigger since
# transformation logic is avoided for many of its members (most are never used, e.g. line/pos
# information which is only of value in general for error messages, and for some expressions
# which the lexer does not know about).
#
def scan_epp
  # PERFORMANCE note: it is faster to access local variables than instance variables.
  # This makes a small but notable difference since instance member access is avoided for
  # every token in the lexed content.
  #
  scn = @scanner
  ctx = @lexing_context
  queue = @token_queue

  lex_error(Issues::EPP_INTERNAL_ERROR, :error => 'No string or file given to lexer to process.') unless scn

  ctx[:epp_mode] = :text
  enqueue_completed([:EPP_START, nil, 0], 0)
  interpolate_epp

  # This is the lexer's main loop
  until queue.empty? && scn.eos? do
    token = queue.shift || lex_token
    if token
      # Remember the kind of the token just emitted in the context while yielding it.
      yield [ ctx[:after] = token[0], token[1] ]
    end
  end

  # A tag that was opened but never closed: report it at the position where it was opened.
  if ctx[:epp_open_position]
    lex_error(Issues::EPP_UNBALANCED_TAG, {}, ctx[:epp_open_position])
  end

  # Signals end of input
  yield [false, false]
end
# Scans template text from the current scanner position with a fresh EppScanner, enqueues the
# render tokens that result, and records in the lexing context how lexing should continue.
#
# Depending on the mode reported by the EppScanner after the scan:
# * `:text`  - the scan must have reached the end of input; any scanned text is enqueued as a
#              RENDER_STRING and `:epp_open_position` is cleared.
# * `:error` - the scanner's issue is reported with lex_error.
# * `:epp`   - non-empty text is enqueued as a RENDER_STRING, `:epp_mode` becomes `:epp` and the
#              current position is recorded as `:epp_open_position`.
# * `:expr`  - as for `:epp`, but a RENDER_EXPR token is enqueued as well and `:epp_mode`
#              becomes `:expr`.
# Any other mode is reported as an internal error.
#
# @param skip_leading passed on unchanged to EppScanner#scan
# @return [nil]
#
def interpolate_epp(skip_leading=false)
  source = @scanner
  context = @lexing_context
  template_scanner = EppScanner.new(source)
  start_pos = source.pos
  text = template_scanner.scan(skip_leading)

  # Enqueues the scanned text as a RENDER_STRING whose length covers everything consumed by this scan.
  emit_text = lambda do
    enqueue_completed([:RENDER_STRING, text, source.pos - start_pos], start_pos)
  end

  case template_scanner.mode
  when :text
    # Text mode is only a valid outcome when the whole input has been consumed.
    unless source.eos?
      lex_error(Issues::EPP_INTERNAL_ERROR, :error => 'template scanner returns text mode and is not and end of input')
    end
    # text is nil when the scanned input ended with an epp tag (no trailing text).
    emit_text.call if text
    context[:epp_open_position] = nil

  when :error
    lex_error(template_scanner.issue)

  when :epp
    # Rendering an empty segment is meaningless, and harmful at the start of the scan since it
    # prevents specification of parameters with <%- ($x, $y) -%>
    emit_text.call if text && !text.empty?
    # Continue with general (embedded) pp logic whose result is not rendered.
    context[:epp_mode] = :epp
    context[:epp_open_position] = source.pos

  when :expr
    emit_text.call if text && !text.empty?
    enqueue_completed(TOKEN_RENDER_EXPR, start_pos)
    # Continue in "epp expr interpolation" mode.
    context[:epp_mode] = :expr
    context[:epp_open_position] = source.pos

  else
    lex_error(Issues::EPP_INTERNAL_ERROR, :error => "Unknown mode #{template_scanner.mode} returned by template scanner")
  end

  nil
end
# A scanner specialized in processing text with embedded EPP (Embedded Puppet) tags.
# The scanner is initialized with a StringScanner which it mutates as scanning takes place.
# The intent is to use one instance of EppScanner per wanted scan, and this instance represents
# the state after the scan.
#
# @example Sample usage
# a = "some text <% pp code %> some more text"
# scan = StringScanner.new(a)
# eppscan = EppScanner.new(scan)
# str = eppscan.scan
# eppscan.mode # => :epp
# eppscan.lines # => 0
# eppscan
#
# The scanner supports
# * scanning text until <%, <%-, <%=
# * while scanning text:
# * tokens <%% and %%> are translated to <% and %>, respectively, and is returned as text.
# * tokens <%# and %> (or ending with -%>) and the enclosed text is a comment and is not included in the returned text
# * text following a comment that ends with -%> gets trailing whitespace (up to and including a line break) trimmed
# and this whitespace is not included in the returned text.
# * The continuation {#mode} is set to one of:
# * `:epp` - for a <% token
# * `:expr` - for a <%= token
# * `:text` - when there was no continuation mode (e.g. when input ends with text)
# * `:error` - if the tokens are unbalanced (reaching the end without a closing matching token). An error message
# is then also available via the method {#message}.
#
# Note that the intent is to use this specialized scanner to scan the text parts. When the continuation mode is `:epp` or `:expr`,
# the pp lexer should advance scanning (using the string scanner) until it reaches and consumes a `-%>` or `%>` token. If it
# finds a `-%>` token it should pass this on as a `skip_leading` parameter when it performs the next {#scan}.
#
class EppScanner
  # The original scanner used by the lexer/container using EppScanner
  attr_reader :scanner

  # The resulting mode after the scan (nil until {#scan} has been called).
  # The mode is one of `:text` (set when a scan starts), `:epp` embedded code (no output), `:expr` (embedded
  # expression), or `:error`
  #
  attr_reader :mode

  # An error issue if `mode == :error`, `nil` otherwise.
  attr_reader :issue

  # If the first scan should skip leading whitespace (typically detected by the pp lexer when the
  # pp mode end-token is found (i.e. `-%>`) and then passed on to the scanner).
  #
  attr_reader :skip_leading

  # Creates an EppScanner based on a StringScanner that represents the state where EppScanner should start scanning.
  # The given scanner will be mutated (i.e. position moved) to reflect the EppScanner's end state after a scan.
  #
  # @param scanner [StringScanner] the scanner to read from and advance
  #
  def initialize(scanner)
    @scanner = scanner
  end

  # Here for backwards compatibility.
  # @deprecated Use issue instead
  # @return [String, nil] the issue message (the result of calling `format` on the issue), or nil when there is no issue
  def message
    @issue.nil? ? nil : @issue.format
  end

  # Scans from the current position in the configured scanner, advances this scanner's position until the end
  # of the input, or to the first position after a mode switching token (`<%`, `<%-` or `<%=`). The continuation
  # mode can be obtained via {#mode}.
  #
  # While scanning, `<%%` and `%%>` are turned into literal `<%` and `%>`, and template comments
  # (`<%# ... %>`) are dropped from the returned text.
  #
  # @param skip_leading if truthy, leading spaces/tabs and one following line break are removed from the start
  #   of the scanned text
  # @return [String, nil] the scanned and processed text (also when the input ends directly after a `<%%`
  #   escape or a template comment), or nil if already at the end of the input when called.
  #
  def scan(skip_leading=false)
    @mode = :text
    @skip_leading = skip_leading

    return nil if @scanner.eos?

    # A mutable accumulator (dup protects against frozen string literals); appended to in place.
    s = ''.dup
    until @scanner.eos?
      # Always matches: either up to and including the next <%, or the rest of the input.
      part = @scanner.scan_until(/(<%)|\z/)
      if @skip_leading
        part.sub!(/^[ \t]*\r?(?:\n|\z)?/,'')
        @skip_leading = false
      end
      # The spec for %%> is to transform it into a literal %>. This is done here, as %%> otherwise would go
      # undetected in text mode. (i.e. it is not really necessary to escape %> with %%> in text mode unless
      # adding checks stating that a literal %> is illegal in text (unbalanced).
      #
      part.gsub!(/%%>/, '%>')
      s << part

      case @scanner.peek(1)
      when ""
        # at the end
        # if s ends with <% then this is an error (unbalanced <% %>)
        if s.end_with? "<%"
          @mode = :error
          @issue = Issues::EPP_UNBALANCED_EXPRESSION
        end
        return s

      when "-"
        # trim trailing whitespace on same line from accumulated s
        # return text and signal switch to pp mode
        @scanner.getch # drop the -
        s.sub!(/[ \t]*<%\z/, '')
        @mode = :epp
        return s

      when "%"
        # verbatim text
        # keep the scanned <%, and continue scanning after skipping one %
        @scanner.getch # drop the % to get a literal <% in the output

      when "="
        # expression
        # drop the scanned <% and the =, return text and signal switch to expression mode
        @scanner.getch # drop the =
        s.slice!(-2..-1)
        @mode = :expr
        return s

      when "#"
        # template comment
        # drop the scanned <%, and skip past -%>, or %>, but also skip %%>
        s.slice!(-2..-1)

        # Scan for the next %> that is not preceded by a % (i.e. skip %%>). The # itself has not been
        # consumed yet, so an immediately terminated comment (<%#%>) is matched as well.
        part = @scanner.scan_until(/[^%]%>/)
        unless part
          @issue = Issues::EPP_UNBALANCED_COMMENT
          @mode = :error
          return s
        end
        # Trim leading whitespace on the same line when start was <%#-
        if part[1] == '-'
          s.sub!(/[ \t]*\z/, '')
        end
        @skip_leading = true if part.end_with?("-%>")
        # Continue scanning for more text

      else
        # Switch to pp after having removed the <%
        s.slice!(-2..-1)
        @mode = :epp
        return s
      end
    end

    # The input ended directly after a <%% escape or a template comment. Hand back the text collected
    # so far instead of the loop's nil, which would silently drop it.
    s
  end
end
end
end
end
|