File: heredoc_support.rb

package info (click to toggle)
puppet-agent 7.23.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 19,092 kB
  • sloc: ruby: 245,074; sh: 456; makefile: 38; xml: 33
file content (153 lines) | stat: -rw-r--r-- 6,096 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
module Puppet::Pops
module Parser
module HeredocSupport
  include LexerSupport

  # Pattern for the heredoc opening tag `@(endtag[:syntax][/escapes])`.
  # Produces groups for endtag (group 1), syntax (group 2), and escapes (group 3).
  # Groups 2 and 3 are nil when the `:syntax` or `/escapes` part is absent; group 3 is an
  # empty string when a `/` is given without any escape characters after it.
  # The pattern is not anchored; it is matched against the text scanned up to the first `)`.
  #
  PATTERN_HEREDOC = %r{@\(([^:/\r\n\)]+)(?::[[:blank:]]*([a-z][a-zA-Z0-9_+]+)[[:blank:]]*)?(?:/((?:\w|[$])*)[[:blank:]]*)?\)}


  # Lexes one heredoc expression. The scanner is expected to be positioned just before `@(`.
  #
  # Steps performed:
  # 1. Scans and validates the `@(endtag[:syntax][/escapes])` tag (see PATTERN_HEREDOC).
  # 2. Enqueues a :HEREDOC token carrying the syntax name.
  # 3. Collects the text lines that follow (starting after the current line, or after the end line
  #    of a preceding heredoc opened on the same line) until a line consisting only of the end tag
  #    with optional `|` and `-` markers is found.
  # 4. Enqueues a :SUBLOCATE token and lexes the collected text with a new lexer instance that uses
  #    a sub-locator, feeding the result back into this lexer.
  # 5. Moves the scanner back to just after the `@(...)` tag so the rest of that line is lexed next.
  #
  # Problems in the tag (unclosed parenthesis, invalid syntax, empty end tag, repeated or unknown
  # escapes) and a missing text section are reported through `lex_error`.
  #
  # @raise the error created for Issues::HEREDOC_WITHOUT_END_TAGGED_LINE when no end line is found
  # @return [void]
  #
  def heredoc
    scn = @scanner
    ctx = @lexing_context
    locator = @locator
    before = scn.pos

    # scanner is at position before @(
    # find end of the heredoc spec
    str = scn.scan_until(/\)/) || lex_error(Issues::HEREDOC_UNCLOSED_PARENTHESIS, :followed_by => followed_by)
    pos_after_heredoc = scn.pos
    # Note: allows '+' as separator in syntax, but this needs validation as empty segments are not allowed
    md = str.match(PATTERN_HEREDOC)
    lex_error(Issues::HEREDOC_INVALID_SYNTAX) unless md
    endtag = md[1]
    syntax = md[2] || ''
    escapes = md[3]

    endtag.strip!

    # Is this a dq string style heredoc? (endtag enclosed in "")
    # (dqstring_style is left as nil for the non-quoted form.)
    if endtag =~ /^"(.*)"$/
      dqstring_style = true
      endtag = $1.strip
    end

    lex_error(Issues::HEREDOC_EMPTY_ENDTAG) unless endtag.length >= 1

    resulting_escapes = []
    if escapes
      # A bare '/' (no escape characters listed) turns on every supported escape
      escapes = "trnsuL$" if escapes.length < 1

      escapes = escapes.split('')
      unless escapes.length == escapes.uniq.length
        lex_error(Issues::HEREDOC_MULTIPLE_AT_ESCAPES, :escapes => escapes)
      end
      # Backslash itself is always escapable once any escape is enabled
      resulting_escapes = ["\\"]
      escapes.each do |e|
        case e
        when "t", "r", "n", "s", "u", "$"
          resulting_escapes << e
        when "L"
          # 'L' enables escaping of the line break itself, in both \n and \r\n form
          resulting_escapes += ["\n", "\r\n"]
        else
          lex_error(Issues::HEREDOC_INVALID_ESCAPE, :actual => e)
        end
      end
    end

    # Produce a heredoc token to make the syntax available to the grammar
    enqueue_completed([:HEREDOC, syntax, pos_after_heredoc - before], before)

    # If this is the second or subsequent heredoc on the line, the lexing context's :newline_jump contains
    # the position after the \n where the next heredoc text should scan. If not set, this is the first
    # and it should start scanning after the first found \n (or if not found == error).

    if ctx[:newline_jump]
      scn.pos = ctx[:newline_jump]
    else
      scn.scan_until(/\n/) || lex_error(Issues::HEREDOC_WITHOUT_TEXT)
    end
    # offset 0 for the heredoc, and its line number
    heredoc_offset = scn.pos
    heredoc_line = locator.line_for_offset(heredoc_offset)-1

    # Compute message to emit if there is no end (to make it refer to the opening heredoc position).
    eof_error = create_lex_error(Issues::HEREDOC_WITHOUT_END_TAGGED_LINE)

    # Text from this position (+ lexing contexts offset for any preceding heredoc) is heredoc until a line
    # that terminates the heredoc is found.

    # (Endline in EBNF form): WS* ('|' WS*)? ('-' WS*)? endtag WS* \r? (\n|$)
    # The pattern is anchored with \A so that only a line consisting of nothing but the end tag (and the
    # optional markers) terminates the heredoc. Without the anchor, any text line that merely ends with
    # the end tag (e.g. "some text END") would be taken as the end line and truncate the heredoc.
    # Each scanned line starts at a line start, so \A is the start of the line.
    endline_pattern = /\A([[:blank:]]*)(?:([|])[[:blank:]]*)?(?:(\-)[[:blank:]]*)?#{Regexp.escape(endtag)}[[:blank:]]*\r?(?:\n|\z)/
    lines = []
    until scn.eos?
      one_line = scn.scan_until(/(?:\n|\z)/)
      raise eof_error unless one_line
      md = one_line.match(endline_pattern)
      if md
        leading      = md[1]
        has_margin   = md[2] == '|'
        remove_break = md[3] == '-'
        # Record position where next heredoc (from same line as current @()) should start scanning for content
        ctx[:newline_jump] = scn.pos


        # Process captured lines - remove leading, and trailing newline
        # get processed string and index of removed margin/leading size per line
        str, margin_per_line = heredoc_text(lines, leading, has_margin, remove_break)

        # Use a new lexer instance configured with a sub-locator to enable correct positioning
        sublexer = self.class.new()
        locator = Locator::SubLocator.new(locator, str, heredoc_line, heredoc_offset, has_margin, margin_per_line)

        # Emit a token that provides the grammar with location information about the lines on which the heredoc
        # content is based.
        enqueue([:SUBLOCATE,
          LexerSupport::TokenValue.new([:SUBLOCATE,
            lines, lines.reduce(0) {|size, s| size + s.length} ],
            heredoc_offset,
            locator)])

        sublexer.lex_unquoted_string(str, locator, resulting_escapes, dqstring_style)
        sublexer.interpolate_uq_to(self)
        # Continue scan after @(...)
        scn.pos = pos_after_heredoc
        return
      else
        lines << one_line
      end
    end
    # Ran out of input without finding an end line
    raise eof_error
  end

  # Builds the heredoc text from its raw lines and reports how much margin was removed from each line.
  # @param lines [Array<String>] unprocessed lines of text in the heredoc, without the terminating line
  # @param leading [String] the text that precedes the pipe (or other terminating char) on the end line
  # @param has_margin [Boolean] whether the left margin given by `leading` should be removed from each line
  # @param remove_break [Boolean] whether the line break (\r?\n) ending the last line should be removed
  # @return [Array] a tuple of the resulting string and an array with the removed margin size per line
  #
  def heredoc_text(lines, leading, has_margin, remove_break)
    strip_margin = has_margin && leading.length > 0
    if strip_margin
      margin_matcher = /^#{Regexp.escape(leading)}/
      # TODO: This is knowingly not according to the specification; it is kept to be bug compatible.
      # The specification says that leading space up to the margin marker should be removed, whereas
      # a line that has text inside the margin is left untouched here.
      text_lines = lines.map { |raw| raw.gsub(margin_matcher, '') }
      margins = lines.zip(text_lines).map { |raw, trimmed| raw.length - trimmed.length }
    else
      # Nothing is removed: one 0 per line
      text_lines = lines
      margins = Array.new(lines.length, 0)
    end

    text = text_lines.join('')
    text = text.gsub(/\r?\n\z/m, '') if remove_break
    [text, margins]
  end

end
end
end