File: lexer_support.rb

package info (click to toggle)
puppet-agent 7.23.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 19,092 kB
  • sloc: ruby: 245,074; sh: 456; makefile: 38; xml: 33
file content (221 lines) | stat: -rw-r--r-- 5,980 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221

module Puppet::Pops
module Parser

require_relative '../../../puppet/util/multi_match'

# This is an integral part of the Lexer. It is broken out into a separate module
# for maintainability of the code, and making the various parts of the lexer focused.
#
module LexerSupport

  # Returns "<eof>" if at end of input, else the following 5 characters with \n \r \t escaped
  def followed_by
    return "<eof>" if @scanner.eos?
    result = @scanner.rest[0,5] + "..."
    result.gsub!("\t", '\t')
    result.gsub!("\n", '\n')
    result.gsub!("\r", '\r')
    result
  end

  # Returns a quoted string using " or ' depending on the given a strings's content
  def format_quote(q)
    if q == "'"
      '"\'"'
    else
      "'#{q}'"
    end
  end

  # Raises a Puppet::LexError with the given message
  def lex_error_without_pos(issue, args = {})
    raise Puppet::ParseErrorWithIssue.new(issue.format(args), nil, nil, nil, nil, issue.issue_code, args)
  end

  # Raises a Puppet::ParserErrorWithIssue with the given issue and arguments
  def lex_error(issue, args = {}, pos=nil)
    raise create_lex_error(issue, args, pos)
  end

  def filename
    file = @locator.file
    file.is_a?(String) && !file.empty? ? file : nil
  end

  def line(pos)
    @locator.line_for_offset(pos || @scanner.pos)
  end

  def position(pos)
    @locator.pos_on_line(pos || @scanner.pos)
  end

  def lex_warning(issue, args = {}, pos=nil)
    Puppet::Util::Log.create({
        :level => :warning,
        :message => issue.format(args),
        :issue_code => issue.issue_code,
        :file => filename,
        :line => line(pos),
        :pos => position(pos),
      })
  end

  # @param issue [Issues::Issue] the issue
  # @param args [Hash<Symbol,String>] Issue arguments
  # @param pos [Integer]
  # @return [Puppet::ParseErrorWithIssue] the created error
  def create_lex_error(issue, args = {}, pos = nil)
    Puppet::ParseErrorWithIssue.new(
        issue.format(args),
        filename,
        line(pos),
        position(pos),
        nil,
        issue.issue_code,
        args)
  end

  # Asserts that the given string value is a float, or an integer in decimal, octal or hex form.
  # An error is raised if the given value does not comply.
  #
  def assert_numeric(value, pos)
    if value =~ /^0[xX]/
      lex_error(Issues::INVALID_HEX_NUMBER, {:value => value}, pos)     unless value =~ /^0[xX][0-9A-Fa-f]+$/

    elsif value =~ /^0[^.]/
      lex_error(Issues::INVALID_OCTAL_NUMBER, {:value => value}, pos)   unless value =~ /^0[0-7]+$/

    elsif value =~ /^\d+[eE.]/
      lex_error(Issues::INVALID_DECIMAL_NUMBER, {:value => value}, pos) unless value =~ /^\d+(?:\.\d+)?(?:[eE]-?\d+)?$/

    else
      lex_error(Issues::ILLEGAL_NUMBER, {:value => value}, pos) unless value =~ /^\d+$/
    end
  end

  # A TokenValue keeps track of the token symbol, the lexed text for the token, its length
  # and its position in its source container. There is a cost associated with computing the
  # line and position on line information.
  #
  class TokenValue < Locatable
    attr_reader :token_array
    attr_reader :offset
    attr_reader :locator

    def initialize(token_array, offset, locator)
      @token_array = token_array
      @offset = offset
      @locator = locator
    end

    def length
      @token_array[2]
    end

    def [](key)
      case key
      when :value
        @token_array[1]
      when :file
        @locator.file
      when :line
        @locator.line_for_offset(@offset)
      when :pos
        @locator.pos_on_line(@offset)
      when :length
        @token_array[2]
      when :locator
        @locator
      when :offset
        @offset
      else
        nil
      end
    end

    def to_s
      # This format is very compact and is intended for debugging output from racc parser in
      # debug mode. If this is made more elaborate the output from a debug run becomes very hard to read.
      #
      "'#{self[:value]} #{@token_array[0]}'"
    end
    # TODO: Make this comparable for testing
    # vs symbolic, vs array with symbol and non hash, array with symbol and hash)
    #
  end


  MM = Puppet::Util::MultiMatch
  MM_ANY = MM::NOT_NIL

  BOM_UTF_8      = MM.new(0xEF, 0xBB, 0xBF,        MM_ANY)
  BOM_UTF_16_1   = MM.new(0xFE, 0xFF,              MM_ANY, MM_ANY)
  BOM_UTF_16_2   = MM.new(0xFF, 0xFE,              MM_ANY, MM_ANY)
  BOM_UTF_32_1   = MM.new(0x00, 0x00, 0xFE, 0xFF   )
  BOM_UTF_32_2   = MM.new(0xFF, 0xFE, 0x00, 0x00   )

  BOM_UTF_1      = MM.new(0xF7, 0x64, 0x4C,        MM_ANY)
  BOM_UTF_EBCDIC = MM.new(0xDD, 0x73, 0x66, 0x73   )
  BOM_SCSU       = MM.new(0x0E, 0xFE, 0xFF,        MM_ANY)
  BOM_BOCU       = MM.new(0xFB, 0xEE, 0x28,        MM_ANY)
  BOM_GB_18030   = MM.new(0x84, 0x31, 0x95, 0x33   )

  LONGEST_BOM    = 4

  def assert_not_bom(content)
    name, size =
    case bom = get_bom(content)

    when BOM_UTF_32_1, BOM_UTF_32_2
      ['UTF-32', 4]

    when BOM_GB_18030
      ['GB-18030', 4]

    when BOM_UTF_EBCDIC
      ['UTF-EBCDIC', 4]

    when BOM_SCSU
      ['SCSU', 3]

    when BOM_UTF_8
      ['UTF-8', 3]

    when BOM_UTF_1
      ['UTF-1', 3]

    when BOM_BOCU
      ['BOCU', 3]

    when BOM_UTF_16_1, BOM_UTF_16_2
      ['UTF-16', 2]

    else
      return
    end

    lex_error_without_pos(
      Puppet::Pops::Issues::ILLEGAL_BOM,
      { :format_name   => name,
        :bytes  => "[#{bom.values[0,size].map {|b| "%X" % b}.join(" ")}]"
      })
  end

  def get_bom(content)
    # get 5 bytes as efficiently as possible (none of the string methods works since a bom consists of
    # illegal characters on most platforms, and there is no get_bytes(n). Explicit calls are faster than
    # looping with a lambda. The get_byte returns nil if there are too few characters, and they
    # are changed to spaces
    MM.new(
      (content.getbyte(0) || ' '),
      (content.getbyte(1) || ' '),
      (content.getbyte(2) || ' '),
      (content.getbyte(3) || ' ')
      )
  end

end
end
end