File: parser.rb

package info (click to toggle)
ruby-beaker-hostgenerator 1.7.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,556 kB
  • sloc: ruby: 3,059; makefile: 3
file content (326 lines) | stat: -rw-r--r-- 12,440 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
require 'beaker-hostgenerator/data'
require 'beaker-hostgenerator/exceptions'
require 'cgi'
require 'strscan'

module BeakerHostGenerator
  # Functions for parsing the raw user input host layout string and turning
  # them into proper data structures suitable for processing by the Generator.
  #
  # The functions mainly perform data type conversions, like splitting the
  # single input string into a list of strings, each of which will be processed
  # further by other functions in this module.
  #
  # For example, given the raw user input string that defines the host layout,
  # you would first prepare it for tokenization via `prepare`, then split it
  # into tokens via `tokenize_layout`, and then for each token you would call
  # `is_ostype_token?` and/or `parse_node_info_token`.
  module Parser

    # Matches a single node definition and captures the following components:
    #
    #   * bits             Uppercase-only alphanumeric
    #                      Examples: 32, 64, POWER, S390X
    #
    #   * arbitrary_roles  Comma-separated names made of lowercase letters and
    #                      underscores, terminated by a period
    #                      Examples: myrole, role_a
    #
    #   * roles            Lowercase-only, any combination of: u, a, c, l, d, f, m
    #                      Examples: m, mdca
    #
    #   * host_settings    Any printable characters wrapped in braces
    #                      (see `settings_string_to_map` for details)
    #                      Examples: {hostname=foo-bar, ip.address=123.4.5.6, foo=[bar,baz]}
    #
    # This regex is the main workhorse for parsing input to beaker-hostgenerator.
    # Some parsing happens before and after it is applied:
    # Before, the input containing multiple node definitions is split into
    # individual node definitions (see `tokenize_layout`).
    # After, the host_settings key-value string is turned into a proper Hash
    # (see `settings_string_to_map`).
    #
    # Only the four named groups capture; because the pattern uses named
    # groups, Ruby treats any plain parentheses as non-capturing.
    #
    # The arbitrary_roles group is a single character class on purpose. A
    # nested quantifier such as `([[:lower:]_]*|,)*` accepts the same strings
    # but can backtrack exponentially on tokens that do not match.
    #
    # See the Ruby Regexp class for information on the constructs used below.
    # http://ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes
    #
    # Example node specs and their resulting roles
    #
    #  64compile_master,zuul,meow.a
    #   * compile_master
    #   * zuul
    #   * meow
    #   * agent
    #
    #  32herp.cdma
    #   * herp
    #   * dashboard
    #   * database
    #   * master
    #   * agent
    #
    #  64dashboard,master,agent,database.
    #   * dashboard
    #   * master
    #   * agent
    #   * database
    #
    NODE_REGEX = /\A(?<bits>[A-Z0-9]+)(?:(?<arbitrary_roles>[[:lower:]_,]*)\.)?(?<roles>[uacldfm]*)(?<host_settings>\{[[:print:]]*\})?\Z/

    module_function

    # Readies the raw host layout string for tokenization by URL-decoding it.
    #
    # @param spec [String] Raw user input; well-formatted string specification
    #                      of the hosts to generate, possibly URL-encoded.
    #                      For example `"aix53-POWERfa%7Bhypervisor=aix%7D"`.
    # @return [String] The decoded specification, e.g.
    #                  `"aix53-POWERfa{hypervisor=aix}"`.
    def prepare(spec)
      decoded_spec = CGI.unescape(spec)
      decoded_spec
    end

    # Breaks apart the host input string into chunks suitable for processing
    # by the generator. Returns an array of substrings of the input spec string.
    #
    # The input string is expected to be properly formatted using the dash `-`
    # character as a delimiter. Dashes may also be used within braces `{...}`,
    # which are used to define arbitrary key-values on a node; such dashes do
    # not split the token. Brace nesting is tracked by depth, so dashes inside
    # nested settings such as `{a={b=c-d},e=f-g}` are preserved as well.
    #
    # No placeholder character is substituted while splitting, so every other
    # character of the input (including the pipe `|`) is returned unchanged.
    #
    # As with `String#split`, empty tokens at the end of the result are
    # dropped, while leading and interior empty tokens are kept.
    #
    # @param layout_spec [String] Well-formatted string specification of the
    #                             hosts to generate.
    #                             For example `"centos6-64m-debian8-32a"`.
    # @return [Array<String>] Input string split into substrings suitable for
    #                         processing by the generator. For example
    #                         `["centos6", "64m", "debian8", "32a"]`.
    def tokenize_layout(layout_spec)
      tokens = []
      brace_depth = 0
      token_start = 0

      layout_spec.each_char.with_index do |char, index|
        case char
        when '{'
          brace_depth += 1
        when '}'
          # A stray closing brace must not drive the depth below zero,
          # otherwise a later `{` would fail to protect its dashes.
          brace_depth -= 1 if brace_depth > 0
        when '-'
          # Only a dash outside of all braces separates two tokens.
          next unless brace_depth.zero?

          tokens << layout_spec[token_start...index]
          token_start = index + 1
        end
      end
      tokens << layout_spec[token_start..-1]

      # Mirror String#split('-'): trailing empty tokens are not reported.
      tokens.pop while !tokens.empty? && tokens.last.empty?
      tokens
    end

    # Reports whether a token names an OS platform (e.g. "centos6" or
    # "debian8") rather than some other part of the host specification such as
    # the architecture bits (e.g. "32" or "64").
    #
    # The parser uses this to decide whether a token introduces a host for a
    # new platform or adds another host to the current platform.
    #
    # A token counts as an OS type when it equals the portion before the first
    # dash of any platform name returned by
    # `BeakerHostGenerator::Data.get_platforms` for the given version.
    #
    # @param [String] token A piece of the host generator input that might refer
    #                 to an OS platform. For example `"centos6"` or `"debian8"`.
    #
    # @param [Integer] bhg_version The version of OS info to use when testing
    #                  whether the token represents an OS platform.
    #
    # @return [Boolean] true when the token matches a known OS type.
    def is_ostype_token?(token, bhg_version)
      known_platforms = BeakerHostGenerator::Data.get_platforms(bhg_version)
      known_platforms.any? { |platform_name| platform_name.split('-').first == token }
    end

    # Turns a node token (as opposed to an OS type token) into a hash whose
    # keys are the named capture groups of `NODE_REGEX` and whose values are
    # the captured text, with two of the values post-processed:
    #
    #   * 'arbitrary_roles' becomes an Array of role names (split on commas),
    #     or an empty Array when the token has no arbitrary roles, so callers
    #     never have to check for nil.
    #   * 'host_settings' becomes a Hash built by `settings_string_to_map`, or
    #     an empty Hash when the token has no settings.
    #
    # Raises an exception if the token is not in the expected format, as
    # determined by `NODE_REGEX.match`.
    #
    # It is expected that the `Generator` will have intimate knowledge about
    # the keys and values in the returned map, as it may be adjusted and given
    # to the hypervisors or other abstractions for processing.
    #
    # @param token [String] The portion of the user input layout specification
    #                       that describes the node (and not the OS platform).
    #                       For example `"64myrole.mdca"`.
    #
    # @return [Hash{String=>Object}] A map containing the regex capture groups
    #                                suitable for processing by the Generator
    #                                and hypervisors.
    #
    # @raise [BeakerHostGenerator::Exceptions::InvalidNodeSpecError] when the
    #        token does not match `NODE_REGEX`.
    def parse_node_info_token(token)
      match = NODE_REGEX.match(token)
      unless match
        raise BeakerHostGenerator::Exceptions::InvalidNodeSpecError.new,
              "Invalid node_info token: #{token}"
      end

      parsed = match.names.zip(match.captures).to_h

      role_names = parsed['arbitrary_roles']
      parsed['arbitrary_roles'] = role_names ? role_names.split(',') : []

      raw_settings = parsed['host_settings']
      parsed['host_settings'] = raw_settings ? settings_string_to_map(raw_settings) : {}

      parsed
    end

    # Transforms the arbitrary host settings map from a string representation
    # to a proper hash map data structure for merging into the host
    # configuration. Supports arbitrary nested hashes and arrays.
    #
    # The string is expected to be of the form "{key1=value1,key2=[v2,v3],...}".
    # Nesting looks like "{key1={nested_key=nested_value},list=[[list1, list2]]}".
    # Whitespace is expected to be properly quoted as it will not be treated
    # any different than non-whitespace characters.
    #
    # Throws an exception if the string is malformed in any way. That includes:
    #   * a string that does not start with `{`
    #   * hash entries that are not exactly one non-empty key and one
    #     non-empty value separated by a single `=`
    #   * `{`/`[` directly following text that is not a `key=` assignment
    #   * a `}` closing an array, a `]` closing a hash, or a closing bracket
    #     with nothing left to close
    #   * containers left open, or text left over, at the end of the string
    #
    # @param host_settings [String] Non-nil user input string that defines host
    #                               specific settings.
    #
    # @return [Hash{String=>String|Array|Hash}] The host_settings string as a map.
    #
    # @raise [BeakerHostGenerator::Exceptions::InvalidNodeSpecError] when the
    #        string is malformed. Any StandardError raised while parsing is
    #        reported this way.
    def settings_string_to_map(host_settings)
      invalid = BeakerHostGenerator::Exceptions::InvalidNodeSpecError
      scanner = StringScanner.new(host_settings)
      root = nil
      # Containers currently being filled; the innermost one is last.
      open_containers = []

      # Stores a "key=value" fragment into a hash, insisting on exactly one
      # non-empty key and one non-empty value.
      store_pair = lambda do |hash, fragment|
        parts = fragment.split('=')
        raise invalid unless parts.size == 2 && parts.none?(&:empty?)

        hash[parts[0]] = parts[1]
      end

      # Each iteration consumes the text up to and including the next
      # delimiter. The delimiter, the text in front of it, and the type of the
      # innermost open container give enough context to decide how to modify
      # the structure being built.
      #
      # This algorithm would also support a base array, but since there
      # is no need for that functionality, the string must always be a
      # representation of a hash.
      loop do
        blob = scanner.scan_until(/[\[\]{},]/)
        break if blob.nil?

        delimiter = blob[-1]
        text = blob[0...-1]

        if root.nil?
          # The very first thing in the string must open the base hash.
          raise invalid unless blob == '{'

          root = {}
          open_containers.push(root)
          next
        end

        current = open_containers.last
        # Content after the base hash has been closed.
        raise invalid if current.nil?

        opening = delimiter == '{' || delimiter == '['
        # "key=" must be followed by the nested structure it is assigned.
        raise invalid if text.end_with?('=') && !opening

        case delimiter
        when '{', '['
          child = delimiter == '{' ? {} : []
          if text.empty?
            # A bare opener is an anonymous element, only valid in an array.
            raise invalid unless current.is_a?(Array)

            current.push(child)
          else
            # "key={" or "key=[": assignment of a nested structure to a hash
            # key; the key itself must not be empty.
            raise invalid unless current.is_a?(Hash) && text.end_with?('=') && text.length > 1

            current[text[0...-1]] = child
          end
          open_containers.push(child)
        when '}'
          raise invalid unless current.is_a?(Hash)

          store_pair.call(current, text) unless text.empty?
          open_containers.pop
        when ']'
          raise invalid unless current.is_a?(Array)

          current.push(text) unless text.empty?
          open_containers.pop
        when ','
          # A bare comma merely separates two already-processed elements.
          next if text.empty?

          if current.is_a?(Hash)
            store_pair.call(current, text)
          else
            current.push(text)
          end
        end
      end

      # Everything must have been consumed and every container closed.
      raise invalid unless root && open_containers.empty? && scanner.eos?

      root
    rescue StandardError
      raise BeakerHostGenerator::Exceptions::InvalidNodeSpecError,
            "Malformed host settings: #{host_settings}"
    end

  end
end