File: parser.rb

package info (click to toggle)
ruby-jsonpath 1.1.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 332 kB
  • sloc: ruby: 1,831; makefile: 4
file content (219 lines) | stat: -rw-r--r-- 7,358 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# frozen_string_literal: true

require 'strscan'

class JsonPath
  # Parser parses a filter expression and evaluates it against @_current_node.
  class Parser
    include Dig

    REGEX = /\A\/(.+)\/([imxnesu]*)\z|\A%r{(.+)}([imxnesu]*)\z/

    # Sets up a parser for one node.
    #
    # node    - the object that expressions are evaluated against.
    # options - kept as given in @options.
    def initialize(node, options)
      @options = options
      # Expression leg => evaluated result; filled by construct_expression_map.
      @_expr_map = {}
      @_current_node = node
    end

    # Evaluates a filter expression against the current node.
    #
    # The expression is split into legs on `&&` and `||`. Every leg is
    # evaluated (see construct_expression_map) and its text is replaced by the
    # result, which yields a string such as:
    #   ((false || false) && (false || true))
    # Parenthesised groups are then collapsed one at a time, each group being
    # evaluated from left to right:
    #   (false && (false || true))
    #   (false && true)
    #   false
    # Operators left over outside any parentheses are reduced the same way.
    #
    # exp - the expression String. It is not modified.
    #
    # Returns the value bool_or_exp derives from the fully reduced expression.
    # Raises ArgumentError when the parentheses do not pair up.
    def parse(exp)
      # Work on a copy: the substitutions below must not leak into the
      # caller's string, and a frozen argument must not raise FrozenError.
      exp = exp.dup
      construct_expression_map(exp.split(/(&&)|(\|\|)/))
      # Block form so that backslashes in a result are inserted literally
      # instead of being read as back-references.
      @_expr_map.each { |leg, result| exp.sub!(leg) { result.to_s } }
      raise ArgumentError, "unmatched parenthesis in expression: #{exp}" unless check_parenthesis_count(exp)

      exp = parse_parentheses(exp) while exp.include?('(')
      # `true && false` without surrounding parentheses would otherwise be
      # handed to bool_or_exp as an (always truthy) string.
      exp = parse_parentheses("(#{exp})") if exp =~ /&&|\|\|/
      bool_or_exp(exp)
    end

    # Fills @_expr_map with one entry per expression leg. The key is the leg's
    # text with surrounding whitespace, leading `(` and trailing `)` removed;
    # the value is whatever parse_exp returns for that text. The `&&` and `||`
    # tokens themselves are skipped.
    # Exp.:
    # {"@['author'] =~ /herman|lukyanenko/i"=>0}
    # {"@['isTrue']"=>true}
    def construct_expression_map(exps)
      exps.each do |token|
        next if %w[&& ||].include?(token)

        leg = token.strip.gsub(/\)*$/, '').gsub(/^\(*/, '')
        @_expr_map[leg] = parse_exp(leg)
      end
    end

    # Evaluates one expression leg (no `&&` / `||`) against the current node.
    #
    # A leg may start with an array index (`@[2]`), continue with a path made
    # of `['key']` and `.name` segments, and end with an operator and operand:
    #   @['price'] < 10
    #   @.author =~ /herman/i
    #
    # exp - the leg as a String.
    #
    # Returns true or false when the leg has no operator or the path resolves
    # to nil; otherwise returns whatever the operator method returns for the
    # resolved value and the operand (both converted to Float when possible).
    # Raises ArgumentError when an index is applied to a non-array or is out
    # of bounds.
    def parse_exp(exp)
      exp = exp.sub(/@/, '').gsub(/^\(/, '').gsub(/\)$/, '').tr('"', '\'').strip
      exp.scan(/^\[(\d+)\]/) do |i|
        next if i.empty?

        # Base 10 on purpose: Integer('08') would be rejected as invalid octal.
        index = Integer(i[0], 10)
        raise ArgumentError, 'Node does not appear to be an array.' unless @_current_node.is_a?(Array)
        # Valid indices are 0...size, so index == size is out of bounds too.
        raise ArgumentError, "Index out of bounds for nested array. Index: #{index}" if index >= @_current_node.size

        # NOTE: this rebinds the parser's node; anything evaluated afterwards
        # by this parser sees the indexed element.
        @_current_node = @_current_node[index]
        # Remove the extra '' and the index.
        exp = exp.gsub(/^\[\d+\]|\[''\]/, '')
      end

      scanner = StringScanner.new(exp)
      elements = []
      operator = nil
      operand = nil
      until scanner.eos?
        if (t = scanner.scan(/\['[a-zA-Z@&*\/$%^?_]+'\]|\.[a-zA-Z0-9_]+[?]?/))
          elements << t.gsub(/[\[\]'.]|\s+/, '')
        elsif (t = scanner.scan(/(\s+)?[<>=!\-+][=~]?(\s+)?/))
          operator = t
        else
          # Anything else is the operand. Every part of this pattern is
          # optional and it consumes at least one character whenever input is
          # left, so it cannot fail here and the loop always advances.
          t = scanner.scan(/(\s+)?'?.*'?(\s+)?/)
          # If we encounter a node which does not contain `'` it means
          # that we are dealing with a boolean type.
          operand =
            if t == 'true'
              true
            elsif t == 'false'
              false
            elsif operator.to_s.strip == '=~'
              parse_regex(t)
            else
              t.gsub(%r{^'|'$}, '').strip
            end
        end
      end

      el = if elements.empty?
             @_current_node
           elsif @_current_node.is_a?(Hash)
             dig(@_current_node, *elements)
           else
             # SECURITY: the names come straight from the expression text.
             # public_send keeps private methods (exit, binding, sleep, ...)
             # out of reach of whoever wrote the expression.
             elements.inject(@_current_node) { |receiver, name| receiver.public_send(name) }
           end

      return (el ? true : false) if el.nil? || operator.nil?

      el = Float(el) rescue el
      operand = Float(operand) rescue operand

      el.public_send(operator.strip, operand)
    end

    private

    # Builds a Regexp from its literal notation without using eval:
    #   /foo/i   -> Regexp.new("foo", Regexp::IGNORECASE)
    #   %r{foo}i -> the same
    # following https://github.com/seamusabshere/to_regexp/blob/master/lib/to_regexp.rb
    #
    # t - the regex literal as a String, with no surrounding whitespace.
    #
    # Returns a Regexp.
    # Raises ArgumentError when t is written in neither notation.
    def parse_regex(t)
      match = REGEX.match(t)
      raise ArgumentError, "unsupported regex #{t} use /foo/ style" unless match

      content = (match[1] || match[3]).gsub('\\/', '/')
      options = match[2] || match[4]

      flags = 0
      flags |= Regexp::IGNORECASE if options.include?('i')
      flags |= Regexp::MULTILINE if options.include?('m')
      flags |= Regexp::EXTENDED if options.include?('x')

      # 'n' = none, 'e' = EUC, 's' = SJIS, 'u' = UTF-8
      # The encoding used to be passed as a third argument to Regexp.new,
      # which is deprecated in Ruby 3.2 and no longer accepted by 3.3. That
      # argument only ever honoured a leading 'n' (other letters were ignored
      # with a warning), which is exactly the NOENCODING flag.
      flags |= Regexp::NOENCODING if options[/[nes]/] == 'n'

      Regexp.new(content, flags)
    end

    # Collapses one parenthesised group: the one closed by the first `)` in
    # str. Its content is evaluated and the whole group, parentheses included,
    # is replaced with the result. Calling this repeatedly eliminates the
    # groups one by one.
    #
    # Inside the group, operators are applied strictly from left to right, so
    # (true && true || false && true) is handled in a single pass.
    #
    # str - String holding at least one well-formed `( ... )` group.
    #
    # Returns a new String with that group replaced by its result.
    # Raises ArgumentError when the first `)` has no `(` in front of it, or
    # when there is no `)` at all.
    def parse_parentheses(str)
      closing_index = str.index(')')
      opening_index = closing_index && str.rindex('(', closing_index)
      raise ArgumentError, "unmatched parenthesis in expression: #{str}" unless opening_index

      # split keeps the captured operators, so the list alternates
      # operand, operator, operand, ...
      tokens = str[(opening_index + 1)...closing_index].split(/(&&)|(\|\|)/).map(&:strip)
      res = bool_or_exp(tokens.shift)
      tokens.each_slice(2) do |operator, operand|
        next_value = bool_or_exp(operand)
        if operator == '&&'
          res &&= next_value
        elsif operator == '||'
          res ||= next_value
        end
      end

      # Exclusive range for the prefix: when the group starts at index 0 the
      # prefix must be empty. (`str[0..opening_index - 1]` would be
      # `str[0..-1]`, i.e. the whole string, and the expression would grow on
      # every pass instead of shrinking.)
      "#{str[0...opening_index]}#{res}#{str[(closing_index + 1)..-1]}"
    end

    # Turns a piece of the assembled expression string back into a Ruby value.
    # The expression is built as text (e.g. `true || true && false`), so the
    # pieces arrive as strings and have to be mapped back:
    #   'true'  -> true
    #   'false' -> false
    #   ''      -> nil (also what nil itself ends up as)
    #   anything Float() accepts -> that Float
    #   anything else -> returned unchanged
    def bool_or_exp(b)
      case b.to_s
      when 'true' then true
      when 'false' then false
      when '' then nil
      else
        begin
          Float(b)
        rescue StandardError
          b
        end
      end
    end

    # Makes sure every parenthesis has its pair before the parenthesis
    # parsing starts.
    #
    # exp - the expression String.
    #
    # Returns true when exp contains no `(` at all, or when every `)` closes
    # an earlier `(` and nothing is left open; false otherwise.
    def check_parenthesis_count(exp)
      return true unless exp.include?('(')

      depth = 0
      exp.each_char do |c|
        case c
        when '('
          depth += 1
        when ')'
          depth -= 1
          # A `)` with nothing open can never be paired up later, even if the
          # totals match in the end (e.g. ")(").
          return false if depth.negative?
        end
      end
      depth.zero?
    end
  end
end