File: ox.rb

package info (click to toggle)
ruby-multi-xml 0.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 472 kB
  • sloc: ruby: 2,822; sh: 4; makefile: 2
file content (133 lines) | stat: -rw-r--r-- 3,800 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
require "ox"

module MultiXml
  module Parsers
    # XML parser using the Ox library's SAX API (Ox is a fast native-extension parser, not pure Ruby)
    #
    # @api private
    module Ox
      module_function

      # Report the exception class that signals a parse failure for this parser
      #
      # @api private
      # @return [Class] Ox::ParseError
      def parse_error
        ::Ox::ParseError
      end

      # Run Ox's SAX parser over an IO and return the hash built by the handler
      #
      # A fresh Handler is created for every call, handed to Ox.sax_parse
      # together with the fixed parser options, and then asked for its result.
      #
      # @api private
      # @param io [IO] IO-like object containing XML
      # @return [Hash] Parsed XML as a hash
      def parse(io)
        sax_options = {convert_special: true, skip: :skip_return}
        Handler.new.tap { |sax_handler| ::Ox.sax_parse(sax_handler, io, **sax_options) }.result
      end

      # SAX event handler that builds a nested hash tree while parsing
      #
      # Keeps a stack of hashes. The bottom entry is a wrapper hash keyed by
      # the root element's name; the top entry is the hash of the element
      # currently being parsed. Attributes, child elements and text content
      # are all stored in that per-element hash, and a key that occurs more
      # than once has its values collected into an array.
      #
      # @api private
      class Handler
        # Create a new SAX handler with an empty element stack
        #
        # @return [Handler] new handler instance
        def initialize
          @stack = []
        end

        # Get the parsed result
        #
        # @return [Hash, nil] the root wrapper hash, or nil if no element was seen
        def result = @stack.first

        # Handle start of an element
        #
        # Registers a new hash for the element under its name in the parent
        # hash and makes it the current element.
        #
        # @param name [Symbol] Element name
        # @return [void]
        def start_element(name)
          # The first element seen needs a wrapper hash to be stored in.
          @stack << {} if @stack.empty?
          child = {}
          add_value(name.to_s, child)
          @stack << child
        end

        # Handle end of an element
        #
        # Drops insignificant text content from the finished element and
        # returns to its parent.
        #
        # @param _name [Symbol] Element name (unused)
        # @return [void]
        def end_element(_name)
          strip_whitespace_content if current.key?(TEXT_CONTENT_KEY)
          @stack.pop
        end

        # Handle an attribute
        #
        # Attributes reported before any element has been started are ignored.
        #
        # @param name [Symbol] Attribute name
        # @param value [String] Attribute value
        # @return [void]
        def attr(name, value)
          add_value(name.to_s, value) unless @stack.empty?
        end

        # Handle text content
        #
        # @param value [String] Text content
        # @return [void]
        def text(value) = add_value(TEXT_CONTENT_KEY, value)

        # Handle CDATA content (stored under the same key as plain text)
        #
        # @param value [String] CDATA content
        # @return [void]
        def cdata(value) = add_value(TEXT_CONTENT_KEY, value)

        # Handle parse errors
        #
        # @param message [String] Error message
        # @param line [Integer] Line number
        # @param column [Integer] Column number
        # @return [void]
        # @raise [Ox::ParseError] always
        def error(message, line, column)
          raise ::Ox::ParseError, "#{message} at #{line}:#{column}"
        end

        private

        # Get the current element hash
        #
        # @return [Hash] current hash being built
        def current = @stack.last

        # Add a value to the current hash, merging with existing if needed
        #
        # @param key [String] Key to add
        # @param value [Object] Value to add
        # @return [void]
        def add_value(key, value)
          existing = current[key]
          current[key] = existing ? merge_values(existing, value) : value
        end

        # Merge a value with an existing value, creating array if needed
        #
        # An existing array is appended to in place; anything else is wrapped
        # together with the new value in a fresh two-element array.
        #
        # @param existing [Object] Existing value
        # @param value [Object] Value to append
        # @return [Array] array with both values
        def merge_values(existing, value)
          existing.is_a?(Array) ? existing << value : [existing, value]
        end

        # Remove empty or whitespace-only text content
        #
        # Empty content is always removed. Whitespace-only content is removed
        # only when the element also has other keys (attributes or children).
        #
        # @return [void]
        def strip_whitespace_content
          content = current[TEXT_CONTENT_KEY]
          should_remove = content.empty? || (current.size > 1 && blank_text?(content))
          current.delete(TEXT_CONTENT_KEY) if should_remove
        end

        # Check whether text content consists solely of whitespace
        #
        # Content is a single String for one text/CDATA event, but becomes an
        # Array once several events were merged for the same element; calling
        # String#strip directly on that array would raise NoMethodError.
        #
        # @param content [String, Array] Stored text content
        # @return [Boolean] true when every segment is blank
        def blank_text?(content)
          segments = content.is_a?(Array) ? content : [content]
          segments.all? { |segment| segment.to_s.strip.empty? }
        end
      end
    end
  end
end