File: rexml.rb

package info (click to toggle)
ruby-multi-xml 0.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 472 kB
  • sloc: ruby: 2,822; sh: 4; makefile: 2
file content (119 lines) | stat: -rw-r--r-- 3,554 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
require "rexml/document"

module MultiXml
  module Parsers
    # XML parser using Ruby's REXML library
    #
    # Walks a REXML document tree and turns it into nested hashes: each
    # element becomes a hash holding its attributes, its child elements
    # (keyed by element name) and, where kept, its text stored under
    # TEXT_CONTENT_KEY. Values that share a key are gathered into an array.
    #
    # @api private
    module Rexml
      extend self

      # Get the parse error class for this parser
      #
      # @api private
      # @return [Class] REXML::ParseException
      def parse_error = ::REXML::ParseException

      # Parse XML from an IO object
      #
      # @api private
      # @param io [IO] IO-like object containing XML
      # @return [Hash] Parsed XML as a hash keyed by the root element name
      # @raise [REXML::ParseException] if XML is malformed or has no root element
      def parse(io)
        root = ::REXML::Document.new(io).root

        # REXML::Document#root is nil when the document holds no element
        # (for instance comment-only input that REXML accepts without
        # raising). Report that as a parse error, the class callers are told
        # to rescue, instead of failing later with NoMethodError on nil.
        raise ::REXML::ParseException, "The document does not have a root element" unless root

        element_to_hash({}, root)
      end

      private

      # Convert an element to hash format
      #
      # Stores the collapsed form of the element in the accumulator under the
      # element's name.
      #
      # @api private
      # @param hash [Hash] Accumulator hash
      # @param element [REXML::Element] Element to convert
      # @return [Hash] Updated hash
      def element_to_hash(hash, element)
        add_to_hash(hash, element.name, collapse_element(element))
      end

      # Collapse an element into a hash with attributes and content
      #
      # Text handling depends on what else the element carries:
      # - with child elements, text is kept only when it is not pure whitespace;
      # - without child elements, whitespace-only text is dropped only when
      #   the element has attributes.
      #
      # @api private
      # @param element [REXML::Element] Element to collapse
      # @return [Hash] Hash representation
      def collapse_element(element)
        node_hash = collect_attributes(element)

        if element.has_elements?
          collect_child_elements(element, node_hash)
          add_text_content(node_hash, element) unless whitespace_only?(element)
        elsif node_hash.empty? || !whitespace_only?(element)
          # node_hash holds only attributes here, so empty? means "no attributes".
          add_text_content(node_hash, element)
        end

        node_hash
      end

      # Collect all attributes from an element into a hash
      #
      # @api private
      # @param element [REXML::Element] Element with attributes
      # @return [Hash] Hash of attribute name-value pairs
      def collect_attributes(element)
        element.attributes.each_with_object({}) { |(name, value), hash| hash[name] = value }
      end

      # Collect all child elements into a hash
      #
      # Each child is converted recursively and merged into node_hash.
      #
      # @api private
      # @param element [REXML::Element] Parent element
      # @param node_hash [Hash] Hash to populate
      # @return [void]
      def collect_child_elements(element, node_hash)
        element.each_element { |child| element_to_hash(node_hash, child) }
      end

      # Add text content from an element to a hash
      #
      # The values of all text nodes directly under the element are joined
      # into one string and stored under TEXT_CONTENT_KEY. The hash is left
      # untouched when the element has no text.
      #
      # @api private
      # @param hash [Hash] Target hash
      # @param element [REXML::Element] Element with text
      # @return [Hash] Updated hash
      def add_text_content(hash, element)
        return hash unless element.has_text?

        text = element.texts.map(&:value).join
        add_to_hash(hash, TEXT_CONTENT_KEY, text)
      end

      # Add a value to a hash, handling duplicates as arrays
      #
      # - key already present with an Array: the value is appended in place;
      # - key already present with anything else: both values are paired in a
      #   new array;
      # - key absent and value is an Array: the value is wrapped once more so
      #   a later append treats it as a single item;
      # - key absent otherwise: the value is stored as is.
      #
      # @api private
      # @param hash [Hash] Target hash
      # @param key [String] Key to add
      # @param value [Object] Value to add
      # @return [Hash] Updated hash
      def add_to_hash(hash, key, value)
        hash[key] =
          if hash.key?(key)
            # Test key presence rather than truthiness so an existing falsy
            # value is not silently overwritten.
            existing = hash[key]
            existing.is_a?(Array) ? existing << value : [existing, value]
          elsif value.is_a?(Array)
            [value]
          else
            value
          end
        hash
      end

      # Check if element contains only whitespace text
      #
      # Also true when the element has no text nodes at all.
      #
      # @api private
      # @param element [REXML::Element] Element to check
      # @return [Boolean] true if whitespace only
      def whitespace_only?(element)
        element.texts.join.strip.empty?
      end
    end
  end
end