File: dom_parser.rb

package info (click to toggle)
ruby-multi-xml 0.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 472 kB
  • sloc: ruby: 2,822; sh: 4; makefile: 2
file content (97 lines) | stat: -rw-r--r-- 2,938 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
module MultiXml
  module Parsers
    # Mixin that walks a DOM tree and builds a nested hash from it
    #
    # Shared by the Nokogiri, LibXML, and Oga parsers. A module that
    # includes this one has to supply three adapter methods:
    # - each_child(node) { |child| ... }
    # - each_attr(node) { |attr| ... }
    # - node_name(node) -> String
    #
    # Child nodes are additionally expected to answer +element?+, +text?+,
    # +cdata?+ and +content+; attributes are expected to answer +value+.
    #
    # @api private
    module DomParser
      # Build the hash form of a node and file it under the node's name
      #
      # @api private
      # @param node [Object] XML node to convert
      # @param hash [Hash] Accumulator that receives the converted node
      # @return [Hash] The accumulator that was passed in (or a new one)
      def node_to_hash(node, hash = {})
        entry = {TEXT_CONTENT_KEY => +""}
        # The entry is registered first and filled in afterwards; the
        # accumulator holds a reference, so later mutations are visible.
        add_value(hash, node_name(node), entry)
        collect_children(node, entry)
        collect_attributes(node, entry)
        strip_whitespace_content(entry)
        hash
      end

      private

      # Store a value under a key, growing into an array on repeats
      #
      # An existing array is appended to, an existing hash is paired with
      # the new value in a fresh array, and anything else is overwritten.
      #
      # @api private
      # @param hash [Hash] Target hash
      # @param key [String] Key to store under
      # @param value [Object] Value to store
      # @return [void]
      def add_value(hash, key, value)
        current = hash[key]
        hash[key] =
          if current.is_a?(Array)
            current << value
          elsif current.is_a?(Hash)
            [current, value]
          else
            value
          end
      end

      # Fold every child of a node into the node's hash
      #
      # Element children are converted recursively; text and CDATA children
      # are appended to the text content. Other node kinds are skipped.
      #
      # @api private
      # @param node [Object] Parent node
      # @param node_hash [Hash] Hash to populate
      # @return [void]
      def collect_children(node, node_hash)
        each_child(node) do |child|
          next node_to_hash(child, node_hash) if child.element?
          next unless text_or_cdata?(child)

          node_hash[TEXT_CONTENT_KEY] << child.content
        end
      end

      # Tell whether a node carries character data (text or CDATA)
      #
      # @api private
      # @param node [Object] Node to check
      # @return [Boolean] true if the node is text or CDATA
      def text_or_cdata?(node)
        textual = node.text?
        textual || node.cdata?
      end

      # Copy a node's attributes into the node's hash
      #
      # When the key is already taken, the attribute value is placed in
      # front of the existing entry inside a two-element array.
      #
      # @api private
      # @param node [Object] Node with attributes
      # @param node_hash [Hash] Hash to populate
      # @return [void]
      def collect_attributes(node, node_hash)
        each_attr(node) do |attribute|
          key = node_name(attribute)
          taken = node_hash[key]
          node_hash[key] =
            if taken
              [attribute.value, taken]
            else
              attribute.value
            end
        end
      end

      # Drop the text content entry when it carries no information
      #
      # The entry is removed when it is empty, or when it is whitespace-only
      # and the hash has other keys besides it. Whitespace-only text that is
      # the sole entry is kept.
      #
      # @api private
      # @param node_hash [Hash] Hash to clean up
      # @return [void]
      def strip_whitespace_content(node_hash)
        text = node_hash[TEXT_CONTENT_KEY]
        keep = !text.empty? && (node_hash.size <= 1 || !text.strip.empty?)
        return if keep

        node_hash.delete(TEXT_CONTENT_KEY)
      end
    end
  end
end