File: sax_handler.rb

package info (click to toggle)
ruby-multi-xml 0.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 472 kB
  • sloc: ruby: 2,822; sh: 4; makefile: 2
file content (117 lines) | stat: -rw-r--r-- 3,280 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
require "cgi/escape"

module MultiXml
  module Parsers
    # Stack-driven construction of a nested hash from SAX-style XML events
    #
    # Holds the event-handling logic shared by the NokogiriSax and LibxmlSax
    # parsers. A class mixing this in is expected to provide the callbacks
    # its SAX library invokes and to forward them to the helpers below.
    #
    # @api private
    module SaxHandler
      # Set up fresh parsing state
      #
      # Creates the empty result hash, seeds the element stack with it, and
      # clears the list of attributes waiting to be applied.
      #
      # @api private
      # @return [void]
      def initialize_handler
        @pending_attrs = []
        @result = {}
        @stack = [@result]
      end

      # The hash tree built so far
      #
      # @api private
      # @return [Hash] the parsed hash
      attr_reader :result

      private

      # The hash on top of the element stack
      #
      # @api private
      # @return [Hash] hash of the element currently being built
      def current
        @stack.last
      end

      # Open a new element
      #
      # A fresh hash holding an empty, mutable text buffer is attached to the
      # enclosing element and pushed onto the stack. The attributes are only
      # remembered here; they are merged in when the element is closed.
      #
      # @api private
      # @param name [String] Element name
      # @param attrs [Hash, Array] Element attributes
      # @return [void]
      def handle_start_element(name, attrs)
        element = {TEXT_CONTENT_KEY => +""}
        add_child_to_current(name, element)
        @stack.push(element)
        @pending_attrs.push(normalize_attrs(attrs))
      end

      # Close the element on top of the stack
      #
      # Merges the attributes remembered at open time, drops text content
      # that is empty or insignificant, then removes the element from the
      # stack.
      #
      # @api private
      # @return [void]
      def handle_end_element
        attrs = @pending_attrs.pop
        apply_attributes(attrs)
        strip_whitespace_content
        @stack.pop
      end

      # Add character data to the open element's text buffer
      #
      # @api private
      # @param text [String] Text to append
      # @return [void]
      def append_text(text)
        buffer = current[TEXT_CONTENT_KEY]
        buffer << text
      end

      # Attach a child hash to the open element under the given name
      #
      # The first child with a name is stored directly, a second one turns
      # the entry into a two-element array, and later ones are appended to
      # that array.
      #
      # @api private
      # @param name [String] Child element name
      # @param child [Hash] Child hash to add
      # @return [void]
      def add_child_to_current(name, child)
        sibling = current[name]
        current[name] =
          if sibling.is_a?(Array)
            sibling << child
          elsif sibling.is_a?(Hash)
            [sibling, child]
          else
            child
          end
      end

      # Coerce attributes into a hash
      #
      # @api private
      # @param attrs [Hash, Array] Attributes as hash or array of pairs
      # @return [Hash] Normalized attributes hash
      def normalize_attrs(attrs)
        return attrs if attrs.is_a?(Hash)

        attrs.to_h
      end

      # Merge attributes into the open element
      #
      # Each value is HTML-unescaped first. When the element already has an
      # entry under the attribute's name, the attribute value is placed in
      # front of that entry inside a two-element array.
      #
      # @api private
      # @param attrs [Hash] Attributes to apply
      # @return [void]
      def apply_attributes(attrs)
        attrs.each do |key, raw|
          decoded = CGI.unescapeHTML(raw)
          previous = current[key]
          current[key] =
            if previous
              [decoded, previous]
            else
              decoded
            end
        end
      end

      # Drop the open element's text entry when it carries no information
      #
      # Empty text is always removed. Whitespace-only text is removed only
      # when the element has other keys besides the text entry; text that is
      # the element's sole entry is kept even if it is blank.
      #
      # @api private
      # @return [void]
      def strip_whitespace_content
        text = current[TEXT_CONTENT_KEY]
        unless text.empty?
          # Non-empty text survives unless it is pure whitespace sitting next
          # to other keys.
          return if current.size <= 1
          return unless text.strip.empty?
        end
        current.delete(TEXT_CONTENT_KEY)
      end
    end
  end
end