File: reader.rb

package info (click to toggle)
ruby-nokogiri 1.10.0%2Bdfsg1-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 5,088 kB
  • sloc: xml: 28,081; ruby: 16,687; java: 13,293; ansic: 4,954; yacc: 265; sh: 76; makefile: 19
file content (112 lines) | stat: -rw-r--r-- 3,070 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
module Nokogiri
  module XML
    ###
    # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
    # would move.  The Reader is given an XML document, and yields nodes
    # to an each block.
    #
    # Here is an example of usage:
    #
    #   reader = Nokogiri::XML::Reader(<<-eoxml)
    #     <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    #       <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
    #     </x>
    #   eoxml
    #
    #   reader.each do |node|
    #
    #     # node is an instance of Nokogiri::XML::Reader
    #     puts node.name
    #
    #   end
    #
    # Note that Nokogiri::XML::Reader#each can only be called once!!  Once
    # the cursor moves through the entire document, you must parse the
    # document again.  So make sure that you capture any information you
    # need during the first iteration.
    #
    # The Reader parser is good for when you need the speed of a SAX parser,
    # but do not want to write a Document handler.
    class Reader
      include Enumerable

      TYPE_NONE = 0
      # Element node type
      TYPE_ELEMENT = 1
      # Attribute node type
      TYPE_ATTRIBUTE = 2
      # Text node type
      TYPE_TEXT = 3
      # CDATA node type
      TYPE_CDATA = 4
      # Entity Reference node type
      TYPE_ENTITY_REFERENCE = 5
      # Entity node type
      TYPE_ENTITY = 6
      # PI node type
      TYPE_PROCESSING_INSTRUCTION = 7
      # Comment node type
      TYPE_COMMENT = 8
      # Document node type
      TYPE_DOCUMENT = 9
      # Document Type node type
      TYPE_DOCUMENT_TYPE = 10
      # Document Fragment node type
      TYPE_DOCUMENT_FRAGMENT = 11
      # Notation node type
      TYPE_NOTATION = 12
      # Whitespace node type
      TYPE_WHITESPACE = 13
      # Significant Whitespace node type
      TYPE_SIGNIFICANT_WHITESPACE = 14
      # Element end node type
      TYPE_END_ELEMENT = 15
      # Entity end node type
      TYPE_END_ENTITY = 16
      # XML Declaration node type
      TYPE_XML_DECLARATION = 17

      # A list of errors encountered while parsing
      attr_accessor :errors

      # The encoding for the document
      attr_reader :encoding

      # The XML source
      attr_reader :source

      alias :self_closing? :empty_element?

      def initialize source, url = nil, encoding = nil # :nodoc:
        @source   = source
        @errors   = []
        @encoding = encoding
      end
      private :initialize

      ###
      # Get a list of attributes for the current node.
      def attributes
        Hash[attribute_nodes.map { |node|
          [node.name, node.to_s]
        }].merge(namespaces || {})
      end

      ###
      # Get a list of attributes for the current node
      def attribute_nodes
        nodes = attr_nodes
        nodes.each { |v| v.instance_variable_set(:@_r, self) }
        nodes
      end

      ###
      # Move the cursor through the document yielding the cursor to the block
      def each
        while cursor = self.read
          yield cursor
        end
      end
    end
  end
end