File: document.rb

package info (click to toggle)
librexml-ruby 1.2.5-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 792 kB
  • ctags: 655
  • sloc: ruby: 3,778; xml: 1,609; java: 109; makefile: 43
file content (224 lines) | stat: -rw-r--r-- 7,273 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
require "rexml/element"
require "rexml/xmldecl"
require "rexml/source"
require "rexml/comment"
require "rexml/doctype"
require "rexml/instruction"
require "rexml/rexml"
require "rexml/parseexception"

module REXML
	# Represents a full XML document, including PIs, a doctype, etc.  A
	# Document has a single root element, which can be accessed by root().
	# Note that if you want to have an XML declaration written for a document
	# you create, you must add one; REXML documents do not write a default
	# declaration for you.  See Document::DECLARATION and Document#write.
	class Document < Element
		# A convenient default XML declaration, built from "1.0" and "UTF-8".
		# If you want an XML declaration, the easiest way to add one is
		#   mydoc << Document::DECLARATION
		# NOTE: this is one shared object.  xml_decl() returns this very
		# instance for a document that has no declaration of its own, and add()
		# stores the object it is given without copying it, so a change made
		# to it is visible everywhere it is used.
		DECLARATION = XMLDecl.new( "1.0", "UTF-8" )

		# Builds a new Document, optionally populating it from +source+.
		# @param source (optional) what to build the document from:
		#   * nil (the default): nothing more is done after the basic setup.
		#   * a Source: it is parsed into this document.
		#   * a Document: it is handed to the inherited constructor, and its
		#     context object is reused (not copied) by this document.
		#   * anything else (typically a String or an IO): it is converted
		#     with SourceFactory.create_from and the result is parsed.
		# @param context (optional) the context of the document; this should
		# be a Hash.
		# NOTE that the purpose of the context is unclear; it was carried over
		# from the Electric XML API (in which it also seems to do nothing), and
		# it is now legacy.  It may do something, someday... it may disappear.
		def initialize( source = nil, context = {} )
			super()
			@context = context
			return if source.nil?
			case source
			when Source
				parse( source )
			when Document
				# copy construction: let the superclass copy what it knows
				# about, then take over the other document's context
				super source
				@context = source.context
			else
				parse( SourceFactory.create_from(source) )
			end
		end

		# Produces a copy of this document by passing it to Document.new.
		# @return the newly constructed Document
		def clone
			Document.new( self )
		end

		# The inherited add is overridden because an XMLDecl or a DocType has
		# to sit at the start of the document: an XMLDecl is placed first, and
		# a DocType goes directly after a leading XMLDecl (or first when there
		# is none).  Every other kind of child is passed to the inherited add.
		# Also available as <<.
		# @param child the node to add to this document
		# @return for ordinary children, whatever the inherited add returns
		# @raise RuntimeError if, once the inherited add has run, @elements
		# holds more than one entry
		def add( child )
			case child
			when XMLDecl
				@children.unshift child
			when DocType
				if @children[0].kind_of? XMLDecl
					@children[1,0] = child
				else
					@children.unshift child
				end
			else
				added = super
				if @elements.size > 1
					raise "attempted adding second root element to document"
				end
				added
			end
		end
		alias :<< :add

		# Adds an element through the inherited add_element, then verifies that
		# the document has not ended up with more than one element.
		# @param arg passed unchanged to the inherited add_element
		# @param arg2 passed unchanged to the inherited add_element
		# @return whatever the inherited add_element returns
		# @raise RuntimeError if @elements holds more than one entry afterwards
		def add_element(arg=nil, arg2=nil)
			added = super
			if @elements.size > 1
				raise "attempted adding second root element to document"
			end
			added
		end

		# Looks up the root element of the document.
		# @return the first child that is an Element, or nil when the document
		# has no such child.
		def root
			@children.detect { |node| Element === node }
		end

		# Looks up the document type declaration of the document.
		# @return the first child that is a DocType, or nil when the document
		# has no such child.
		def doc_type
			@children.detect { |node| DocType === node }
		end

		# Looks up the XML declaration of the document.
		# @return the first child that is an XMLDecl; when the document has
		# none, the shared default DECLARATION is returned instead, so the
		# result is never nil.
		def xml_decl
			@children.detect { |node| XMLDecl === node } || DECLARATION
		end

		# @return the version reported by this document's XML declaration.
		# xml_decl() substitutes Document::DECLARATION when the document has no
		# declaration of its own, so in that case the version of the default
		# declaration is returned.
		def version
			# xml_decl() never returns nil, so no nil fallback is needed here
			xml_decl().version
		end

		# @return the encoding reported by this document's XML declaration.
		# xml_decl() substitutes Document::DECLARATION when the document has no
		# declaration of its own, so in that case the encoding of the default
		# declaration is returned.
		def encoding
			# xml_decl() never returns nil, so no nil fallback is needed here
			xml_decl().encoding
		end

		# @return the standalone value reported by this document's XML
		# declaration.  xml_decl() substitutes Document::DECLARATION when the
		# document has no declaration of its own, so in that case the value of
		# the default declaration is returned.
		def stand_alone?
			# xml_decl() never returns nil, so no nil fallback is needed here
			xml_decl().stand_alone?
		end

		# Write the XML tree out, optionally with indent.  Every child of the
		# document is written in order, so this covers XML declarations,
		# doctype declarations and processing instructions (if any are given)
		# as well as the root element.  A line break is written between
		# consecutive children, but not after the last one.
		# A controversial point is whether Document should always write the XML
		# declaration (<?xml version='1.0'?>) whether or not one is given by the
		# user (or source document).  REXML does not write one if one was not
		# specified, because it adds unnecessary bandwidth to applications such
		# as XML-RPC.
		# @param output an object which supports '<< string'; this is where the
		# document will be written
		# @param indent (optional) if given, the starting indent for the lines
		# in the document.
		def write( output, indent=0 )
			last_index = @children.size - 1
			@children.each_with_index { |node, index|
				node.write( output, indent )
				# Decide by position, not by comparing nodes: a child that is ==
				# to the last child must still be followed by a line break.
				output << "\n" unless index == last_index
			}
		end

		# Stream parser.  The source is walked from start to end, and each
		# construct found at the top level is dispatched to the parse_stream
		# method of the matching class (Comment, DocType, XMLDecl, Instruction
		# or, for anything else that opens with '<', Element), which is given
		# the source and the listener.  This method itself neither accepts nor
		# yields to a block; events are reported through the listener only.
		# The listener must supply the following methods:
		# tag_start( "name", { attributes } )
		# tag_end( "name" )
		# text( "text" )
		# instruction( "name", "instruction" )
		# comment( "comment" )
		# doctype( "name", *contents )
		# @param source a Source, an IO (wrapped in an IOSource) or a String
		# (wrapped in a Source)
		# @param listener the object that is notified of the parse events
		# @raise RuntimeError if source is of any other type, or if character
		# data is found where markup was expected
		def Document.parse_stream( source, listener )
			if source.kind_of? Source
				# already a Source; used as it is
			elsif source.kind_of? IO
				source = IOSource.new(source)
			elsif source.kind_of? String
				source = Source.new source
			else
				raise "Unknown source type!"
			end

			while not source.empty?
				# NOTE(review): the second argument (true) presumably tells the
				# source to consume the matched text -- confirm in Source#match.
				source.match( /^\s*/um, true )
				word = source.match( /^\s*(<.*?)>/um )
				word = word[1] unless word.nil?
				case word
				when nil
					# no markup ahead: what is left is either nothing at all or
					# stray character data
					word = source.match( /\s*(\S+)/um, true )
					return if word.nil?
					stray = word[1]
					raise "data found outside of root element ('#{stray}')" if stray.strip.length > 0
				when Comment::START_RE
					Comment.parse_stream source, listener
				when DocType::START_RE
					DocType.parse_stream source, listener
				when XMLDecl::START_RE
					XMLDecl.parse_stream source, listener
				when Instruction::START_RE
					Instruction.parse_stream source, listener
				else
					Element.parse_stream source, listener
				end
			end
			# Here we need to check for invalid documents.
		end

		# Builds the document tree from +source+.  Comments, doctypes, XML
		# declarations and processing instructions found at the top level are
		# constructed from the source and added with add(); anything else that
		# opens with '<' is constructed as an Element, which is given this
		# document and its context.
		# This and parse_stream could have been combined, but separating them
		# improves the speed of REXML
		# @param source the Source to read the document from
		# @raise ParseException if character data is found where markup was
		# expected, or if any other StandardError occurs while parsing (that
		# error is handed to the ParseException).  This includes the error
		# raised when @elements does not hold exactly one entry once the loop
		# has run to the end of the source.  Exceptions outside StandardError
		# (Interrupt, SystemExit, ...) are deliberately left alone.
		def parse( source )
			begin
				while not source.empty?
					# NOTE(review): the second argument (true) presumably tells the
					# source to consume the matched text -- confirm in Source#match.
					source.match( /^\s*/um, true )
					word = source.match( /^(<.*?)>/um )
					word = word[1] unless word.nil?
					case word
					when nil
						word = source.match( /\s*(\S+)/um, true )
						# NOTE(review): this return leaves the method before the
						# single-root check below is reached -- confirm that this
						# is intended.
						return if word.nil?
						raise ParseException.new( "data found outside of root element (data is '#{word}')", source ) if word[0].strip.length > 0
					when Comment::START_RE
						self.add( Comment.new( source ) )
					when DocType::START_RE
						self.add( DocType.new( source ) )
					when XMLDecl::START_RE
						self.add( XMLDecl.new( source ) )
					when Instruction::START_RE
						self.add( Instruction.new( source ) )
					else
						Element.new( source, self, @context )
					end
				end
				unless @elements.size == 1
					raise "the document does not have exactly one root"
				end
			rescue ParseException => error
				# complete the error with where it happened, then pass it on
				error.source = source
				error.element = self
				raise
			rescue StandardError => error
				# StandardError rather than Exception, so that signals and exit
				# requests are not turned into parse errors
				raise ParseException.new("unidentified error", source, self, error)
			end
		end
	end
end