1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
|
require "rexml/element"
require "rexml/xmldecl"
require "rexml/source"
require "rexml/comment"
require "rexml/doctype"
require "rexml/instruction"
require "rexml/rexml"
require "rexml/parseexception"
module REXML
# Represents a full XML document, including PIs, a doctype, etc. A
# Document has a single child that can be accessed by root().
# Note that if you want to have an XML declaration written for a document
# you create, you must add one; REXML documents do not write a default
# declaration for you. See |DECLARATION| and |write|.
class Document < Element
# A ready-made XML declaration, built from "1.0" and "UTF-8". Documents
# do not write a declaration unless one has been added, so the easiest
# way to give a document one is mydoc << Document::DECLARATION
# NOTE: xml_decl() hands back this same shared object for documents that
# carry no declaration of their own.
DECLARATION = XMLDecl.new( "1.0", "UTF-8" )
# Builds a document, optionally filling it from +source+.
# @param source (optional) where the content comes from:
#   * nil (the default) - the document is left empty
#   * a Source - parsed directly
#   * a Document - handed to the superclass constructor; the new
#     document then uses the very same context object as the original
#   * anything else (documented as a String or an IO) - converted with
#     SourceFactory.create_from and then parsed
# @param context (optional) stored as this document's context and passed
# to the elements created while parsing; this should be a Hash.
# NOTE from the original author: the purpose of the context is unclear;
# it was carried over from the Electric XML API and is legacy.
def initialize( source = nil, context = {} )
  super()
  @context = context
  return if source.nil?
  case source
  when Source
    parse( source )
  when Document
    super source
    @context = source.context
  else
    parse( SourceFactory.create_from(source) )
  end
end
# Copies this document by handing it to the Document constructor.
# @return the newly constructed Document
def clone
  Document.new( self )
end
# Adds a child to the document. Overridden because XMLDecls and DocTypes
# must sit at the start of the document, and because a document holds at
# most one of each:
# * an XMLDecl becomes the first child, replacing an XMLDecl that is
#   already in first position;
# * a DocType replaces an existing DocType; otherwise it is placed right
#   after a leading XMLDecl, or first when there is none;
# * anything else is passed to the superclass implementation.
# @param child the node to add
# @return for ordinary children, whatever the superclass add returns; the
# value returned for an XMLDecl or DocType is incidental and should not
# be relied upon.
# @raise RuntimeError if, after an ordinary add, the document holds more
# than one element. The check runs after the superclass call, so the
# offending child has already been attached when the error is raised.
def add( child )
  if child.kind_of? XMLDecl
    if @children[0].kind_of? XMLDecl
      # Replace instead of stacking a second declaration on top.
      @children[0] = child
    else
      @children.unshift child
    end
  elsif child.kind_of? DocType
    existing = @children.index { |node| node.kind_of? DocType }
    if existing
      # Replace in place so the document never carries two doctypes.
      @children[existing] = child
    elsif @children[0].kind_of? XMLDecl
      @children[1,0] = child
    else
      @children.unshift child
    end
  else
    rv = super
    raise "attempted adding second root element to document" if @elements.size > 1
    rv
  end
end
alias :<< :add
# Adds an element through the superclass implementation (both arguments
# are forwarded unchanged) and then enforces the single-root rule.
# @return whatever the superclass add_element returned
# @raise RuntimeError if the document holds more than one element after
# the addition; the check runs once the superclass call has completed.
def add_element(arg=nil, arg2=nil)
  added = super
  if @elements.size > 1
    raise "attempted adding second root element to document"
  end
  added
end
# @return the first child that is an Element (the document root), or nil
# when no child is an Element.
def root
  @children.each { |node| return node if node.kind_of? Element }
  nil
end
# @return the first child that is a DocType, or nil when the document
# has none.
def doc_type
  @children.each { |node| return node if node.kind_of? DocType }
  nil
end
# @return the first XMLDecl among the children; when the document has
# none, the shared default DECLARATION is returned instead, so the
# result is never nil.
def xml_decl
  @children.find { |node| node.kind_of? XMLDecl } || DECLARATION
end
# @return the version reported by this document's XMLDecl (as obtained
# from xml_decl). Should xml_decl ever yield nil, the default version
# constant of XMLDecl is returned instead.
def version
  decl = xml_decl
  # A constant must be reached with ::, since XMLDecl.DEFAULT_VERSION
  # would be dispatched as a method call.
  # NOTE(review): assumes XMLDecl declares DEFAULT_VERSION - confirm.
  decl.nil? ? XMLDecl::DEFAULT_VERSION : decl.version
end
# @return the encoding reported by this document's XMLDecl (as obtained
# from xml_decl). Should xml_decl ever yield nil, the default encoding
# constant of XMLDecl is returned instead.
def encoding
  decl = xml_decl
  # A constant must be reached with ::, since XMLDecl.DEFAULT_ENCODING
  # would be dispatched as a method call.
  # NOTE(review): assumes XMLDecl declares DEFAULT_ENCODING - confirm.
  decl.nil? ? XMLDecl::DEFAULT_ENCODING : decl.encoding
end
# @return the standalone value reported by this document's XMLDecl (as
# obtained from xml_decl). Should xml_decl ever yield nil, the default
# standalone constant of XMLDecl is returned instead.
def stand_alone?
  decl = xml_decl
  # A constant must be reached with ::, since XMLDecl.DEFAULT_STANDALONE
  # would be dispatched as a method call.
  # NOTE(review): assumes XMLDecl declares DEFAULT_STANDALONE - confirm.
  decl.nil? ? XMLDecl::DEFAULT_STANDALONE : decl.stand_alone?
end
# Writes every child of the document to +output+, in order, putting a
# newline between consecutive children (none after the last one). The
# children include any XML declaration, doctype declaration and
# processing instructions the document holds.
# A controversial point is whether Document should always write the XML
# declaration (<?xml version='1.0'?>) whether or not one is given by the
# user (or source document). REXML does not write one if one was not
# specified, because it adds unnecessary bandwidth to applications such
# as XML-RPC.
# @param output an object which supports '<< string'; this is where the
# document will be written
# @param indent (optional) passed unchanged to each child's write;
# documented as the starting indent for the lines in the document.
def write( output, indent=0 )
  last_index = @children.size - 1
  # The separator is decided by position, not by comparing nodes: an
  # earlier node that merely compares equal to the last one must still
  # be followed by a newline.
  @children.each_with_index { |node, index|
    node.write( output, indent )
    output << "\n" unless index == last_index
  }
end
# Stream parser. The source is consumed piece by piece; nothing is added
# to a document tree. Each top-level construct is recognised by its
# opening markup and handed, together with the listener, to the
# parse_stream method of the matching class (Comment, DocType, XMLDecl,
# Instruction, or Element for everything else).
# @param source a Source (used as is), an IO (wrapped in an IOSource) or
# a String (wrapped in a Source)
# @param listener passed unchanged to the parse_stream methods mentioned
# above. According to the original documentation the listener must
# supply the following methods:
# tag_start( "name", { attributes } )
# tag_end( "name" )
# text( "text" )
# instruction( "name", "instruction" )
# comment( "comment" )
# doctype( "name", *contents )
# @return nil once only whitespace remains (or the source is exhausted)
# @raise RuntimeError "Unknown source type!" for any other kind of
# source, and "data found outside of root element" when text that is
# not markup is met at the top level.
def self.parse_stream( source, listener )
  if source.kind_of? Source
    # already usable as is
  elsif source.kind_of? IO
    source = IOSource.new(source)
  elsif source.kind_of? String
    source = Source.new source
  else
    raise "Unknown source type!"
  end
  while not source.empty?
    source.match( /^\s*/um, true )
    word = source.match( /^\s*(<.*?)>/um )
    word = word[1] unless word.nil?
    case word
    when nil
      # No markup ahead: either only whitespace is left, or there is
      # stray text at the top level.
      stray = source.match( /\s*(\S+)/um, true )
      return if stray.nil?
      # The pattern requires at least one non-blank character, so any
      # match here is an error.
      raise "data found outside of root element ('#{stray[1]}')"
    when Comment::START_RE
      Comment.parse_stream source, listener
    when DocType::START_RE
      DocType.parse_stream source, listener
    when XMLDecl::START_RE
      XMLDecl.parse_stream source, listener
    when Instruction::START_RE
      Instruction.parse_stream source, listener
    else
      Element.parse_stream source, listener
    end
  end
  # Here we need to check for invalid documents.
end
# Tree parser: reads +source+ to the end and adds what it finds to this
# document. Comments, doctypes, XML declarations and processing
# instructions are added through add(); anything else that looks like
# markup is given to Element.new together with this document and its
# context. This and parse_stream could have been combined, but
# separating them improves the speed of REXML.
# @param source the Source to read from
# @return nil
# @raise ParseException when non-markup text is found at the top level,
# or wrapping (message "unidentified error") any StandardError raised
# while parsing - including the RuntimeError raised when the document
# does not end up with exactly one root. The exception's source and
# element are set to +source+ and this document. Exceptions outside
# StandardError (Interrupt, SystemExit, ...) are deliberately left
# alone so they are not disguised as parse errors.
# NOTE: when only whitespace remains the method returns straight away,
# so the exactly-one-root check is not performed in that case.
def parse( source )
  begin
    while not source.empty?
      source.match( /^\s*/um, true )
      word = source.match( /^(<.*?)>/um )
      word = word[1] unless word.nil?
      case word
      when nil
        word = source.match( /\s*(\S+)/um, true )
        return if word.nil?
        raise ParseException.new( "data found outside of root element (data is '#{word}')", source ) if word[0].strip.length > 0
      when Comment::START_RE
        self.add( Comment.new( source ) )
      when DocType::START_RE
        self.add( DocType.new( source ) )
      when XMLDecl::START_RE
        self.add( XMLDecl.new( source ) )
      when Instruction::START_RE
        self.add( Instruction.new( source ) )
      else
        Element.new( source, self, @context )
      end
    end
    unless @elements.size == 1
      raise "the document does not have exactly one root"
    end
  rescue ParseException => parse_error
    # Make sure the error knows where it happened, then pass it on.
    parse_error.source = source
    parse_error.element = self
    raise
  rescue StandardError => other_error
    raise ParseException.new("unidentified error", source, self, other_error)
  end
end
end
end
|