1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
|
require "rexml/parent"
require "rexml/namespace"
require "rexml/attribute"
require "rexml/cdata"
require "rexml/xpath"
require "rexml/parseexception"
module REXML
# Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be
# children.
class Element < Parent
include Namespace
UNDEFINED = "UNDEFINED"; # The default name
# Mechanisms for accessing attributes and child elements of this
# element.
attr_reader :attributes, :elements
attr_accessor :context
# Constructor
# @param parent if supplied, must be a Parent, and will be used as
# the parent of this object.
# @param arg if not supplied, will be set to the default value.
# If a String, the name of this object will be set to the argument.
# If an Element, the object will be shallowly cloned; name, attributes,
# and namespaces will be copied.
# If a Source, the source will be scanned and parsed for an Element,
# and all child elements will be recursively parsed as well.
def initialize( arg = UNDEFINED, parent=nil, context=nil )
super(parent)
@elements = Elements.new self
@attributes = Attributes.new self
@context = context
if arg.kind_of? Source
parse arg
elsif arg.kind_of? String
self.name = arg
elsif arg.kind_of? Element
self.name = arg.expanded_name
arg.attributes.each_attribute{ |attribute|
@attributes << Attribute.new( attribute )
}
@context = arg.context
end
end
def clone
Element.new self
end
# @return the root of the document that this element belongs to.
# If this element doesn't belong to a document, but does belong to
# another Element, the parent's root will be returned, until the
# earliest ancestor is found.
def root
parent.nil? ? self : parent.root
end
def whitespace
@whitespace = (@context and @context[:respect_whitespace] and
(@context[:respect_whitespace] == :all or
@context[:respect_whitespace].include? expanded_name)
)
@whitespace
end
def raw
@raw = (@context and @context[:raw] and
(@context[:raw] == :all or
@context[:raw].include? expanded_name))
@raw
end
once :whitespace, :raw
#################################################
# Namespaces #
#################################################
# @return an Array of all defined namespaces
def prefixes
prefixes = []
prefixes = parent.prefixes if parent
prefixes += attributes.prefixes
return prefixes
end
# @return the URI for a namespace, or the empty string if no such
# namespace is declared
def namespace prefix=nil
if prefix.nil?
prefix = prefix()
end
prefix = "xmlns" if prefix == ""
ns = attributes[ prefix ]
ns = parent.namespace(prefix) if ns.nil? and parent
return ns
end
# Adds a namespace to this element
# @param prefix the namespace. If the second argument is not supplied,
# this should be the URI of the default namespace.
# @param value (optional) the URI of the namespace. If "", then unsets
# the default namespace
def add_namespace( prefix, uri=nil )
unless uri
@attributes["xmlns"] = prefix
else
@attributes[prefix, "xmlns"] = uri
end
end
# NEEDS DOCUMENTATION
def delete_namespace namespace="xmlns"
attribute = attributes[name, namespace]
attribute.remove unless attribute.nil?
end
#################################################
# Elements #
#################################################
# Adds a child to this element, optionally setting attributes in
# the element.
# @param element optional. If Element, the element is added.
# Otherwise, a new Element is constructed with the argument (see
# Element.initialize).
# @param attrs If supplied, must be a hash of String name,value
# pairs, which will be used to set the attributes of the new Element.
# @return the Element that was added
def add_element element=nil, attrs=nil
el = @elements.add element
attrs.each_pair do |key, value|
el.attributes[key]=value
end if attrs.kind_of? Hash
el
end
# Deletes a child element.
# @param element Must be an Element, String, or Integer. If Element,
# the element is removed. If String, the element is found (via XPath)
# and removed. NOTE that this means that any parent can remove any
# descendant. If Integer, the Element indexed by that number will be
# removed.
# @return the element that was removed.
def delete_element element
@elements.delete element
end
# @return true if this element has at least one child Element
def has_elements?
!@elements.empty?
end
# Iterates through the children, yielding for each Element that
# has a particular attribute set.
# @param key the name of the attribute to search for
# @param value the value of the attribute
# @param max (optional) causes this method to return after yielding
# for this number of matching children
# @param name (optional) if supplied, this is an XPath that filters
# the children to check.
def each_element_with_attribute( key, value, max=0, name=nil, &block )
each_with_something( proc {|child|
child.attributes[key]==value
}, max, name, &block )
end
# Iterates through the children, yielding for each Element that
# has a particular text set.
# @param key the name of the attribute to search for
# @param value the value of the attribute
# @param max (optional) causes this method to return after yielding
# for this number of matching children
# @param name (optional) if supplied, this is an XPath that filters
# the children to check.
# @see text
def each_element_with_text( text, max=0, name=nil, &block )
each_with_something( proc {|child| child.text == text},
max, name, &block )
end
# Synonym for elements.each
def each_element( xpath=nil, &block )
@elements.each( xpath, &block )
end
# This is a little slower than calling elements.each directly.
# @return an array of Elements that match the supplied path
# @param xpath any XPath by which to search for elements in the tree
def get_elements( xpath )
rv = []
@elements.each( xpath ) {|child| rv<<child }
rv
end
# Returns the next sibling that is an element, or nil if there is
# no Element sibling after this one
def next_element
element = next_sibling
element = element.next_sibling until element.nil? or element.kind_of? Element
return element
end
# Returns the previous sibling that is an element, or nil if there is
# no Element sibling prior to this one
def previous_element
element = previous_sibling
element = element.previous_sibling until element.nil? or element.kind_of? Element
return element
end
#################################################
# Text #
#################################################
# @return true if this element has at least one Text child
def has_text?
not text().nil?
end
# A convenience method which returns the first child text element.
# NOTE that an element may have multiple Text elements, perhaps
# separated by other children; consider:
# "<p>some text <b>this is bold!</b> more text<p>"
# The element <p> has two text elements, "some text " and " more text".
# This method would return only the first, "some text ".
# @return the String content of the first child text element
# encountered.
def text( path = nil )
rv = get_text path
return rv.to_s unless rv.nil?
nil
end
# This is the same method as text(), only this method returns the
# actual Text object, rather than the String content.
# @return the first Text child encountered.
def get_text path = nil
rv = nil
if path
element = @elements[ path ]
rv = element.get_text unless element.nil?
else
rv = find { |node| node.kind_of? Text }
end
return rv
end
# Sets the first Text child of this object. See text() for a
# discussion about Text children.
# If a Text child already exists, the child is replaced by this
# content. This means that Text content can be deleted by calling
# this method with a nil argument. In this case, the next Text
# child becomes the first Text child. In no case is the order of
# any siblings disturbed.
# @param text If a String, a new Text child is created and added to
# this Element as the first Text child. If Text, the text is set
# as the first Child element. If nil, then any existing first Text
# child is removed.
# @return self. NOTE that contrary to most REXML methods, the replaced
# content is not returned.
def text=( text )
text = Text.new( text, whitespace(), raw() ) if text.kind_of? String
old_text = get_text
if text.nil?
old_text.remove unless old_text.nil?
else
if old_text.nil?
self << text
else
old_text.replace_with( text )
end
end
return self
end
# A helper method to add a Text child. Actual Text instances can
# be added with regular Parent methods, such as add() and <<()
# @param text if a String, a new Text instance is created and added
# to the parent. If Text, the object is added directly.
# @return self. NOTE that contrary to most REXML methods, the object
# added to the parent is not returned.
def add_text( text )
text = Text.new( text, whitespace(), raw() ) if text.kind_of? String
self << text unless text.nil?
return self
end
#################################################
# Attributes #
#################################################
# @return true if this element has any attributes set, false
# otherwise.
def has_attributes?
return !@attributes.empty?
end
# Adds an attribute to this element, overwriting any existing attribute
# by the same name.
# @param key can be either an Attribute or a String. If an Attribute,
# the attribute is added to the list of Element attributes. If String,
# the argument is used as the name of the new attribute, and the value
# parameter must be supplied.
# @param value not required, and is ignored if the first argument is
# an Attribute. Otherwise, this is a String, and is used as the value
# of the new Attribute.
# @return the Attribute added
def add_attribute( key, value=nil )
if key.kind_of? Attribute
@attributes << key
else
@attributes[key] = value
end
end
# Add multiple attributes to this element. EG:
# el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
# el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
# @p hash is either a hash, or array of arrays
def add_attributes hash
if hash.kind_of? Hash
hash.each_pair {|key, value| @attributes[key] = value }
elsif hash.kind_of? Array
hash.each { |value| @attributes[ value[0] ] = value[1] }
end
end
# Removes an attribute
# @param key either an Attribute or a String. In either case, the
# attribute is found by matching the attribute name to the argument,
# and then removed. If no attribute is found, no action is taken.
# @return the attribute removed, or nil if this Element did not contain
# a matching attribute
def delete_attribute(key)
attr = @attributes.get_attribute(key)
attr.remove unless attr.nil?
end
# Writes out this element, and recursively, all children.
# @param writer A String or IO (or any object supporting <<(String))
# @param indent if supplied, must be an Integer, which is then used
# to determine the indentation of this element and its children.
def write(writer, indent=0)
indent writer, indent
writer << "<#@expanded_name"
@attributes.each_attribute do |attr|
writer << " "
attr.write( writer, indent )
end unless @attributes.empty?
if @children.empty?
writer << "/"
else
writer << ">"
write_children writer, indent
writer << "</#{expanded_name}"
end
writer << ">"
end
def Element.parse_stream source, listener
begin
# Get the next tag
name, closed, attrs = base_parser(source)
attrs.each { |ar| ar[1,1] = [] }
listener.tag_start name, attrs
unless closed
while true
md = source.match(/\A(\s*[^>]*>)/um)
raise ParseException.new("malformed XML: bad content", source,
self) unless md
line = md[1]
return if line.nil?
case line
when /\A<\w/um
Element.parse_stream source, listener
when /\A<\//um
break
when CData::START_RE
CData.parse_stream source, listener
when Comment::START_RE
Comment.parse_stream source, listener
when Instruction::START_RE
Instruction.parse_stream source, listener
else
Text.parse_stream source, listener
end
end
if source.match(/^\s*<\/#{name}\s*>/um, true)
listener.tag_end name
else
raise ParseException.new( "Missing end tag for #{name}",
source, self )
end
else
listener.tag_end name
end
rescue ParseException
$!.source = source
$!.element = self
raise
rescue Exception
raise
old_ex = $!
raise ParseException.new("unidentified error", source, self, old_ex)
end
end
private
# A private helper method
def each_with_something( test, max=0, name=nil )
num = 0
child=nil
@elements.each( name ){ |child|
yield child if test.call(child) and num += 1
return if max>0 and num == max
}
end
# A helper method, to consolidate the common code between parse and
# parse_stream.
# @return an array with [tag_name_S, closed_B, attributes_A]
def Element.base_parser source
# Get the next tag
md = source.match(/^<((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)
raise ParseException.new("malformed XML: missing tag start", source,
self) unless md
attrs = []
if md[2]
attrs = md[2].scan( Attribute::PATTERN )
raise ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", source, self ) if $' and $'.strip.size > 0
end
return [md[1], md[6], attrs]
end
def parse source
begin
# Get the next tag
self.name, closed, attrs = Element.base_parser(source)
attrs.each do |set|
@attributes.add( Attribute.new( set[0], set[2] ) )
end
unless closed
parse_children source
raise ParseException.new( "Missing end tag for #{expanded_name}",
source, self) unless source.match(/^\s*<\/#@expanded_name\s*>/um,
true)
end
rescue ParseException
$!.source = source
$!.element = self
raise
rescue Exception
old_ex = $!
raise ParseException.new("unidentified error", source, self, old_ex)
end
end
# A private helper method
def parse_children source
while true
md = source.match(/\A(\s*[^>]*>)/um)
raise ParseException.new( "missing tag start", source, self ) unless md
case md[1]
when nil
return
when /\A<[\w_:]/um
Element.new(source, self, @context)
when /\A<\//um
return
when CData::START_RE
add(CData.new(source))
when Comment::START_RE
add(Comment.new(source))
when Instruction::START_RE
add(Instruction.new(source))
else
add(Text.new(source, whitespace(), raw()))
end
end
end
# A private helper method
def write_children( writer, indent )
cr = indent < 0 ? "" : "\n"
#if size == 1 and @children[0].kind_of?(Text)
# self[0].write( writer, -1 )
if indent == -1
each { |child| child.write( writer, indent ) }
else
next_indent = indent+2
each { |child|
writer << cr
child.write( writer, next_indent )
}
writer << cr
indent( writer, indent )
end
end
end
########################################################################
# ELEMENTS #
########################################################################
# A class which provides filtering of children for Elements, and
# XPath search support.
class Elements
include Enumerable
# Constructor
# @param parent the parent Element
def initialize parent
@element = parent
end
# Fetches a child element
# @param index the search parameter. This is either an Integer, which
# will be used to find the index'th child Element, or an XPath,
# which will be used to search for the Element. NOTE that because
# of the nature of XPath searches, any element in the connected XML
# document can be fetched through any other element.
# @param name optional, and only used in the first argument is an
# Integer. In that case, the index'th child Element that has the
# supplied name will be returned. Note that the indexes start at 1.
# @return the first matching Element, or nil if no child matched
def []( index, name=nil)
if index.kind_of? Integer
raise "index (#{index}) must be >= 1" if index < 1
literal, name = literalize name
num = 0
child = nil
@element.find { |child|
child.kind_of? Element and
(name.nil? ? true : child.has_name?( name, literal )) and
(num += 1) == index
}
else
XPath::each(@element, index ) { |element| return element }
return nil
end
end
# Sets an element, replacing any previous matching element. If no
# existing element is found ,the element is added.
# @param index is used to find a matching element to replace. See []().
# @param element the element to replace the existing element with
# @return the previous element; nil if no previous element was found.
def []=( index, element )
previous = self[index]
if previous.nil?
@element.add element
else
previous.replace_with element
end
return previous
end
# @return true if there are no Element children, false otherwise
def empty?
@element.find{ |child| child.kind_of? Element}.nil?
end
# @return the index of the supplied child (starting at 1), or -1 if
# the element is not a child
# @param element a child of this Element
def index element
rv = 0
found = @element.find do |child|
child.kind_of? Element and
(rv += 1) and
child == element
end
return rv if found == element
return -1
end
# Deletes a child Element
# @param element Either an Element, which is removed directly; an
# xpath, where the first matching child is removed; or an Integer,
# where the n'th Element is removed.
# @return the removed child
def delete element
if element.kind_of? Element
@element.delete element
else
el = self[element]
el.remove if el
end
end
# Removes multiple elements
# @param xpath all elements matching this String path are removed.
# @return an Array of Elements that have been removed
def delete_all( xpath )
rv = []
XPath::each( @element, xpath) {|element| rv << element }
rv.each do |element|
@element.delete element
element.remove
end
return rv
end
# Adds an element
# @param element if supplied, is either an Element, String, or
# Source (see Element.initialize). If not supplied or nil, a
# new, default Element will be constructed
# @return the added Element
def add element=nil
if element.nil?
Element.new "", self, @element.context
elsif not element.kind_of?(Element)
Element.new element, self, @element.context
else
@element << element
element.context = @element.context
end
end
alias :<< :add
# Iterates through all of the child Elements, optionally filtering
# them by a given XPath
# Only supply the first parameter (and a block). The second
# parameter is used internally for recursion.
# @param xpath optional. If supplied, this is a String XPath,
# and is used to filter the children, so that only matching children
# are yielded
def each( xpath=nil, &block)
XPath::each( @element, xpath, &block )
end
def size
count = 0
@element.each {|child| count+=1 if child.kind_of? Element }
count
end
def to_a( xpath=nil )
rv = []
each( xpath ) { |el| rv << el }
rv
end
private
# A private helper method
def literalize name
literal = !(name =~ /^'.*'$/u).nil?
name = name[1..-2] if literal and name
[literal, name]
end
end
########################################################################
# ATTRIBUTES #
########################################################################
# A class that holds the Attributes of an Element.
class Attributes < Hash
# Constructor
# @param element the Element of which this is an Attribute
def initialize element
@element = element
end
# Fetches an attribute value. If you want to get the Attribute itself,
# use get_attribute()
# @param name an XPath attribute name
# @return A String, which is the value of the first matching attribute,
# or nil if there was none
def [](name)
attr = get_attribute(name)
return attr.value unless attr.nil?
return nil
end
def each_attribute
each_value do |val|
if val.kind_of? Attribute
yield val
else
val.each_value { |atr| yield atr }
end
end
end
# Fetches an attribute
# @param name the XPath by which to search for the attribute
# @return The first matching Attribute, or nil if there was none
def get_attribute( name )
attr = fetch( name, nil )
if attr.nil?
return nil if name.nil?
# Look for prefix
if name.include? ':'
name =~ /(\w+):(\w+)/u
prefix, name = $1, $2
attr = fetch( name, nil )
# check prefix
return nil if attr == nil
if attr.kind_of? Attribute
return attr if uri == attr.prefix
else
attr = attr[ prefix ]
return attr
end
end
return nil
end
if attr.kind_of? Hash
attr = attr[ @element.prefix ]
end
return attr
end
# Sets an attribute, overwriting any existing attribute value by the
# same name
# @param name the name of the attribute
# @param value (optional) If supplied, the value of the attribute. If
# nil, any existing matching attribute is deleted.
# @return self NOTE that unlike most REXML methods, this does not return
# the set Attribute.
def []=( name, value = nil )
value = Attribute.new(name, value) unless value.kind_of? Attribute
value.element = @element
old_attr = get_attribute name
if old_attr.nil?
store(value.name, value)
elsif old_attr.kind_of? Hash
old_attr[value.prefix] = value
elsif old_attr.prefix != value.prefix
hash = { old_attr.prefix => old_attr,
value.prefix => value }
store value.name, hash
else
store value.name, value
end
return @element
end
def prefixes
ns = []
each_attribute do |attribute|
ns << attribute.name if attribute.prefix == "xmlns"
end
ns
end
# Removes an attribute
# @return the removed attribute
def delete( attribute )
attr = super
attr.remove unless attr.nil?
attribute
end
# Adds an attribute, overriding any existing attribute by the
# same name.
# @param attribute must be an Attribute
def add( attribute )
self[attribute.name] = attribute
end
alias :<< :add
# DO NOT USE THIS METHOD! It WILL fail, and may be removed. If you
# absolutely need this method, write the author.
# Deletes all attributes matching an xpath
# @param xpath A String; all attributes that match this path will
# be removed
# @return an Array of the Attributes that were removed
def delete_all( xpath )
rv = []
each_attribute_pair(xpath) { |name,attribute| rv << attribute }
rv.each{ |attr| attr.remove }
return rv
end
private
# Private helper class.
def literalize name
literal = !(name =~ /^'.*'$/u).nil?
name = name[1..-2] if literal
[literal, name]
end
end
end
|