1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
|
"""
Demonstrates some advanced DOM manipulation.
This function looks for simple XLinks and replaces the node containing
such links with the contents of the referenced document.
"""
from xml.dom import Node
from xml.dom.NodeFilter import NodeFilter
from xml.dom import ext
from xml.dom.ext.reader import PyExpat
def XllReplace(start_node):
    """Expand simple XLinks found beneath *start_node*, in place.

    Walks the element nodes under ``start_node`` with a DOM NodeIterator.
    Every childless element that carries both a ``link`` and a non-empty
    ``href`` attribute in the XLink 0.9 namespace is replaced by the first
    element node of the document read from the file named by ``href``.

    Parameters:
        start_node: DOM node at which the traversal starts.  It may be a
            Document node or any node owned by one.

    Returns:
        ``start_node`` itself; the tree is modified in place.

    Errors raised by ``open()`` or by the reader while loading a referenced
    file are not caught here and propagate to the caller.
    """
    reader = PyExpat.Reader()
    # DOM Core defines ownerDocument as null for a Document node, so when
    # the caller hands us the document itself, use it directly.
    owner_doc = start_node.ownerDocument
    if owner_doc is None:
        owner_doc = start_node
    snit = owner_doc.createNodeIterator(start_node, NodeFilter.SHOW_ELEMENT, None, 0)
    curr_node = snit.nextNode()
    while curr_node:
        # Only empty nodes are allowed to have Links
        if not curr_node.childNodes.length and curr_node.attributes:
            href = _xll_link_target(curr_node)
            if href:
                # Then make a tree of the new file and insert it.  The
                # context manager closes the file even if read() fails.
                with open(href, "r") as f:
                    st = f.read()
                new_df = reader.fromString(st, ownerDoc=owner_doc)
                doc_root = _xll_first_element(new_df)
                # If the referenced file produced no element node, leave
                # the link element untouched rather than inserting a stale
                # or undefined node.
                if doc_root is not None:
                    # NOTE(review): the iterator's current node is replaced
                    # before nextNode() is called, as in the original code;
                    # this relies on the NodeIterator implementation coping
                    # with removal of its reference node -- confirm.
                    curr_node.parentNode.replaceChild(doc_root, curr_node)
        curr_node = snit.nextNode()
    return start_node


# Namespace URI that identifies the XLink attributes this module recognises.
_XLL_NAMESPACE = "http://www.w3.org/XML/XLink/0.9"


def _xll_link_target(node):
    """Return the XLink ``href`` value of *node*, or None if it is not a link."""
    is_link = False
    href = None
    attributes = node.attributes
    for k in attributes.keys():
        attr = attributes[k]
        qualified = (attr.localName, attr.namespaceURI)
        if qualified == ("link", _XLL_NAMESPACE):
            is_link = True
        elif qualified == ("href", _XLL_NAMESPACE):
            href = attr.value
    if is_link and href:
        return href
    return None


def _xll_first_element(parent):
    """Return the first element-node child of *parent*, or None if there is none."""
    for a_node in parent.childNodes:
        if a_node.nodeType == Node.ELEMENT_NODE:
            return a_node
    return None
if __name__ == "__main__":
    # Script entry point: parse the document named on the command line,
    # expand its XLinks in place, and pretty-print the resulting tree.
    import sys

    if len(sys.argv) < 2:
        # Report the missing argument instead of failing with IndexError.
        sys.exit("usage: %s <xml-file-or-uri>" % sys.argv[0])

    reader = PyExpat.Reader()
    xml_dom_tree = reader.fromUri(sys.argv[1])
    try:
        XllReplace(xml_dom_tree)
        ext.PrettyPrint(xml_dom_tree)
    finally:
        # Hand the tree back to the reader even if expansion or printing fails.
        reader.releaseNode(xml_dom_tree)
|