1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
setGeneric("simplifyNamespaces",
function(doc, ...)
standardGeneric("simplifyNamespaces"))
setMethod("simplifyNamespaces", "character",
function(doc, ...) {
pdoc = xmlParseDoc(doc, NSCLEAN)
simplifyNamespaces(pdoc, ...)
})
xmlCleanNamespaces =
#
# @eg xmlParse("~/GitWorkingArea/XML/inst/exampleData/redundantNS.xml")
#
# ?Should we write the result to a file if we are given a file?
#
#
function(doc, options = integer(), out = docName(doc), ...)
{
if(is(doc, "XMLInternalDocument"))
doc = saveXML(doc)
options = unique(c(options, NSCLEAN))
newDoc = xmlParse(doc, ..., options = options)
if(is.logical(out))
out = if(out) docName(doc) else character()
if(is.character(out) && length(out))
saveXML(newDoc, out)
else
newDoc
}
setMethod("simplifyNamespaces", "XMLInternalDocument",
function(doc, alreadyCleaned = FALSE, ...) {
# find all the nodes, but discard the root node.
allNodes = getNodeSet(doc, "//node()") # [-1]
root = xmlRoot(doc)
# For each node, get its namespace definitions,
# and then zoom in on the nodes that have namespace definitions.
nsDefs = lapply(allNodes, xmlNamespaceDefinitions, simplify = TRUE)
w = sapply(nsDefs, length) > 0
tmp = structure(unlist(nsDefs[w]), names = sapply(nsDefs[w], names))
d = data.frame(uri = tmp, prefix = names(tmp), stringsAsFactors = FALSE)
multi = unlist(by(d, d$prefix, function(x) if(length(unique(x$uri)) == 1) character() else x$prefix[1]))
if(length(multi))
d = d[ ! (d$prefix %in% multi), ]
# Now we can move these namespace definitions to the top.
#
#
#
by(d, nsDefs,
function(x) {
u = unique(x$prefix)
})
# remove the
sapply(allNodes[w], removeXMLNamespaces)
nsDefs
})
getNodeNamespace =
# Figure out what namespace to use for this node and return a reference to that
# namespace definition object in C (a xmlNsPtr)
function(ns, nsDefs, node, namespace, noNamespace, namespaceDefinitions = NULL, parent = NULL,
suppressNamespaceWarning = FALSE)
{
if(noNamespace)
return(NULL)
if(is.character(namespace) && length(namespace) == 1L && !is.na(namespace) && namespace == "") {
if(length(namespaceDefinitions) == 0)
return(findNamespaceDefinition(node, ""))
}
if((is.list(namespace) || is.character(namespace)) && length(namespace) > 0) {
# a single element with no name so this is the prefix.
if(length(namespace) == 1 && length(names(namespace)) == 0) {
if(namespace %in% namespaceDefinitions) {
i = match(namespace, namespaceDefinitions)
ns = nsPrefix = names(namespaceDefinitions)[i]
} else if(namespace != "") {
ns = nsPrefix = namespace
}
} else {
# we have names and/or more than one element. So these are namespace definitions
if(length(names(namespace)) == 0)
names(namespace) <- rep("", length(namespace))
if(length(namespace) > 1 && !is.na(match(namespace[1], names(namespace)[-1]))) {
if(length(ns))
warning("ignoring first element of namespace and using prefix from node name, ", ns)
else {
ns = namespace[1]
namespace = namespace[-1]
}
}
if(length(namespace) > 1 && sum(names(namespace) == "") > 1)
warning("more than one namespace to use as the default")
nsDefs = lapply(seq(along = namespace),
function(i) {
prefix = names(namespace)[i]
newNamespace(node, namespace[[i]], prefix)
# Don't set the namespace. This is just a definition/declaration for
# this node, but not necessarily the namespace to use for this node.
# We set this below
})
names(nsDefs) = names(namespace)
}
}
# Now handle the prefix for this node.
if(length(ns)) {
i = match(ns, names(nsDefs))
if(is.na(i)) {
if(!is.null(parent))
ns = findNamespaceDefinition(node, ns)
else {
# raiseNsWarning(ns, suppressNamespaceWarning)
# attr(node, "xml:namespace") = ns
# ns = NULL
ns = newNamespace(node, character(), ns)
}
if(!inherits(ns, "XMLNamespaceRef"))
ns <- newNamespace(node, ns, "")
} else
ns <- nsDefs[[i]]
} else {
i = match("", names(nsDefs))
ns = if(is.na(i)) NULL else nsDefs[[i]]
# if now namespace and we have a parent, use its namespace
# if it has a namespace
if(!noNamespace && length(ns) == 0 && length(parent) > 0) {
ns = xmlNamespaceRef(parent)
if(!is.null(ns) && names(as(ns, "character")) != "")
ns = NULL
}
}
ns
}
raiseNsWarning =
function(ns, suppressNamespaceWarning)
{
if(is.character(suppressNamespaceWarning))
f = get(suppressNamespaceWarning, mode = "function")
else if(is.logical(suppressNamespaceWarning)) {
if(!suppressNamespaceWarning)
f = warning
else
return(NULL)
} else
f = function(...) {}
f("cannot find namespace definition for '", ns, "' because the node is not in a document and there are no matching local namespace definitions for this node")
}
fixDummyNS =
function(node, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE))
{
return(NULL)
nodes = getNodeSet(node, "//*[./namespace::*[. = '<dummy>']]", addFinalizer = FALSE)
lapply(nodes, completeDummyNS, suppressNamespaceWarning = suppressNamespaceWarning)
}
completeDummyNS =
function(node, suppressNamespaceWarning = getOption('suppressXMLNamespaceWarning', FALSE))
{
if(is.null(xmlParent(node)))
return(FALSE)
prefix = names(xmlNamespace(node))
ns = findNamespaceDefinition(xmlParent(node), prefix, error = FALSE)
if(is.null(ns))
raiseNsWarning(prefix, suppressNamespaceWarning)
# (if(suppressNamespaceWarning) warning else stop)("can't find namespace definition for prefix ", prefix)
else {
# remove the current namespace definition and kill it.
.Call("R_replaceDummyNS", node, ns, prefix, PACKAGE = "XML")
# setXMLNamespace(node, ns)
}
}
|