File: compare.R

package info (click to toggle)
r-cran-xml 3.99-0.19-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,688 kB
  • sloc: ansic: 6,659; xml: 2,890; asm: 486; sh: 12; makefile: 2
file content (29 lines) | stat: -rw-r--r-- 841 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

# Utilities for judging whether two XML documents are "similar" (loosely
# defined): we compare the distributions of their node names.

# Summarise a document by how often each node name occurs in it.
#
# object: the document to summarise; it is handed to xpathSApply().
# ...:    extra arguments forwarded unchanged to xpathSApply().
#
# Returns a list with two elements:
#   nameCounts - table of node-name frequencies, most frequent first
#   numNodes   - total number of nodes counted (the sum of nameCounts)
summary.XMLInternalDocument =
function(object, ...)
{
  # "//*" matches every element node, so this yields one name per node.
  nodeNames = xpathSApply(object, "//*", xmlName, ...)

  # deparse.level = 0 keeps the table's dimnames unlabelled rather than
  # tagging them with the name of the local variable.
  tally = sort(table(nodeNames, deparse.level = 0), decreasing = TRUE)

  list(nameCounts = tally, numNodes = sum(tally))
}


# Compare two documents by the distribution of their node names.
#
# a, b: the documents to compare; each is summarised with summary().
# ...:  extra arguments passed on to both summary() calls.
#
# Returns a list with three elements:
#   inA        - counts for node names found only in `a`
#   inB        - counts for node names found only in `b`
#   countDiffs - for names present in both, count in `a` minus count in `b`,
#                keeping only the names whose counts differ
compareXMLDocs =
function(a, b, ...)
{
  summaryA = summary(a, ...)
  summaryB = summary(b, ...)

  countsA = summaryA$nameCounts
  countsB = summaryB$nameCounts
  namesA = names(countsA)
  namesB = names(countsB)

  # Index both count vectors by the same names so the subtraction lines up.
  shared = intersect(namesA, namesB)
  delta = countsA[shared] - countsB[shared]

  list(inA = countsA[setdiff(namesA, namesB)],
       inB = countsB[setdiff(namesB, namesA)],
       countDiffs = delta[delta != 0])
}