File: getElements.S

package info (click to toggle)
r-cran-xml 3.99-0.19-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,688 kB
  • sloc: ansic: 6,659; xml: 2,890; asm: 486; sh: 12; makefile: 2
file content (43 lines) | stat: -rw-r--r-- 920 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Build a pair of closures that collect nodes whose name is listed in
# `elementNames`.
#
# Arguments:
#   elementNames  character vector of node names to keep.
#
# Value:
#   A list with two functions sharing one private store:
#     startElement(node, ...)  records `node` when xmlName(node) is in
#                              `elementNames`, and always returns `node`
#                              unchanged.
#     els()                    returns the list of nodes recorded so far,
#                              in the order they were seen.
getElements <- function(elementNames)
{
  # Private accumulator; only reachable through the two closures below.
  matched <- list()

  collect <- function(node, ...) {
    wanted <- xmlName(node) %in% elementNames

    if (wanted) {
      # Append by assigning one past the current end; `<<-` updates the
      # accumulator in the enclosing getElements() frame.
      slot <- length(matched) + 1
      matched[[slot]] <<- node
    }

    # Hand the node back untouched so the caller still receives it.
    node
  }

  list(startElement = collect,
       els = function() matched)
}

# Example: collect every <a> node of a page, then pull out the href
# attribute of each collected node.
h <- getElements("a")
htmlTreeParse("http://www.omegahat.net/RSXML/index.html", handlers = h)

# NOTE(review): if xmlGetAttr() yields NULL for anchors without an href,
# sapply() cannot simplify and `links` will be a list -- confirm.
links <- sapply(h$els(), xmlGetAttr, "href")

# Of course, we could do these two steps in one with a handler
# function like
#

# Build a pair of closures that accumulate the href attribute of each
# node passed to the `a` handler.
#
# Value:
#   A list with two functions sharing one private character vector:
#     a(x)     appends xmlGetAttr(x, "href") to the collected links and
#              returns `x` unchanged.
#     links()  returns the character vector of hrefs collected so far.
#
# The list is meant to be passed as `handlers` to htmlTreeParse(), as in
# the commented-out usage that follows this definition.
getLinks = function() {
  links = character()

  list(a = function(x) {
         # c() drops a NULL result, so a node for which xmlGetAttr()
         # returns NULL leaves `links` as it was.
         links <<- c(links, xmlGetAttr(x, "href"))

         # Return the node itself, as getElements()'s handler does.
         # Previously the value of the assignment (the whole accumulated
         # vector) was returned, which the parser would take as the
         # replacement for the node.
         x
       },
       links = function() links)
}

# h = getLinks()
# htmlTreeParse("http://www.omegahat.net/RSXML/index.html", handlers = h)
# h$links()

# If we want the nodes, we can use getElements. Otherwise, we
# can use a specialized handler that converts the information on the
# fly into the data structures we want at the end.