File: htmlLists.R

package info (click to toggle)
r-cran-xml 3.99-0.19-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,688 kB
  • sloc: ansic: 6,659; xml: 2,890; asm: 486; sh: 12; makefile: 2
file content (56 lines) | stat: -rw-r--r-- 1,744 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
setGeneric("readHTMLList",
          function(doc, 
                    trim = TRUE, elFun = xmlValue,
                      which = integer(), ...)
             standardGeneric("readHTMLList"))


setMethod("readHTMLList",
           "character",
          function(doc, 
                    trim = TRUE, elFun = xmlValue,
                     which = integer(), encoding = character(), ...) {
             readHTMLList(htmlParse(doc, encoding = encoding), trim, elFun, which, ...)
          })


setMethod("readHTMLList",
           "HTMLInternalDocument",
          function(doc, 
                    trim = TRUE, elFun = xmlValue,
                     which = integer(), ...) {
            lists = getNodeSet(doc, "//ol | //ul | //dl")
            if(length(which))
               lists = lists[which]
            ans = lapply(lists, readHTMLList, trim = trim, elFun = elFun)
            if(length(which) == 1)
              ans[[1]]
            else
              ans
          })

setMethod("readHTMLList",
           "XMLInternalNode",
          function(doc, 
                    trim = TRUE, elFun = xmlValue,
                     which = integer(), ...) {

            if(xmlName(doc) == "dl")
                return(readHTMLDefinitionList(doc, trim, elFun))

            
            ans = unname(sapply(xmlChildren(doc)[!xmlSApply(doc, is, "XMLInternalTextNode")], elFun))

            if(trim) 
              ans = unname(sapply(ans, function(x) if(is.character(x)) trim(x) else x))

            ans
          })

readHTMLDefinitionList =
function(node, trim = TRUE, elFun = xmlValue)
{
  kids = xmlChildren(node)
  structure(sapply(kids[names(node) == "dd"], elFun),
            names = sapply(kids[names(node) == "dt"], elFun))
}