File: bondsTables.R

package info (click to toggle)
r-cran-xml 3.99-0.19-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,688 kB
  • sloc: ansic: 6,659; xml: 2,890; asm: 486; sh: 12; makefile: 2
file content (30 lines) | stat: -rw-r--r-- 886 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
library(XML)

# Fetch the composite bond rates page and parse it as HTML, asking for
# internal nodes (useInternalNodes = TRUE); the XPath query below is run
# against the resulting document.
# NOTE(review): this reads a live URL, so the script needs network access.
doc <- htmlTreeParse(
  "http://finance.yahoo.com/bonds/composite_bond_rates?bypass=true",
  useInternalNodes = TRUE
)

# Select every <table class="yfirttbl"> element that is a direct child of a
# <div>; those are the tables holding the data we are after.
o <- getNodeSet(
  doc,
  "//div/table[@class='yfirttbl']"
)

# Convert one parsed HTML <table> node into a numeric matrix.
#
# Expected layout of the node:
#   <thead><tr> ... </tr></thead>   <th> cells give the column headings
#   <tbody> <tr> ... </tr> </tbody> <td> cells give the data
# The first <td> of every body row becomes the row name; the remaining cells
# are converted with as.numeric() (text that is not a number becomes NA with
# the usual coercion warning).  The first heading labels the row-name column,
# so it is dropped from the column names.
#
# NOTE(review): the XML package exports a function that is also called
# readHTMLTable; defining this one in the workspace masks it.
#
# Arguments:
#   tb   a table node, e.g. one element of the node set returned by
#        getNodeSet().
# Value:
#   A numeric matrix with one row per body <tr> and one column per <td>
#   after the first.  A body without rows yields a zero-row matrix.  A table
#   without <thead> yields a matrix without column names.
# Errors:
#   Stops when <tbody> is missing, when body rows have differing numbers of
#   <td> cells (or none at all), or when the number of headings does not
#   match the number of cells.
readHTMLTable <- function(tb) {
  # xmlValue() is wrapped so that every cell yields exactly one string;
  # anything else is recorded as a missing value instead of breaking the
  # rectangular shape of the result.
  cell_text <- function(node) {
    txt <- xmlValue(node)
    if (length(txt) == 1L) as.character(txt) else NA_character_
  }

  header_row <- tb[["thead"]][["tr"]]
  tbody <- tb[["tbody"]]
  if (is.null(tbody)) {
    stop("table node has no <tbody> element", call. = FALSE)
  }

  headings <- if (is.null(header_row)) {
    character(0)
  } else {
    vapply(header_row["th"], cell_text, character(1), USE.NAMES = FALSE)
  }
  col_labels <- headings[-1L]

  # One character vector per <tr>.  vapply() fixes the element type, unlike
  # sapply(), whose return shape depends on the data.
  rows <- unname(lapply(tbody["tr"], function(row_node) {
    vapply(row_node["td"], cell_text, character(1), USE.NAMES = FALSE)
  }))

  if (length(rows) == 0L) {
    return(matrix(numeric(0),
                  nrow = 0L,
                  ncol = length(col_labels),
                  dimnames = list(NULL, col_labels)))
  }

  n_cells <- vapply(rows, length, integer(1))
  if (any(n_cells != n_cells[1L])) {
    stop("table body rows have differing numbers of <td> cells",
         call. = FALSE)
  }
  if (n_cells[1L] == 0L) {
    stop("table body rows contain no <td> cells", call. = FALSE)
  }
  n_values <- n_cells[1L] - 1L
  if (length(col_labels) > 0L && length(col_labels) != n_values) {
    stop("number of column headings does not match the number of cells",
         call. = FALSE)
  }

  # rows x cells character matrix; column 1 holds the row labels.
  cells <- do.call(rbind, rows)
  matrix(as.numeric(cells[, -1L, drop = FALSE]),
         nrow = nrow(cells),
         ncol = n_values,
         dimnames = list(cells[, 1L], col_labels))
}


# Run the table reader over every selected table node, then label each
# result with the text of that table's <caption> element.
tables <- lapply(X = o, FUN = readHTMLTable)
names(tables) <- lapply(
  X = o,
  FUN = function(node) xmlValue(node[["caption"]])
)