File: bondsTables.R

package info (click to toggle)

r-cran-xml 3.99-0.19-1

links: PTS, VCS
area: main
in suites: forky, sid
size: 3,688 kB
sloc: ansic: 6,659; xml: 2,890; asm: 486; sh: 12; makefile: 2

file content (30 lines) | stat: -rw-r--r-- 886 bytes

parent folder | download | duplicates (5)

library(XML)

# Download and parse the Yahoo! Finance composite bond rates page.
# useInternalNodes = TRUE is requested so that the XPath query below
# (getNodeSet) can be run against the parsed document.
doc <- htmlTreeParse(
  "http://finance.yahoo.com/bonds/composite_bond_rates?bypass=true",
  useInternalNodes = TRUE
)

# XPath: every <table class="yfirttbl"> that is a direct child of a <div>.
# Those are the tables whose contents we want to extract.
o <- getNodeSet(doc, "//div/table[@class='yfirttbl']")

# Extract the numeric contents of one HTML <table> node as a matrix.
#
# NOTE: once defined, this function masks XML::readHTMLTable() in the
# session; the name is kept because the script calls it by this name.
#
# Arguments:
#   tb  a table node that supports the XML package's child accessors:
#       tb[["thead"]][["tr"]]["th"] yields the header cells and
#       tb[["tbody"]]["tr"] yields the data rows, each made of <td> cells.
#
# Value:
#   A numeric matrix with one row per <tr> of the table body. The first
#   <td> of each row supplies the row name and the remaining cells are
#   converted with as.numeric() (text that is not a number becomes NA, with
#   the usual coercion warning). Column names come from the header cells,
#   skipping the first one, which sits above the row-name column. A body
#   without rows gives a matrix with zero rows.
#
# Errors:
#   Stops if the rows do not all have the same number of cells, if a row has
#   fewer than two cells, or if a non-empty header does not match the number
#   of value columns.
readHTMLTable <-
function(tb)
{
  # Text of each node in a list of cells. vapply() keeps the result a plain
  # character vector whatever the input looks like (sapply() would switch
  # between vector, matrix and list); a cell whose value is not exactly one
  # string is recorded as NA.
  cellText <- function(nodes) {
    vapply(nodes,
           function(node) {
             txt <- xmlValue(node)
             if (length(txt) == 1L) as.character(txt) else NA_character_
           },
           character(1), USE.NAMES = FALSE)
  }

  # Header labels, minus the one above the row-name column.
  valueNames <- cellText(tb[["thead"]][["tr"]]["th"])[-1]
  if (length(valueNames) == 0L)
    valueNames <- NULL   # no usable header: leave the columns unnamed

  # One character vector per <tr>: the row label followed by the values.
  rows <- lapply(tb[["tbody"]]["tr"], function(tr) cellText(tr["td"]))

  # Empty body: return an empty matrix rather than failing on indexing.
  if (length(rows) == 0L) {
    return(matrix(numeric(0), nrow = 0L, ncol = length(valueNames),
                  dimnames = list(NULL, valueNames)))
  }

  width <- unique(lengths(rows))
  if (length(width) != 1L || width < 2L) {
    stop("every table row must have the same number of <td> cells: ",
         "a label cell followed by at least one value cell",
         call. = FALSE)
  }
  if (!is.null(valueNames) && length(valueNames) != width - 1L) {
    stop("the table header has ", length(valueNames),
         " value column(s) but the rows have ", width - 1L,
         call. = FALSE)
  }

  # Stack the rows into an n x width character matrix, then split off the
  # label column and convert the rest. Both the stacked matrix and the
  # result are column-major, so the values line up without byrow.
  cells <- do.call(rbind, unname(rows))
  matrix(as.numeric(cells[, -1L, drop = FALSE]),
         nrow = nrow(cells),
         dimnames = list(cells[, 1L], valueNames))
}


# Convert every table node in `o` into a numeric matrix.
tables <- lapply(o, readHTMLTable)

# Name each matrix after the text of its table's <caption>. names<- expects a
# character vector, so one string per table is built explicitly instead of
# handing over a list to be coerced (a list element that is not a single
# string would be deparsed into the name). A table without a usable caption
# gets an NA name.
names(tables) <- vapply(o,
                        function(x) {
                          caption <- x[["caption"]]
                          if (is.null(caption))
                            return(NA_character_)
                          label <- xmlValue(caption)
                          if (length(label) == 1L) as.character(label) else NA_character_
                        },
                        character(1), USE.NAMES = FALSE)