1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
test_that("download_xml fails if curl is not installed", {
skip("how to test error with `check_installed()`?")
mockery::stub(download_xml, "requireNamespace", function(...) FALSE)
expect_error(
download_xml("http://httpbin.org/xml"),
"`curl` must be installed to use `download_xml\\(\\)`"
)
})
test_that("read_xml errors with an empty document", {
expect_snapshot(error = TRUE, {
read_xml(character())
})
tf <- tempfile()
file.create(tf)
on.exit(unlink(tf))
expect_error(read_xml(tf), "Document is empty")
})
test_that("read_html correctly parses malformed document", {
lego <- read_html(test_path("lego.html.bz2"))
expect_length(xml_find_all(lego, ".//p"), 39)
})
test_that("parse_options errors when given an invalid option", {
expect_error(
parse_options("INVALID", xml_parse_options()),
'`options` "INVALID" is not a valid option'
)
expect_snapshot(error = TRUE,
read_html(test_path("lego.html.bz2"), options = "INVALID")
)
# Empty inputs returned as 0
expect_identical(0L, parse_options("", xml_parse_options()))
expect_identical(0L, parse_options(NULL, xml_parse_options()))
# Numerics returned as integers
expect_identical(12L, parse_options(12L, xml_parse_options()))
expect_identical(12L, parse_options(12, xml_parse_options()))
# Multiple inputs summed
expect_identical(3L, parse_options(c("RECOVER", "NOENT"), xml_parse_options()))
})
test_that("read_html properly passes parser arguments", {
skip_if_not(libxml2_version() >= "2.9.2")
blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR"))
expect_equal(
sub("\r\n", "\n", fixed = TRUE, as_list(blanks)$html$body$catalog$cd[[1]]),
"\n "
)
no_blanks <- read_html(xml2_example("cd_catalog.xml"), options = c("RECOVER", "NOERROR", "NOBLANKS"))
expect_equal(
as_list(no_blanks)$html$body$catalog$cd[[1]],
list("Empire Burlesque")
)
})
test_that("read_xml works with httr response objects", {
skip("httpbin is unreliable")
x <- read_xml(httr::GET("http://httpbin.org/xml"))
expect_s3_class(x, "xml_document")
expect_length(xml_find_all(x, "//slide"), 2)
})
test_that("read_xml and read_html fail for bad status codes", {
skip("httpbin is unreliable")
expect_error(
read_xml(httr::GET("http://httpbin.org/status/404")),
class = "http_404"
)
expect_error(
read_html(httr::GET("http://httpbin.org/status/404")),
class = "http_404"
)
})
test_that("read_xml works with raw inputs", {
x <- read_xml("<foo/>")
expect_equal(xml_url(x), NA_character_)
})
test_that("read_html works with non-ASCII encodings", {
tmp <- tempfile()
on.exit(unlink(tmp))
writeLines("<html><body>\U2019</body></html>", tmp, useBytes = TRUE)
res <- read_html(tmp, encoding = "UTF-8")
expect_equal(
as.character(res, options = ""),
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body>\U2019</body></html>\n"
)
})
test_that("read_xml and read_html fail with > 1 input", {
expect_snapshot(error = TRUE, {
read_xml(c("foo", "bar"))
read_html(c("foo", "bar"))
})
})
# Fails in libxml2 2.12
#test_that("Truncated HTML should not error", {
# res <- read_html('<html><head')
# expect_s3_class(res, "xml_document")
#})
test_that("read_xml from a textConnection", {
s <- '<?xml version="1.0" encoding="UTF-8"?>\n<outer>\n<inner>Inner</inner>\n</outer>'
con <- textConnection(s)
x <- read_xml(con)
close(con)
expect_s3_class(x, "xml_document")
})
|