1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
test_that("download_xml fails if curl is not installed", {
skip("how to test error with `check_installed()`?")
mockery::stub(download_xml, "requireNamespace", function(...) FALSE)
expect_error(
download_xml("http://httpbin.org/xml"),
"`curl` must be installed to use `download_xml\\(\\)`"
)
})
test_that("read_xml errors with an empty document", {
expect_snapshot(error = TRUE, {
read_xml(character())
})
tf <- tempfile()
file.create(tf)
on.exit(unlink(tf))
expect_error(read_xml(tf), "Document is empty")
})
test_that("read_html correctly parses malformed document", {
lego <- read_html(test_path("lego.html.bz2"))
expect_length(xml_find_all(lego, ".//p"), 39)
})
test_that("parse_options errors when given an invalid option", {
expect_error(
parse_options("INVALID", xml_parse_options()),
'`options` "INVALID" is not a valid option'
)
expect_snapshot(
error = TRUE,
read_html(test_path("lego.html.bz2"), options = "INVALID")
)
# Empty inputs returned as 0
expect_identical(0L, parse_options("", xml_parse_options()))
expect_identical(0L, parse_options(NULL, xml_parse_options()))
# Numerics returned as integers
expect_identical(12L, parse_options(12L, xml_parse_options()))
expect_identical(12L, parse_options(12, xml_parse_options()))
# Multiple inputs summed
expect_identical(
3L,
parse_options(c("RECOVER", "NOENT"), xml_parse_options())
)
})
test_that("read_html properly passes parser arguments", {
skip_if_not(libxml2_version() >= "2.9.2")
blanks <- read_html(
xml2_example("cd_catalog.xml"),
options = c("RECOVER", "NOERROR")
)
expect_equal(
sub("\r\n", "\n", fixed = TRUE, as_list(blanks)$html$body$catalog$cd[[1]]),
"\n "
)
no_blanks <- read_html(
xml2_example("cd_catalog.xml"),
options = c("RECOVER", "NOERROR", "NOBLANKS")
)
expect_equal(
as_list(no_blanks)$html$body$catalog$cd[[1]],
list("Empire Burlesque")
)
})
test_that("read_xml works with httr response objects", {
skip("httpbin is unreliable")
x <- read_xml(httr::GET("http://httpbin.org/xml"))
expect_s3_class(x, "xml_document")
expect_length(xml_find_all(x, "//slide"), 2)
})
test_that("read_xml and read_html fail for bad status codes", {
skip("httpbin is unreliable")
expect_error(
read_xml(httr::GET("http://httpbin.org/status/404")),
class = "http_404"
)
expect_error(
read_html(httr::GET("http://httpbin.org/status/404")),
class = "http_404"
)
})
test_that("read_xml works with raw inputs", {
x <- read_xml("<foo/>")
expect_equal(xml_url(x), NA_character_)
})
test_that("read_html works with non-ASCII encodings", {
tmp <- tempfile()
on.exit(unlink(tmp))
writeLines("<html><body>\U2019</body></html>", tmp, useBytes = TRUE)
res <- read_html(tmp, encoding = "UTF-8")
expect_equal(
as.character(res, options = ""),
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body>\U2019</body></html>\n"
)
})
test_that("read_xml and read_html fail with > 1 input", {
expect_snapshot(error = TRUE, {
read_xml(c("foo", "bar"))
read_html(c("foo", "bar"))
})
})
# Fails in libxml2 2.12
#test_that("Truncated HTML should not error", {
# res <- read_html('<html><head')
# expect_s3_class(res, "xml_document")
#})
test_that("read_xml from a textConnection", {
s <- '<?xml version="1.0" encoding="UTF-8"?>\n<outer>\n<inner>Inner</inner>\n</outer>'
con <- textConnection(s)
x <- read_xml(con)
close(con)
expect_s3_class(x, "xml_document")
})
|