File: test-utf8_valid.R

package info (click to toggle)
r-cran-utf8 1.2.6-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,060 kB
  • sloc: ansic: 25,890; python: 1,616; sh: 13; makefile: 6
file content (54 lines) | stat: -rw-r--r-- 1,522 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
test_that("'as_utf8' errors on latin1 declared to be UTF-8", {
  x <- c("a", "b", "the command of her beauty, and her \xa320,000", "d")
  Encoding(x) <- "UTF-8"

  expect_equal(utf8_valid(x), c(TRUE, TRUE, FALSE, TRUE))
  expect_error(
    as_utf8(x),
    "entry 3 has wrong Encoding; marked as \"UTF-8\" but invalid leading byte (0xA3) at position 36",
    fixed = TRUE
  )
})


test_that("utf8_valid errors on invalid UTF-8", {
  x <- c("a", "b", "c", "d", "\xf8\x88\x80\x80\x80") # intToUtf8(0x00200000)
  Encoding(x) <- "UTF-8"

  expect_equal(utf8_valid(x), c(TRUE, TRUE, TRUE, TRUE, FALSE))
  expect_error(
    as_utf8(x),
    "entry 5 has wrong Encoding; marked as \"UTF-8\" but invalid leading byte (0xF8) at position 1",
    fixed = TRUE
  )
})


test_that("utf8_valid passes on valid UTF-8 in bytes encoding", {
  x <- "hello\u2002"
  Encoding(x) <- "bytes"
  expect_equal(utf8_valid(x), TRUE)

  y <- x
  Encoding(y) <- "UTF-8"
  expect_equal(as_utf8(x), enc2utf8(y))
})


test_that("utf8_valid passes on valid ASCII in unknown encoding", {
  x <- "world"
  expect_equal(utf8_valid(x), TRUE)
  expect_equal(as_utf8(x), enc2utf8(x))
})


test_that("utf8_valid errors on invalid UTF8 in bytes encoding", {
  x <- paste0("hello", "\xfc\x8f\xbf\xbf\xbf\xbf") # intToUtf8(0xfffffff)
  Encoding(x) <- "bytes"
  expect_equal(utf8_valid(x), FALSE)
  expect_error(
    as_utf8(x),
    "entry 1 cannot be converted from \"bytes\" Encoding to \"UTF-8\"; invalid leading byte (0xFC) at position 6",
    fixed = TRUE
  )
})