1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
options(sd_num_thread=1L)
# tests against cases that used to segfault when we did not check
# NULL cases.
expect_error(afind("a","b",nthread=1:4))
expect_error(afind("a","b",nthread="foo"))
expect_error(afind("a","b",nthread=integer(0)))
expect_error(afind("a","b",nthread=NULL))
texts = c("When I grow up, I want to be"
, "one of the harversters of the sea"
, "I think before my days are gone"
, "I want to be a fisherman")
patterns = c("fish", "gone","to be")
out <- afind(texts, patterns, method="osa")
location <- matrix(c(
1, 1, 24,
6, 1, 28,
1, 28, 6,
16, 3, 8),
nrow=4, byrow=TRUE)
distance <- matrix(c(
4, 3, 0,
2, 2, 3,
3, 0, 2,
0, 3, 0),
nrow=4, byrow=TRUE)
match <- matrix(c(
"When", "When", "to be",
"f th", "one ", "he se",
"I th", "gone", "nk be",
"fish", "want", "to be"),
nrow=4, byrow=TRUE)
expect_equal(out$location, location)
expect_equal(out$distance, distance)
expect_equal(out$match, match)
# test paralellization
out1 <- afind(texts, patterns, method="osa", nthread=2L)
expect_identical(out, out1)
# test 'value' option
out2 <- afind(texts, patterns, value=FALSE)
expect_equal(length(out2), 2)
# test grep/grepl equivalents 'grab', 'grabl'
expect_equal(grab(texts, "harvester", maxDist=2), 2)
expect_equal(grab(texts, "harvester", value=TRUE, maxDist=2), "harverste")
expect_equal(grabl(texts, "harvester", maxDist=2)
, c(FALSE,TRUE,FALSE,FALSE))
expect_equal(extract(texts, "harvester", maxDist=2)
, matrix(c(NA, "harverste",NA,NA),nrow=4) )
## Test running_cosine
pattern <- c("phish", "want to")
expect_identical(
afind(texts, pattern, method="cosine", q=3)
, afind(texts, pattern, method="running_cosine", q=3)
)
## test whether the correct positions are returned for all methods.
methods = names(stringdist:::METHODS)
methods = methods[!methods %in% c("soundex","hamming")]
text <- "If you squeeze my lizzard, I put my snake on you."
pattern <- "lizard"
for ( method in methods ){
expect_equal(afind(text, pattern, method=method, q=3, p=0.1)$location[1,1], 19, info=method)
}
## test the usual edge cases
# notice: window size = 0.
expect_equal(afind("foo","")$distance[1], 0)
expect_equal(afind("foo",NA)$distance[1], NA_real_)
expect_equal(afind("foo",NA)$location[1], NA_integer_)
expect_equal(afind("foo",NA)$match[1], NA_character_)
expect_equal(afind(NA,"foo")$distance[1], NA_real_)
expect_equal(afind(NA,"foo")$location[1], NA_integer_)
expect_equal(afind(NA,"foo")$match[1], NA_character_)
expect_equal(afind("","foo")$distance[1], 3)
expect_equal(afind("","foo")$location[1], 1)
expect_equal(afind("","foo")$match[1], "")
expect_equal(grab("foo", ""), 1L)
expect_equal(grabl("foo",""), TRUE)
expect_equal(grab("foo",NA), integer(0))
# note that 'grepl' gives FALSE in this case (which is inconsistent with
# grepl(NA, NA), grepl(NA, "foo").
expect_equal(grabl("foo",NA), NA)
|