File: test-ses.Rout.save

package info (click to toggle)
r-cran-diffobj 0.3.5-1
links: PTS, VCS
area: main
in suites: bookworm, forky, sid, trixie
size: 2,432 kB
sloc: ansic: 455; javascript: 96; sh: 32; makefile: 8
file content (333 lines) | stat: -rwxr-xr-x 12,744 bytes

R version 4.0.4 (2021-02-15) -- "Lost Library Book"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-apple-darwin17.0 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> NAME <- "ses"
> source(file.path('_helper', 'init.R'))
> 
> # Any tests added here should also be added to the valgrind test file
> 
> # - basic ----------------------------------------------------------------------
> 
> all.equal(ses(letters[1:10], letters[1:10]), character())
[1] TRUE
> all.equal(ses(letters[1:10], LETTERS[1:10]), "1,10c1,10")
[1] TRUE
> all.equal(ses(letters[1:5], LETTERS[1:10]), "1,5c1,10")
[1] TRUE
> all.equal(ses(letters[1:10], LETTERS[1:5]), "1,10c1,5")
[1] TRUE
> all.equal(ses(letters[2:10], letters[1:7]), c("0a1", "7,9d7"))
[1] TRUE
> all.equal(
+   ses(letters[c(1:5, 1:5, 1:5)], c("e", "d", "a", "b", "c")),
+   c("1,4d0", "6,8d1", "10d2", "14,15d5")
+ )
[1] TRUE
> all.equal(
+   ses(c("e", "d", "a", "b", "c"), letters[c(1:5, 1:5, 1:5)]),
+   c("0a1,4", "1a6,8", "2a10", "5a14,15")
+ )
[1] TRUE
> # edit distance = 1
> 
> # - trigger edit distance 1 branches -------------------------------------------
> 
> all.equal(ses("a", c("a", "b")), "1a2")
[1] TRUE
> all.equal(ses(c("a", "b"), "a"), "2d1")
[1] TRUE
> all.equal(ses("c", c("b", "c")), "0a1")
[1] TRUE
> all.equal(ses(c("b", "c"), "c"), "1d0")
[1] TRUE
> 
> all.equal(ses("a", character()), "1d0")
[1] TRUE
> all.equal(ses(character(), "a"), "0a1")
[1] TRUE
> all.equal(ses(character(), character()), character())
[1] TRUE
> 
> ## this is from the atomic tests, haven't dug into why they actually trigger
> ## the desired branches, but it is fairly complex
> set.seed(2)
> w1 <- sample(
+   c(
+   "carrot", "cat", "cake", "eat", "rabbit", "holes", "the", "a", "pasta",
+   "boom", "noon", "sky", "hat", "blah", "paris", "dog", "snake"
+   ), 25, replace=TRUE
+ )
> w4 <- w3 <- w2 <- w1
> w2[sample(seq_along(w1), 5)] <- LETTERS[1:5]
> w3 <- w1[8:15]
> w4 <- c(w1[1:5], toupper(w1[1:5]), w1[6:15], toupper(w1[1:5]))
> 
> all.equal(ses(w1, w4), c("5a6,10", "15,21d19", "23,25c21,25"))
[1] TRUE
> 
> # - longer strings -------------------------------------------------------------
> 
> # A bigger string
> 
> string <- do.call(paste0, expand.grid(LETTERS, LETTERS, LETTERS))
> 
> all.equal(
+   ses(string, c("hello", string[-c(5, 500, 1000)], "goodbye")),
+   c("0a1", "5d5", "500d499", "1000d998", "17576a17575")
+ )
[1] TRUE
> all.equal(
+   ses(c(string[200:500], "hello", string[-(1:400)][-c(5, 500, 1000)]), string),
+   c("0a1,199", "207,306d405", "800a900", "1299a1400")
+ )
[1] TRUE
> 
> # - max diffs ------------------------------------------------------------------
> 
> ses(letters[1:10], LETTERS[1:10], max.diffs=5) # "Exceeded `max.diffs`"
[1] "1,10c1,10"
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 20 vs 5 allowed. Diff is probably suboptimal.
> all.equal(
+   ses(letters[1:10], LETTERS[1:10], max.diffs=5, warn=FALSE),
+   "1,10c1,10"
+ )
[1] TRUE
> all.equal(
+   ses(
+     letters[1:10],
+     c(letters[1], LETTERS[2:5], letters[6:10]), max.diffs=5, warn=FALSE
+   ),
+   "2,5c2,5"
+ )
[1] TRUE
> all.equal(
+   ses(
+     letters[1:10],
+     c(letters[1], LETTERS[2:5], letters[6:8], LETTERS[9], letters[10]),
+     max.diffs=5, warn=FALSE
+   ),
+   c("2,5c2,5", "9c9")
+ )
[1] TRUE
> # - Issue 152 --------------------------------------------------------------
> 
> # h/t @hadley, used to error, now warns
> 
> all.equal(ses(letters[1:4], letters[1:3]), "4d3")
[1] TRUE
> all.equal(ses(letters[1:3], letters[1:4]), "3a4")
[1] TRUE
> ses(1, 2:9, max.diffs = 8)
[1] "1c1,8"
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 9 vs 8 allowed. Diff is probably suboptimal.
> 
> # h/t @gadenbui, data is extracted from palmerpenguins@0.1.0::penguins
> #
> # comparison <- subset(penguins, year == 2007 | flipper_length_mm > 220)
> # test <- subset(penguins, year == 2008)
> # a <- test$bill_length_mm
> # b <- comparison$bill_length_mm
> 
> a <- c(39.6, 40.1, 35, 42, 34.5, 41.4, 39, 40.6, 36.5, 37.6, 35.7, 
+ 41.3, 37.6, 41.1, 36.4, 41.6, 35.5, 41.1, 35.9, 41.8, 33.5, 39.7, 
+ 39.6, 45.8, 35.5, 42.8, 40.9, 37.2, 36.2, 42.1, 34.6, 42.9, 36.7, 
+ 35.1, 37.3, 41.3, 36.3, 36.9, 38.3, 38.9, 35.7, 41.1, 34, 39.6, 
+ 36.2, 40.8, 38.1, 40.3, 33.1, 43.2, 49.1, 48.4, 42.6, 44.4, 44, 
+ 48.7, 42.7, 49.6, 45.3, 49.6, 50.5, 43.6, 45.5, 50.5, 44.9, 45.2, 
+ 46.6, 48.5, 45.1, 50.1, 46.5, 45, 43.8, 45.5, 43.2, 50.4, 45.3, 
+ 46.2, 45.7, 54.3, 45.8, 49.8, 46.2, 49.5, 43.5, 50.7, 47.7, 46.4, 
+ 48.2, 46.5, 46.4, 48.6, 47.5, 51.1, 45.2, 45.2, 50.5, 49.5, 46.4, 
+ 52.8, 40.9, 54.2, 42.5, 51, 49.7, 47.5, 47.6, 52, 46.9, 53.5, 
+ 49, 46.2, 50.9, 45.5)
> b <- c(39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, 42, 37.8, 
+ 37.8, 41.1, 38.6, 34.6, 36.6, 38.7, 42.5, 34.4, 46, 37.8, 37.7, 
+ 35.9, 38.2, 38.8, 35.3, 40.6, 40.5, 37.9, 40.5, 39.5, 37.2, 39.5, 
+ 40.9, 36.4, 39.2, 38.8, 42.2, 37.6, 39.8, 36.5, 40.8, 36, 44.1, 
+ 37, 39.6, 41.1, 37.5, 36, 42.3, 46.1, 50, 48.7, 50, 47.6, 46.5, 
+ 45.4, 46.7, 43.3, 46.8, 40.9, 49, 45.5, 48.4, 45.8, 49.3, 42, 
+ 49.2, 46.2, 48.7, 50.2, 45.1, 46.5, 46.3, 42.9, 46.1, 44.5, 47.8, 
+ 48.2, 50, 47.3, 42.8, 45.1, 59.6, 49.6, 50.5, 50.5, 50.1, 50.4, 
+ 46.2, 54.3, 49.8, 49.5, 50.7, 46.4, 48.2, 48.6, 45.2, 52.5, 50, 
+ 50.8, 52.1, 52.2, 49.5, 50.8, 46.9, 51.1, 55.9, 49.1, 49.8, 51.5, 
+ 55.1, 48.8, 50.4, 46.5, 50, 51.3, 45.4, 52.7, 45.2, 46.1, 51.3, 
+ 46, 51.3, 46.6, 51.7, 47, 52, 45.9, 50.5, 50.3, 58, 46.4, 49.2, 
+ 42.4, 48.5, 43.2, 50.6, 46.7, 52)
> 
> # In <0.3.4: Exceeded buffer for finding fake snake
> ses(a[-c(15:38, 50:90)], b[-c(40:85, 100:125)], max.diffs=80)
[1] "1,3c1,9"     "5,13c11,12"  "15,25c14,48" "26a50"       "28,29d51"   
[6] "31c53,57"    "33c59,60"    "35,42c62,67" "44,49d68"   
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 101 vs 80 allowed. Diff is probably suboptimal.
> 
> # In <0.3.4: Faux Snake Process Failed
> ses(a[-(18:38)], b[-(50:80)], max.diffs=115)
 [1] "1,3c1,9"       "5,7c11,26"     "9,12c28,38"    "13a40,46"     
 [5] "15,54c48,82"   "56,68d83"      "70,74c85,88"   "75a90,98"     
 [9] "77c100,101"    "79,86c103,108" "88,93d109"    
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 182 vs 115 allowed. Diff is probably suboptimal.
> 
> # - issue 157 ------------------------------------------------------------------
> 
> # Arguably could match on 'A' instead of 'X' and be more compact
> a <- c('a', 'b', 'c', 'A', 'X', 'Y', 'Z', 'W')
> b <- c('X', 'C', 'A', 'U', 1, 2, 3)
> ses(a, b, max.diffs=13)
[1] "1,4d0"   "6,8c2,7"
> 
> # segfault (but may have been debugging code)
> ses(letters[1:2], LETTERS[1:2], max.diffs = 4)
[1] "1,2c1,2"
> 
> # snake overrun
> ses(c("G", "C", "T", "C", "A", "C", "G", "C"), c("T", "G"), max.diffs=2)
[1] "0a1"   "2,8d2"
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 8 vs 2 allowed. Diff is probably suboptimal.
> 
> # effect of max.diffs on compactness (waldo logical comparison)
> ses(c('A','A','A','A','A'), c('B','A','B','A','B'), max.diffs=0)
[1] "1c1" "3c3" "5c5"
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 6 vs 0 allowed. Diff is probably suboptimal.
> ses(c('A','A','A','A','A'), c('B','A','B','A','B'), max.diffs=1)
[1] "0a1"   "2c3"   "4,5c5"
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 6 vs 1 allowed. Diff is probably suboptimal.
> ses(c('A','A','A','A','A'), c('B','A','B','A','B'), max.diffs=2)
[1] "0a1"   "2,4c3" "5a5"  
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 6 vs 2 allowed. Diff is probably suboptimal.
> 
> # back snake all matches before faux snake triggered
> ses_dat(
+   a=c("T", "A", "A", "C", "C", "A"),
+   b=c("A", "G", "A", "A"), max.diffs = 0
+ )
"ses_dat" object (Match: 3, Delete: 3, Insert: 1):
                
D: T       C C  
M:   A   A     A
I:     G        
Warning message:
In diff_myers(args[["a"]], args[["b"]], max.diffs = args[["max.diffs"]],  :
  Exceeded `max.diffs`: 4 vs 0 allowed. Diff is probably suboptimal.
> 
> # - errors ---------------------------------------------------------------------
> 
> try(ses('a', 'b', max.diffs='hello')) # "must be scalar integer"
Error in ses_prep(a = a, b = b, max.diffs = max.diffs, warn = warn) : 
  Argument `max.diffs` must be scalar integer.
> try(ses('a', 'b', warn='hello'))      # "must be TRUE or FALSE"
Error in ses_prep(a = a, b = b, max.diffs = max.diffs, warn = warn) : 
  Argument `warn` must be TRUE or FALSE.
> 
> a <- structure(1, class='diffobj_ogewlhgiadfl2')
> try(ses(a, 1)) # "could not be coerced"
Error in as.character.diffobj_ogewlhgiadfl2(a) : failure2
Error in ses_prep(a = a, b = b, max.diffs = max.diffs, warn = warn) : 
  Argument `a` is not character and could not be coerced to such
> try(ses(1, a)) # "could not be coerced"
Error in as.character.diffobj_ogewlhgiadfl2(b) : failure2
Error in ses_prep(a = a, b = b, max.diffs = max.diffs, warn = warn) : 
  Argument `b` is not character and could not be coerced to such
> 
> # We want to have a test file that fully covers the C code in order to run
> # valgrind with just that one.  We were unable to isolate simple diffs that
> # triggered all the code, but we were able to do it with the below in addition
> # to the above.
> 
> # - Repeat tests for full coverage in SES file ---------------------------------
> # From test.diffStr.R
> # formula display changed
> if(
+   R.Version()$major >= 3 && R.Version()$minor >= "3.1" ||
+   R.Version()$major >= 4) {
+   rdsf1 <- function(x)
+     readRDS(file.path("_helper", "objs", "diffStr", sprintf("%s.rds", x)))
+   all.equal(
+     as.character(
+       diffStr(mdl1, mdl2, extra=list(strict.width="wrap"), line.limit=30)
+     ),
+     rdsf1(500)
+   )
+ }
[1] TRUE
> # from testthat.warnings.R
> 
> A3 <- c("a b c", "d e f A B C D", "g h i", "f")
> B3 <- c("a b c", "xd e f E Q L S", "g h i", "q")
> 
> diffChr(A3, B3, max.diffs=2) # warn: "Exceeded diff"
[33m<[39m [33mA3[39m              [34m>[39m [34mB3[39m            
[36m@@ 1,4 @@       [39m  [36m@@ 1,4 @@       [39m
  [90m[39ma b c[90m[39m             [90m[39ma b c[90m[39m         
[33m<[39m [90m[39m[33md[39m [33me[39m [33mf[39m [33mA[39m [33mB[39m [33mC[39m [33mD[39m[90m[39m   [34m>[39m [90m[39m[34mxd[39m [34me[39m [34mf[39m [34mE[39m [34mQ[39m [34mL[39m [34mS[39m[90m[39m
[33m<[39m [90m[39m[33mg[39m [33mh[39m [33mi[39m[90m[39m           [34m>[39m [90m[39m[34mg[39m [34mh[39m [34mi[39m[90m[39m         
[33m<[39m [90m[39m[33mf[39m[90m[39m               [34m>[39m [90m[39m[34mq[39m[90m[39m             
Warning message:
Exceeded diff limit during diff computation (6 vs. 2 allowed); overall diff is likely not optimal 
> 
> # - ses_dat --------------------------------------------------------------------
> 
> a <- b <- do.call(paste0, expand.grid(LETTERS, LETTERS))
> set.seed(2)
> b <- b[-sample(length(b), 100)]
> a <- a[-sample(length(b), 100)]
> 
> dat <- ses_dat(a, b)
> all.equal(dat[['val']][dat[['op']] != 'Delete'], b)
[1] TRUE
> all.equal(dat[['val']][dat[['op']] != 'Insert'], a)
[1] TRUE
> all.equal(a[dat[['id.a']][!is.na(dat[['id.a']])]], a)
[1] TRUE
> 
> dat2 <- ses_dat(a, b, extra=FALSE)
> all.equal(dat[1:2], dat2)
[1] TRUE
> all.equal(length(dat2), 2L)
[1] TRUE
> 
> try(ses_dat(a, b, extra=NA)) # 'TRUE or FALSE'
Error in ses_dat(a, b, extra = NA) : 
  Argument `extra` must be TRUE or FALSE.
> 
> # - encoding agnostic #144 -----------------------------------------------------
> 
> # h/t @hadley, these are different in string cache, but should compare equal
> # as per ?identical
> x <- c("fa\xE7ile", "fa\ue7ile")
> Encoding(x) <- c("latin1", "UTF-8")
> y <- rev(x)
> all.equal(diffobj::ses(x, y), character())
[1] TRUE
> 
> proc.time()
   user  system elapsed 
  1.135   0.107   1.244