File: nafill.Rraw

package info (click to toggle)
r-cran-data.table 1.14.8%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 15,936 kB
  • sloc: ansic: 15,680; sh: 100; makefile: 6
file content (211 lines) | stat: -rw-r--r-- 13,802 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
require(methods)
if (!exists("test.data.table", .GlobalEnv, inherits = FALSE)) {
  # Package mode: test.data.table is not defined in the global environment, so
  # attach data.table and bind the unexported helpers this file calls.
  require(data.table)
  test <- data.table:::test
  INT <- data.table:::INT
  colnamesInt <- data.table:::colnamesInt
  coerceFill <- data.table:::coerceFill
  # Re-bind exported functions that other attached packages mask. Only test
  # files need this, because they run the way a user's script would; calls made
  # inside data.table's own code are not affected by the masking.
  copy <- data.table::copy        # masked by bit64; copy is used in this file, so this is needed
  setattr <- data.table::setattr  # masked by bit; not used in this file, kept in case that changes
} else {
  # Dev mode: test.data.table already exists in the global environment.
  # enableJIT(-1) is called only to obtain the JIT level reported in the message.
  tt <- compiler::enableJIT(-1)
  if (tt > 0) {
    cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep = "")
  }
}

# Optional packages. Each one is attached quietly if possible, and the outcome
# is stored in a logical variable named test_<package> that later sections
# use to decide whether to run the tests depending on it.
sugg <- c("bit64", "nanotime")
for (s in sugg) {
  loaded <- suppressWarnings(suppressMessages(require(s, character.only = TRUE)))
  assign(paste0("test_", s), loaded)
  if (!loaded) {
    cat("\n**** Suggested package", s, "is not installed. Tests using it will be skipped.\n\n")
  }
}

# nafill on an integer vector with NA at positions 1-2, 5-6 and 9-10
x = 1:10
x[c(1:2, 5:6, 9:10)] = NA
test(1.01, nafill(x, "locf"), INT(NA,NA,3,4,4,4,7,8,8,8))
test(1.02, nafill(x, "nocb"), INT(3,3,3,4,7,7,7,8,NA,NA))
test(1.03, nafill(x, fill=0L), INT(0,0,3,4,0,0,7,8,0,0))
test(1.04, nafill(x, fill=5), INT(5,5,3,4,5,5,7,8,5,5))
# for integer input, each of the following fill values is expected to return x unchanged
test(1.05, nafill(x, fill=NA_integer_), x)
test(1.06, nafill(x, fill=NA), x)
test(1.07, nafill(x, fill=NA_real_), x)
test(1.08, nafill(x, fill=Inf), x)
test(1.09, nafill(x, fill=NaN), x)
# same checks on a double vector
y = x/2
test(1.11, nafill(y, "locf"), c(NA,NA,3,4,4,4,7,8,8,8)/2)
test(1.12, nafill(y, "nocb"), c(3,3,3,4,7,7,7,8,NA,NA)/2)
test(1.13, nafill(y, fill=0L), c(0,0,3,4,0,0,7,8,0,0)/2)
test(1.14, nafill(y, fill=5/2), c(5,5,3,4,5,5,7,8,5,5)/2)
test(1.15, nafill(y, fill=NA_integer_), y)
test(1.16, nafill(y, fill=NA), y)
test(1.17, nafill(y, fill=NA_real_), y)
# for double input, Inf and NaN fill values are expected to appear in the result
test(1.18, nafill(y, fill=Inf), c(Inf,Inf,3,4,Inf,Inf,7,8,Inf,Inf)/2)
test(1.19, nafill(y, fill=NaN), c(NaN,NaN,3,4,NaN,NaN,7,8,NaN,NaN)/2)
# double vector that also holds NaN, Inf and -Inf: the expected results fill the NaN
# like an NA, while Inf and -Inf are kept and carried into neighbouring NAs
z = y
z[5L] = NaN
z[2L] = Inf
z[9L] = -Inf
test(1.21, nafill(z, "locf"), c(NA,Inf,3,4,4,4,7,8,-Inf,-Inf)/2)
test(1.22, nafill(z, "nocb"), c(Inf,Inf,3,4,7,7,7,8,-Inf,NA)/2)
# data.table input: expected result is the unnamed list of per-column nafill results
dt = data.table(x, y, z)
test(1.31, nafill(dt, "locf"), unname(lapply(dt, nafill, "locf")))
test(1.32, nafill(dt, "nocb"), unname(lapply(dt, nafill, "nocb")))
test(1.33, nafill(dt, fill=0), unname(lapply(dt, nafill, fill=0)))
# plain list input whose elements have different lengths
l = list(x, y[1:8], z[1:6])
test(1.41, nafill(l, "locf"), lapply(l, nafill, "locf"))
test(1.42, nafill(l, "nocb"), lapply(l, nafill, "nocb"))
test(1.43, nafill(l, fill=0), lapply(l, nafill, fill=0))
# named list mixing integer and Date vectors: expected results are unnamed and keep the Date class
l = list(a=c(1:2,NA,4:5), b=as.Date(c(1:2,NA,4:5), origin="1970-01-01"), d=c(NA,2L,NA,4L,NA), e=as.Date(c(NA,2L,NA,4L,NA), origin="1970-01-01")) # Date retain class #3617
test(1.44, nafill(l, "locf"), list(c(1:2,2L,4:5), structure(c(1,2,2,4,5), class="Date"), c(NA,2L,2L,4L,4L), structure(c(NA,2,2,4,4), class="Date")))
test(1.45, nafill(l, "nocb"), list(c(1:2,4L,4:5), structure(c(1,2,4,4,5), class="Date"), c(2L,2L,4L,4L,NA), structure(c(2,2,4,4,NA), class="Date")))
# fill supplied as a plain number (1.46) and as a Date (1.47, 1.48)
test(1.46, nafill(l, fill=0), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date")))
test(1.47, nafill(l, fill=as.Date(0, origin="1970-01-01")), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date")))
test(1.48, nafill(l, fill=as.Date("2019-06-05")), list(c(1:2,18052L,4:5), structure(c(1,2,18052,4,5), class="Date"), c(18052L,2L,18052L,4L,18052L), structure(c(18052,2,18052,4,18052), class="Date")))
# zero-length input
test(1.49, nafill(numeric()), numeric())
# integer64 coverage; runs only when bit64 could be loaded (test_bit64 is set by the package loop earlier in this file)
if (test_bit64) {
  # list of integer64 vectors; results are compared through their character representation
  l = list(a=as.integer64(c(1:2,NA,4:5)), b=as.integer64(c(NA,2L,NA,4L,NA)))
  test(1.61, lapply(nafill(l, "locf"), as.character), lapply(list(c(1:2,2L,4:5), c(NA,2L,2L,4L,4L)), as.character))
  test(1.62, lapply(nafill(l, "nocb"), as.character), lapply(list(c(1:2,4L,4:5), c(2L,2L,4L,4L,NA)), as.character))
  test(1.63, lapply(nafill(l, fill=0), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character))
  test(1.64, lapply(nafill(l, fill=as.integer64(0)), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character))
  test(1.65, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("1","2","3000000000","4","5"), c("3000000000","2","3000000000","4","3000000000")))
  # shift every value by 3000000000 so the data itself no longer fits in a 32-bit integer
  l = lapply(l, `+`, as.integer64("3000000000"))
  test(1.66, lapply(nafill(l, "locf"), as.character), list(c("3000000001","3000000002","3000000002","3000000004","3000000005"), c(NA_character_,"3000000002","3000000002","3000000004","3000000004")))
  test(1.67, lapply(nafill(l, "nocb"), as.character), list(c("3000000001","3000000002","3000000004","3000000004","3000000005"), c("3000000002","3000000002","3000000004","3000000004",NA_character_)))
  test(1.68, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("3000000001","3000000002","3000000000","3000000004","3000000005"), c("3000000000","3000000002","3000000000","3000000004","3000000000")))
  # integer64 fill value applied to integer (1.69, 1.70) and double (1.71, 1.72) vectors
  test(1.69, nafill(c(1L,2L,NA,4L), fill=as.integer64(3L)), 1:4)
  test(1.70, nafill(c(1L,2L,NA,4L), fill=as.integer64(NA)), c(1:2,NA,4L))
  test(1.71, nafill(c(1,2,NA,4), fill=as.integer64(3)), c(1,2,3,4))
  test(1.72, nafill(c(1,2,NA,4), fill=as.integer64(NA)), c(1,2,NA,4))
  # double, integer and NA fill values applied to an integer64 vector
  test(1.73, nafill(as.integer64(c(1,2,NA,4)), fill=3), as.integer64(1:4))
  test(1.74, nafill(as.integer64(c(1,2,NA,4)), fill=3L), as.integer64(1:4))
  test(1.75, nafill(as.integer64(c(1,2,NA,4)), fill=NA_integer_), as.integer64(c(1:2,NA,4L)))
  test(1.76, nafill(as.integer64(c(1,2,NA,4)), fill=NA_real_), as.integer64(c(1:2,NA,4L)))
  test(1.77, nafill(as.integer64(c(1,2,NA,4)), fill=NA), as.integer64(c(1:2,NA,4L)))
}
# nanotime coverage; skipped when the package could not be loaded.
# Actual and expected values are both converted to character before comparison.
if (test_nanotime) {
  l <- list(
    a = nanotime(c(1:2, NA, 4:5)),
    b = nanotime(c(NA, 2L, NA, 4L, NA))
  )
  test(
    1.91,
    lapply(nafill(l, "locf"), as.character),
    lapply(list(nanotime(c(1:2, 2L, 4:5)), nanotime(c(NA, 2L, 2L, 4L, 4L))), as.character)
  )
  test(
    1.92,
    lapply(nafill(l, "nocb"), as.character),
    lapply(list(nanotime(c(1:2, 4L, 4:5)), nanotime(c(2L, 2L, 4L, 4L, NA))), as.character)
  )
  # numeric fill value
  test(
    1.93,
    lapply(nafill(l, fill = 0), as.character),
    lapply(list(nanotime(c(1:2, 0L, 4:5)), nanotime(c(0L, 2L, 0L, 4L, 0L))), as.character)
  )
  # nanotime fill value
  test(
    1.94,
    lapply(nafill(l, fill = nanotime(0)), as.character),
    lapply(list(nanotime(c(1:2, 0L, 4:5)), nanotime(c(0L, 2L, 0L, 4L, 0L))), as.character)
  )
}

# setnafill: fill in place, then compare against nafill applied to an untouched copy (db)
dt <- data.table(V1 = 1:10, V2 = 10:1, V3 = 1:10 / 2)
dt[c(1L, 4:5, 9:10), V1 := NA]
dt[c(2:3, 5:6, 10L), V2 := NA]
dt[c(1:2, 5:6, 9:10), V3 := NA]
db <- copy(dt)

# all columns
test(2.01, {
  setnafill(dt, fill = 0)
  dt
}, as.data.table(nafill(db, fill = 0)))
dt <- copy(db)
test(2.02, {
  setnafill(dt, "locf")
  dt
}, as.data.table(nafill(db, "locf")))
dt <- copy(db)
test(2.03, {
  setnafill(dt, "nocb")
  dt
}, as.data.table(nafill(db, "nocb")))

# only the columns named in cols; V1 is expected to stay as it was
dt <- copy(db)
test(2.04, {
  setnafill(dt, fill = 0, cols = c("V2", "V3"))
  dt
}, db[, c(list(V1), nafill(.SD, fill = 0)), .SDcols = c("V2", "V3")])
dt <- copy(db)
test(2.05, {
  setnafill(dt, "locf", cols = c("V2", "V3"))
  dt
}, db[, c(list(V1), nafill(.SD, "locf")), .SDcols = c("V2", "V3")])
dt <- copy(db)
test(2.06, {
  setnafill(dt, "nocb", cols = c("V2", "V3"))
  dt
}, db[, c(list(V1), nafill(.SD, "nocb")), .SDcols = c("V2", "V3")])

# a character column outside cols is present in the table
db[, "V4" := c(letters[1:3], NA, letters[5:7], NA, letters[9:10])]
dt <- copy(db)
test(2.07, {
  setnafill(dt, "locf", cols = c("V2", "V3"))
  dt
}, db[, c(list(V1), nafill(.SD, "locf"), list(V4)), .SDcols = c("V2", "V3")])

# plain list holding integer and Date vectors; Date retain class #3617
l <- list(
  a = c(1:2, NA, 4:5),
  b = as.Date(c(1:2, NA, 4:5), origin = "1970-01-01"),
  d = c(NA, 2L, NA, 4L, NA),
  e = as.Date(c(NA, 2L, NA, 4L, NA), origin = "1970-01-01")
)
setnafill(l, fill = as.Date("2019-06-05"))
test(
  2.08,
  unname(l),
  list(
    c(1:2, 18052L, 4:5),
    structure(c(1, 2, 18052, 4, 5), class = "Date"),
    c(18052L, 2L, 18052L, 4L, 18052L),
    structure(c(18052, 2, 18052, 4, 18052), class = "Date")
  )
)

# exceptions test coverage: warnings and errors raised for invalid argument combinations
x <- 1:10

# fill supplied together with a non-constant type: a warning is expected
test(
  3.01,
  nafill(x, "locf", fill = 0L),
  nafill(x, "locf"),
  warning = "argument 'fill' ignored"
)
test(
  3.02,
  setnafill(list(copy(x)), "locf", fill = 0L),
  setnafill(list(copy(x)), "locf"),
  warning = "argument 'fill' ignored"
)

# unsupported input types
test(3.03, setnafill(x, "locf"), error = "in-place update is supported only for list")
test(3.04, nafill(letters[1:5], fill = 0), error = "must be numeric type, or list/data.table")
test(3.05, setnafill(list(letters[1:5]), fill = 0), error = "must be numeric type, or list/data.table")

# invalid fill values
test(3.06, nafill(x, fill = 1:2), error = "fill must be a vector of length 1")
test(3.07, nafill(x, fill = "asd"), error = "fill argument must be numeric")

# colnamesInt helper: checks the integer column positions returned for a column selection
dt = data.table(a=1, b=2, d=3)
# a single column given by name, integer position or double position
test(4.01, colnamesInt(dt, "a"), 1L)
test(4.02, colnamesInt(dt, 1L), 1L)
test(4.03, colnamesInt(dt, 1), 1L)
# several columns
test(4.04, colnamesInt(dt, c("a","d")), c(1L, 3L))
test(4.05, colnamesInt(dt, c(1L, 3L)), c(1L, 3L))
test(4.06, colnamesInt(dt, c(1, 3)), c(1L, 3L))
# a column that does not exist, including NA
test(4.07, colnamesInt(dt, c("a", "e")), error="specify non existing column*.*e")
test(4.08, colnamesInt(dt, c(1L, 4L)), error="specify non existing column*.*4")
test(4.09, colnamesInt(dt, c(1, 4)), error="specify non existing column*.*4")
test(4.10, colnamesInt(dt, c("a", NA)), error="specify non existing column*.*NA")
test(4.11, colnamesInt(dt, c(1L, NA)), error="specify non existing column")
test(4.12, colnamesInt(dt, c(1, NA)), error="specify non existing column")
# duplicated columns are an error when check_dups=TRUE
test(4.13, colnamesInt(dt, c("a","d","a"), check_dups=TRUE), error="specify duplicated column")
test(4.14, colnamesInt(dt, c(1L, 3L, 1L), check_dups=TRUE), error="specify duplicated column")
test(4.15, colnamesInt(dt, c(1, 3, 1), check_dups=TRUE), error="specify duplicated column")
# selections that are neither character nor numeric
test(4.16, colnamesInt(dt, list("a")), error="must be character or numeric")
test(4.17, colnamesInt(dt, NA), error="must be character or numeric")
# zero-length selections give integer(); NULL gives every column
test(4.18, colnamesInt(dt, character()), integer())
test(4.19, colnamesInt(dt, numeric()), integer())
test(4.20, colnamesInt(dt, integer()), integer())
test(4.21, colnamesInt(dt, NULL), seq_along(dt))
# invalid first argument and invalid check_dups
test(4.22, colnamesInt("asd", 1), error="must be data.table compatible")
test(4.23, colnamesInt(dt, 1, check_dups="a"), error="check_dups")
# selecting by name after the names have been removed
names(dt) <- NULL
test(4.24, colnamesInt(dt, "a"), error="has no names")

# verbose: with datatable.verbose switched on, the printed output is matched
# against the timing-message patterns below
dt <- data.table(a = c(1L, 2L, NA_integer_), b = c(1, 2, NA_real_))
old <- options(datatable.verbose = TRUE)
test(
  5.01,
  nafill(dt, "locf"),
  output = "nafillInteger: took.*nafillDouble: took.*nafillR.*took"
)
test(
  5.02,
  setnafill(dt, "locf"),
  output = "nafillInteger: took.*nafillDouble: took.*nafillR.*took"
)
if (test_bit64) {
  test(
    5.03,
    nafill(as.integer64(c(NA, 2, NA, 3)), "locf"),
    as.integer64(c(NA, 2, 2, 3)),
    output = "nafillInteger64: took.*nafillR.*took"
  )
}
# put back the option values that were in effect before this section
options(old)

# coerceFill: the expected values below are lists of three elements (integer, double, integer64),
# so the whole section runs only when bit64 could be loaded
if (test_bit64) {
  # invalid fill arguments
  test(6.01, coerceFill(1:2), error="fill argument must be length 1")
  test(6.02, coerceFill("a"), error="fill argument must be numeric")
  # logical NA
  test(6.11, identical(coerceFill(NA), list(NA_integer_, NA_real_, as.integer64(NA))))
  # integer input
  test(6.21, identical(coerceFill(3L), list(3L, 3, as.integer64(3))))
  test(6.22, identical(coerceFill(0L), list(0L, 0, as.integer64(0))))
  test(6.23, identical(coerceFill(NA_integer_), list(NA_integer_, NA_real_, as.integer64(NA))))
  # integer64 input; 3000000003 is expected to give NA_integer_ in the integer slot
  test(6.31, identical(coerceFill(as.integer64(3)), list(3L, 3, as.integer64(3))))
  test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003"))))
  test(6.33, identical(coerceFill(as.integer64(0)), list(0L, 0, as.integer64(0))))
  test(6.34, identical(coerceFill(as.integer64(NA)), list(NA_integer_, NA_real_, as.integer64(NA))))
  # double input; NaN and +/-Inf are expected to survive only in the double slot
  test(6.41, identical(coerceFill(3), list(3L, 3, as.integer64(3))))
  test(6.42, identical(coerceFill(0), list(0L, 0, as.integer64(0))))
  test(6.43, identical(coerceFill(NA_real_), list(NA_integer_, NA_real_, as.integer64(NA))))
  test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA))))
  test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))))
  test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))))
  # large negative doubles: -(2^62) is expected to be kept as integer64, -(2^64) to become NA there
  test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))))
  test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))))
  # values around -2147483648, given as integer64: only -2147483647 is expected to be kept in the integer slot
  # (note these calls also overwrite the global x)
  test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x)))
  test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)))
  test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)))
  # the same three values given as double
  test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647"))))
  test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648,  as.integer64("-2147483648"))))
  test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649,  as.integer64("-2147483649"))))
}

# nan argument to treat NaN as NA in nafill, #4020
x <- c(-1, NA, NaN, 0, 1, 2)
ans1 <- c(-1, 0, 0, 0:2)    # NA and NaN both replaced
ans2 <- c(-1, 0, NaN, 0:2)  # only NA replaced, NaN left in place
DT <- data.table(a = x, b = x)

# without nan= the NaN is expected to be filled; with nan=NaN it is expected to stay
test(7.01, nafill(x, fill = 0), ans1)
test(7.02, nafill(x, fill = 0, nan = NaN), ans2)
test(7.03, nafill(x, "locf"), c(-1, -1, -1, 0:2))
test(7.04, nafill(x, "locf", nan = NaN), c(-1, -1, NaN, 0:2))
test(7.05, nafill(x, "nocb"), ans1)
test(7.06, nafill(x, "nocb", nan = NaN), c(-1, NaN, NaN, 0:2))

# in-place variant
# NOTE(review): the expected value copies DT inside the same call that fills it
# in place, so the result presumably depends on the order in which test()
# evaluates its arguments - confirm before reordering anything here.
test(7.07, setnafill(DT, fill = 0, cols = 1L), copy(DT)[, a := ans1])
test(7.08, setnafill(DT, fill = 0, nan = NaN), copy(DT)[, c("a", "b") := .(ans1, ans2)])

# invalid nan values
test(7.09, nafill(x, fill = 0, nan = c(NA, NaN)), error = "Argument 'nan' must be length 1")
test(7.10, nafill(x, fill = 0, nan = Inf), error = "Argument 'nan' must be NA or NaN")