File: test-simulations.R

package info (click to toggle)
r-cran-modeldata 1.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,592 kB
  • sloc: sh: 13; makefile: 2
file content (161 lines) | stat: -rw-r--r-- 5,943 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
test_that("classification simulation", {
  set.seed(1)
  dat_1 <- sim_classification(500, num_linear = 0)
  dat_2 <- sim_classification(10, num_linear = 11)
  dat_3 <- sim_classification(1000, num_linear = 1, intercept = 50)
  dat_4 <- sim_classification(500, num_linear = 0, keep_truth = TRUE)

  expect_equal(
    names(dat_1),
    c(
      "class", "two_factor_1", "two_factor_2", "non_linear_1", "non_linear_2",
      "non_linear_3"
    )
  )
  expect_equal(
    names(dat_2),
    c(
      "class", "two_factor_1", "two_factor_2", "non_linear_1", "non_linear_2",
      "non_linear_3", modeldata:::names0(11, "linear_")
    )
  )
  expect_equal(
    names(dat_3),
    c(
      "class", "two_factor_1", "two_factor_2", "non_linear_1", "non_linear_2",
      "non_linear_3", "linear_1"
    )
  )
  expect_equal(
    names(dat_4),
    c(
      "class", "two_factor_1", "two_factor_2", "non_linear_1", "non_linear_2",
      "non_linear_3", ".truth"
    )
  )
  expect_equal(nrow(dat_1), 500)
  expect_equal(nrow(dat_2), 10)
  expect_equal(nrow(dat_3), 1000)
  expect_true(all(vapply(dat_1[, -1], is.numeric, logical(1))))

  expect_equal(sum(dat_3 == "class_2"), 0)
  expect_equal(levels(dat_3$class), paste0("class_", 1:2))
  expect_error(
    sim_classification(5, method = "potato"),
    "must be one of"
  )
})

test_that("sapp_2014_1 simulation", {
  set.seed(1)
  dat_1 <- sim_regression(10, method = "sapp_2014_1")
  dat_2 <- sim_regression(10, method = "sapp_2014_1", keep_truth = TRUE)
  expect_equal(names(dat_1), c("outcome", modeldata:::names0(20, "predictor_")))
  expect_equal(names(dat_2), c("outcome", modeldata:::names0(20, "predictor_"), ".truth"))
  expect_equal(nrow(dat_1), 10)
  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
  expect_error(
    sim_regression(5, method = "potato"),
    "must be one of"
  )
})

test_that("sapp_2014_2 simulation", {
  set.seed(1)
  dat_1 <- sim_regression(10, method = "sapp_2014_2")
  dat_2 <- sim_regression(10, method = "sapp_2014_2", keep_truth = TRUE)
  expect_equal(names(dat_1), c("outcome", modeldata:::names0(200, "predictor_")))
  expect_equal(names(dat_2), c("outcome", modeldata:::names0(200, "predictor_"), ".truth"))
  expect_equal(nrow(dat_1), 10)
  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
})

test_that("van_der_laan_2007_1 simulation", {
  set.seed(1)
  dat_1 <- sim_regression(10, method = "van_der_laan_2007_1")
  dat_2 <- sim_regression(10, method = "van_der_laan_2007_1", factors = TRUE)
  dat_3 <- sim_regression(10, method = "van_der_laan_2007_1", keep_truth = TRUE)
  expect_equal(names(dat_1), c("outcome", modeldata:::names0(10, "predictor_")))
  expect_equal(names(dat_3), c("outcome", modeldata:::names0(10, "predictor_"), ".truth"))
  expect_equal(nrow(dat_1), 10)
  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
  expect_true(all(vapply(dat_1[, -1], is.integer, logical(1))))
  expect_true(all(vapply(dat_2[, -1], is.factor, logical(1))))
  expect_equal(levels(dat_2$predictor_01), c("yes", "no"))
})

test_that("van_der_laan_2007_2 simulation", {
  set.seed(1)
  dat_1 <- sim_regression(10, method = "van_der_laan_2007_2")
  dat_2 <- sim_regression(10, method = "van_der_laan_2007_2", keep_truth = TRUE)
  expect_equal(names(dat_1), c("outcome", modeldata:::names0(20, "predictor_")))
  expect_equal(names(dat_2), c("outcome", modeldata:::names0(20, "predictor_"), ".truth"))
  expect_equal(nrow(dat_1), 10)
  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
})

test_that("hooker_2004 simulation", {
  set.seed(1)
  dat_1 <- sim_regression(10, method = "hooker_2004")
  dat_2 <- sim_regression(10, method = "hooker_2004", keep_truth = TRUE)
  expect_equal(names(dat_1), c("outcome", modeldata:::names0(10, "predictor_")))
  expect_equal(names(dat_2), c("outcome", modeldata:::names0(10, "predictor_"), ".truth"))
  expect_equal(nrow(dat_1), 10)
  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
})


test_that("noise simulation", {
  set.seed(1)
  dat_1 <- sim_noise(1000, num_vars = 10)
  dat_2 <- sim_noise(1000, num_vars = 3, cov_param = .5)
  dat_3 <- sim_noise(1000, num_vars = 3, cov_type = "toeplitz", cov_param = .99)
  dat_4 <- sim_noise(10, num_vars = 3, outcome = "classification")
  dat_5 <- sim_noise(10, num_vars = 3, outcome = "classification", num_classes = 10)
  dat_6 <- sim_noise(10, num_vars = 3, outcome = "regression")

  expect_equal(names(dat_1), modeldata:::names0(10, "noise_"))
  expect_equal(names(dat_2), modeldata:::names0(3, "noise_"))
  expect_equal(nrow(dat_1), 1000)
  expect_equal(nrow(dat_4), 10)

  expect_true(all(vapply(dat_1, is.numeric, logical(1))))
  expect_true(all(vapply(dat_1[, -1], is.numeric, logical(1))))
  expect_true(is.factor(dat_5$class))
  expect_true(all(vapply(dat_6, is.numeric, logical(1))))

  cor_1 <- cor(dat_1)[upper.tri(cor(dat_1))]
  expect_true(all(cor_1 <= 0.1 & cor_1 >= -0.1))

  cor_2 <- cor(dat_2)[upper.tri(cor(dat_2))]
  expect_true(all(cor_2 <= 0.6 & cor_2 >= 0.4))

  cor_3 <- cor(dat_3)[upper.tri(cor(dat_3))]
  expect_true(all(cor_3 >= 0.95))

  expect_equal(levels(dat_4$class), paste0("class_", 1:2))
  expect_equal(levels(dat_5$class), modeldata:::names0(10, "class_"))
})


test_that("logistic simulation", {
  set.seed(1)
  dat_1 <- sim_logistic(10, ~ A)
  dat_2 <- sim_logistic(10, rlang::expr(~ B), keep_truth = TRUE)
  expect_equal(names(dat_1), c(LETTERS[1:2], "class"))
  expect_equal(names(dat_2), c(LETTERS[1:2], ".linear_pred", ".truth", "class"))
  expect_equal(nrow(dat_1), 10)
})


test_that("multinomial simulation", {
  expect_snapshot_error(sim_multinomial(10, ~ A + C, ~ B, ~ A + B))
  set.seed(1)
  dat_1 <- sim_multinomial(10, ~ A, ~ B, ~ A + B)
  dat_2 <- sim_multinomial(10, ~ A, ~ B, ~ A + B, keep_truth = TRUE)
  expect_equal(names(dat_1), c(LETTERS[1:2], "class"))
  expect_equal(names(dat_2), c(LETTERS[1:2], "class", ".truth_one", ".truth_two", ".truth_three"))
  expect_equal(nrow(dat_1), 10)
})