File: test1-sCorrect-missingValues.R

package info (click to toggle)
r-cran-lavasearch2 2.0.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,832 kB
  • sloc: cpp: 28; sh: 13; makefile: 2
file content (165 lines) | stat: -rw-r--r-- 7,245 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
### test1-sCorrect-missingValues.R --- 
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar  7 2018 (13:39) 
## Version: 
## Last-Updated: Jan 17 2022 (23:21) 
##           By: Brice Ozenne
##     Update #: 57
##----------------------------------------------------------------------
## 
### Commentary: 
## 
### Change Log:
##----------------------------------------------------------------------
## 
### Code:

## * header
## rm(list = ls())
if(FALSE){ ## already called in test-all.R
    library(testthat)
    library(lavaSearch2)
}

lava.options(symbols = c("~","~~"))
library(nlme)
context("sCorrect (dealing with missing values)")

## * simulation
n <- 2e1
mSim <- lvm(c(Y1~eta1,Y2~eta1+X2,Y3~eta1+X1,
           Z1~eta2,Z2~eta2,Z3~eta2+X3))
regression(mSim) <- eta1~X1+Gender
latent(mSim) <- ~eta1+eta2
categorical(mSim, labels = c("Male","Female")) <- ~Gender
transform(mSim, Id~Y1) <- function(x){1:NROW(x)}
set.seed(10)
d <- lava::sim(mSim, n = n, latent = FALSE)
dL <- reshape2::melt(d, id.vars = c("Id","X1","X2","X3","Gender"),
           measure.vars = c("Y1","Y2","Y3","Z1","Z2","Z3"))
dLred <- dL[dL$variable %in% c("Y1","Y2","Y3"),]
dLred2 <- dL[dL$variable %in% c("Y1","Y2"),]
dLred3 <- dLred2
dLred3[dL$variable == "Y2","Id"] <- paste0("-",dLred3[dL$variable == "Y2","Id"])

## ** t test
## formula:
## df = \frac{ 2 * s_pool^2 }{ var(s_pool^2) }
##    = \frac{ ( s_X^2/m + s_Y^2/n )^2}{( s_X^4/(m(m-1)) + s_Y^4/(n(n-1)))}

## using the t test function
e.ttest <- t.test(d$Y1, d$Y2)
e.ttest$parameter

## by hand
sX1 <- var(d$Y1)/n
sX2 <- var(d$Y2)/n
df <- (sX1+sX2)^2/(sX1^2/(n-1) + sX2^2/(n-1))
df-e.ttest$parameter

## * LVM: factor model
m <- lvm(c(Y1~eta1,Y2~eta1,Y3~eta1+X1))
regression(m) <- eta1~X1+X2

e.lvm <- estimate(m,d)
e2.lvm <- estimate2(e.lvm)

## ** complete case analysis
missing.Row <- d[1,]
missing.Row[,"Id"] <- -1
missing.Row[,c("Y1","Y2","Y3")] <- NA
## eNA.lvm <- estimate(m, rbind(d,missing.Row), missing = FALSE)
eNA.lvm <- estimate(m, rbind(missing.Row,d,missing.Row), missing = FALSE)

test_that("complete case analysis (factor model)", {
    expect_equal(unname(score2(eNA.lvm, ssc = FALSE, indiv = TRUE)), unname(score(eNA.lvm, indiv = TRUE)))
    ## FIX NA!!!!!!
    
    eNA2.lvm <- estimate2(eNA.lvm)
    
    expect_equal(model.tables(eNA2.lvm), model.tables(e2.lvm))
    ##               estimate        se        df       lower     upper   statistic     p.value
    ## eta1       -0.37387088 0.2962948 18.338440 -0.99554076 0.2477990 -1.26182045 0.222827159
    ## Y2         -0.02252887 0.3297457 15.258888 -0.72432797 0.6792702 -0.06832194 0.946416628
    ## Y3          0.38272845 0.2851559  5.682026 -0.32459821 1.0900551  1.34217267 0.230678148
    ## eta1~X1     0.99599616 0.3134807 18.469394  0.33859531 1.6533970  3.17721651 0.005092730
    ## eta1~X2    -0.04275890 0.2607688  9.859874 -0.62490927 0.5393915 -0.16397243 0.873065310
    ## Y2~eta1     1.05707590 0.2723211  5.481730  0.37515670 1.7389951  3.88172564 0.009722483
    ## Y3~eta1     1.08664682 0.3566308  1.982418 -0.46092704 2.6342207  3.04697992 0.093951240
    ## Y3~X1       0.61495545 0.4296209  3.045199 -0.74088088 1.9707918  1.43139085 0.246430988
    ## Y1~~Y1      0.49861889 0.3398654  5.101312 -0.36983983 1.3670776          NA          NA
    ## eta1~~eta1  1.10299240 0.5611968  4.190452 -0.42760479 2.6335896          NA          NA
    ## Y2~~Y2      1.60214650 0.6249695  4.823447 -0.02223785 3.2265309          NA          NA
    ## Y3~~Y3      0.64437632 0.4273389  4.023035 -0.53943230 1.8281849          NA          NA
})

## ** full information

### *** example of issue with lava
m <- lvm(c(Y1~1*eta1,Y2~1*eta1,Y3~1*eta1+X1,
           eta1~1))

missing.Row <- d[1,]
missing.Row[,"Id"] <- -1
missing.Row[,c("Y1","Y2")] <- NA
missing.Row2 <- d[3,]
missing.Row2[,"Id"] <- -2
missing.Row2[,c("Y1","Y3")] <- NA

dNA.wide <- rbind(missing.Row,d,missing.Row2)
dNA.long <- melt(dNA.wide, measure.vars = c("Y1","Y2","Y3"))
dNA.long$variable <- factor(dNA.long$variable, levels = c("Y1","Y2","Y3"))

test_that("full information (issue lava)", {
    eNA.lvm <- estimate(m, data = dNA.wide, missing = TRUE)
    test <- estimate2(eNA.lvm, ssc = FALSE)

    hessian.GS <- numDeriv::hessian(func = function(x){logLik(eNA.lvm, p = x)},
                                    x = coef(eNA.lvm))
    hessian.info <- information(eNA.lvm, p = coef(eNA.lvm), type = "hessian")
    hessian.lavaSearch2 <- hessian2(test)

    expect_equal(unname(hessian.lavaSearch2), unname(hessian.GS), tol = 1e-6)
    ## expect_equal(unname(hessian.info), unname(hessian.GS), tol = 1e-6) ## fails
})


## *** factor model
m <- lvm(c(Y1~eta1,Y2~eta1,Y3~eta1+X1))
regression(m) <- eta1~X1+X2

eNA.lvm <- estimate(m, dNA.wide, missing = TRUE)

test_that("full information (factor model)", {

    hessian.GS <- numDeriv::hessian(func = function(x){logLik(eNA.lvm, p = x)},
                                    x = coef(eNA.lvm))
    expect_equal(hessian.GS, unname(hessian2(eNA.lvm, ssc = FALSE)), tol = 1e-6)

    ## NOT TRUE!!!! issue in lava or on purpose?
    ## expect_equal(unname(information(eNA.lvm)), unname(solve(vcov(eNA.lvm))), tol = 1e-6)
    ## NOT TRUE !!! bug in lava? (see previous ***)
    ## expect_equal(information(eNA.lvm), unname(information2(eNA.lvm, ssc = FALSE)), tol = 1e-6)
    eNA2.lvm <- estimate2(eNA.lvm)
    model.tables(eNA2.lvm)
    ##               estimate        se         df        lower      upper   statistic     p.value
    ## eta1       -0.39593331 0.2854528 19.5265046  -0.99230434  0.2004377 -1.38703618 0.181061270
    ## Y3          0.37704733 0.2887687  8.9341075  -0.27692804  1.0310227  1.30570700 0.224270881
    ## Y2         -0.04308602 0.3263015 16.1994022  -0.73412297  0.6479509 -0.13204356 0.896575991
    ## eta1~X1     1.03194106 0.2972545 17.5733098   0.40634382  1.6575383  3.47157405 0.002802916
    ## eta1~X2    -0.01552331 0.2462704  9.4842975  -0.56831932  0.5372727 -0.06303358 0.951048335
    ## Y3~eta1     1.07977655 0.3656814  1.0724740  -2.88655281  5.0461059  2.95277987 0.194108317
    ## Y3~X1       0.62193065 0.4431914  2.2225543  -1.11318213  2.3570434  1.40330027 0.283921347
    ## eta1~~eta1  1.02409942 0.5259689  0.5684192 -44.51400617 46.5622050          NA          NA
    ## Y3~~Y3      0.62918891 0.4126684  4.8415855  -0.44213402  1.7005118          NA          NA
    ## Y2~eta1     1.07161261 0.2676486  8.1125205   0.45590084  1.6873244  4.00380494 0.003818464
    ## Y1~~Y1      0.51334322 0.3337466  2.9800835  -0.55281455  1.5795010          NA          NA
    ## Y2~~Y2      1.54266511 0.6031601  5.4456513   0.02959063  3.0557396          NA          NA
    expect_equal(summary(eNA2.lvm)$table2$df,
                 c(19.52650464, 8.93410748, 16.19940217, 17.57330981, 9.4842975, 1.07247398, 2.2225543, 0.56841916, 4.84158554, 8.1125205, 2.9800835, 5.44565132),
                 tol = 1e-6)
})

##----------------------------------------------------------------------
### test1-sCorrect-missingValues.R ends here