File: iidJack.R

package info (click to toggle)
r-cran-lavasearch2 2.0.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,832 kB
  • sloc: cpp: 28; sh: 13; makefile: 2
file content (266 lines) | stat: -rw-r--r-- 9,523 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
### iidJack.R --- 
#----------------------------------------------------------------------
## author: Brice Ozenne
## created: jun 23 2017 (09:15) 
## Version: 
## last-updated: Jan 11 2022 (16:08) 
##           By: Brice Ozenne
##     Update #: 333
#----------------------------------------------------------------------
## 
### Commentary: 
## 
### Change Log:
#----------------------------------------------------------------------
## 
### Code:

## * documentation - iidJack
#' @title Jackknife iid Decomposition from Model Object
#' @description Extract iid decomposition (i.e. influence function) from model object.
#'
#' @name iidJack
#' 
#' @param object a object containing the model.
#' @param data [data.frame] dataset used to perform the jackknife.
#' @param grouping [vector] variable defining cluster of observations that will be simultaneously removed by the jackknife.
#' @param cpus [integer >0] the number of processors to use.
#' If greater than 1, the fit of the model and the computation of the influence function for each jackknife sample is performed in parallel. 
#' @param keep.warnings [logical] keep warning messages obtained when estimating the model with the jackknife samples.
#' @param keep.error [logical]keep error messages obtained when estimating the model with the jackknife samples.
#' @param cl [cluster] a parallel socket cluster generated by \code{parallel::makeCluster}
#' that has been registered using \code{registerDoParallel}.
#' @param trace [logical] should a progress bar be used to trace the execution of the function
#' @param ... [internal] only used by the generic method.
#'
#' @return A matrix with in row the samples and in columns the parameters.
#' 
#' @examples
#' n <- 20
#' set.seed(10)
#' mSim <- lvm(c(Y1,Y2,Y3,Y4,Y5) ~ 1*eta)
#' latent(mSim) <- ~eta
#' categorical(mSim, K=2) <- ~G
#' transform(mSim, Id ~ eta) <- function(x){1:NROW(x)}
#' dW <- lava::sim(mSim, n, latent = FALSE)
#'
#' #### LVM ####
#' \dontrun{
#' m1 <- lvm(c(Y1,Y2,Y3,Y4,Y5) ~ 1*eta)
#' latent(m1) <- ~eta
#' regression(m1) <- eta ~ G
#' e <- estimate(m1, data = dW)
#'
#' iid1 <- iidJack(e)
#' iid2 <- iid(e)
#' attr(iid2, "bread") <- NULL
#'
#' apply(iid1,2,sd)
#' apply(iid2,2,sd)
#' quantile(iid2 - iid1)
#' }
#'
#'
#' @concept iid decomposition
#' @export
iidJack <- function(object,...) UseMethod("iidJack")

## * method iidJack.default
#' @rdname iidJack
#' @export
iidJack.default <- function(object, data = NULL, grouping = NULL, cpus = 1,
                            keep.warnings = TRUE, keep.error = TRUE,
                            cl = NULL, trace = TRUE, ...) {

    estimate.lvm <- lava_estimate.lvm

    ## ** test args
    init.cpus <- (cpus>1 && is.null(cl))
    if(init.cpus){
        test.package <- try(requireNamespace("foreach"), silent = TRUE)
        if(inherits(test.package,"try-error")){
            stop("There is no package \'foreach\' \n",
                 "This package is necessary when argument \'cpus\' is greater than 1 \n")
        }

        if(cpus > parallel::detectCores()){
            stop("Argument \'cpus\' is greater than the number of available CPU cores \n",
                 "available CPU cores: ",parallel::detectCores(),"\n")
        }
    }
    
    ## ** extract data
    if(is.null(data)){
        myData <- extractData(object, design.matrix = FALSE, as.data.frame = TRUE,
                              envir = parent.env(environment()))
    }else{ 
        myData <-  as.data.frame(data)
    }
    
    n.obs <- NROW(myData)
    if(any(class(object) %in% "lme")){
        getCoef <- nlme::fixef
    }else{
        getCoef <- coef
    }
    coef.x <- getCoef(object)
    names.coef <- names(coef.x)
    n.coef <- length(coef.x)

    ## ** update formula/model when defined by a variable and not in the current namespace
    if(length(object$call[[2]])==1){
        modelName <- as.character(object$call[[2]])
        if(modelName %in% ls() == FALSE){
            assign(modelName, value = evalInParentEnv(object$call[[2]]))
        }
    }

    ## ** define the grouping level for the data
    if(is.null(grouping)){
        myData$XXXgroupingXXX <- 1:n.obs
        grouping <- "XXXgroupingXXX"        
    }else{
        if(length(grouping)>1){
            stop("grouping must refer to only one variable \n")
        }
        if(grouping %in% names(myData) == FALSE){
            stop("variable defined in grouping not found in data \n")
        }
    }
    myData[,grouping] <- as.character(myData[,grouping])
    Ugrouping <- unique(myData[,grouping])
    n.group <- length(Ugrouping)

    ## ** warper
    warper <- function(i){ # i <- "31"
        newData <- subset(myData, subset = myData[[grouping]]!=i)
        xnew <- tryWithWarnings(stats::update(object, data = newData))
        if(!is.null(xnew$error)){
            xnew$value <- rep(NA, n.coef)
        }else{
            xnew$value <- getCoef(xnew$value)
        }
        return(xnew)
    }
    # warper("31")

    ## ** start parallel computation
    if(init.cpus){            
        ## define cluster
        if(trace>0){
            cl <- parallel::makeCluster(cpus, outfile = "")
        }else{
            cl <- parallel::makeCluster(cpus)
        }
        ## link to foreach
        doParallel::registerDoParallel(cl)
    }

    ## *** link to foreach
    doParallel::registerDoParallel(cl)

    ## ** parallel computations: get jackknife coef
    if(cpus>1){

        if(trace>0){
            pb <- utils::txtProgressBar(max = n.group, style = 3)          
        }
        ## *** packages/objects to export
        estimator <- as.character(object$call[[1]]) 

        vec.packages <- c("lava")
        possiblePackage <- gsub("package:","",utils::getAnywhere(estimator)$where[1])
        existingPackage <- as.character(utils::installed.packages()[,"Package"])

        ls.call <- as.list(object$call)
        test.length <- which(unlist(lapply(ls.call, length))==1)
        test.class <- which(unlist(lapply(ls.call, function(cc){
            (class(c) %in% c("numeric","character","logical")) == FALSE
        })))
        test.class <- which(unlist(lapply(ls.call, class)) %in% c("numeric","character","logical") == FALSE)
    
        indexExport <- intersect(test.class,test.length)
        toExport <- sapply(ls.call[indexExport], as.character)
    
        if(possiblePackage %in% existingPackage){
            vec.packages <- c(vec.packages,possiblePackage)
        }

        parallel::clusterCall(cl, fun = function(x){
            sapply(vec.packages, function(iP){
                suppressPackageStartupMessages(requireNamespace(iP, quietly = TRUE))
            })
        })
        
        ## *** objects to export
        if(length(object$call$data)==1){
            toExport <- c(toExport,as.character(object$call$data))
        }
        if(length(object$call$formula)==1){
            toExport <- c(toExport,as.character(object$call$formula))
        }
        if(length(object$call$fixed)==1){
            toExport <- c(toExport,as.character(object$call$fixed))        
        }
        toExport <- c(unique(toExport),"tryWithWarnings")

        ## *** parallel computation
        i <- NULL # [:for CRAN check] foreach
        resLoop <- foreach::`%dopar%`(
                                foreach::foreach(i = 1:n.group,
                                                 .export = toExport),{
                                                     
                                                     if(trace>0){utils::setTxtProgressBar(pb, i)}

                                                     return(warper(Ugrouping[i]))
                                                 })
    
        if(trace>0){close(pb)}

    }else{
        
        if(trace>0){
            test.package <- try(requireNamespace("pbapply"), silent = TRUE)
            if(inherits(test.package,"try-error")){
                stop("There is no package \'pbapply\' \n",
                     "This package is necessary when argument \'trace\' is TRUE \n")
            }
            resLoop <- pbapply::pblapply(Ugrouping, warper)
            
        }else{
            resLoop <- lapply(Ugrouping, warper)
        }
        

    }
    coefJack <- do.call(rbind, lapply(resLoop,"[[","value"))
    rownames(coefJack) <- 1:n.group
    
    ## ** end parallel computation
    if(init.cpus){
        parallel::stopCluster(cl)
    }
    
    ## ** post treatment: from jackknife coef to the iid decomposition
    # defined as (n-1)*(coef-coef(-i))
    # division by n to match output of lava, i.e. IF/n
    iidJack <- -(n.group-1)/n.group * sweep(coefJack, MARGIN = 2, STATS = coef.x, FUN = "-")
    colnames(iidJack) <- names.coef

    if(keep.warnings){
        ls.warnings <- lapply(resLoop,"[[","warnings")
        names(ls.warnings) <- 1:n.group
        ls.warnings <- Filter(Negate(is.null), ls.warnings)
        attr(iidJack,"warnings") <- ls.warnings
    }
    if(keep.error){
        ls.error <- lapply(resLoop,"[[","error")
        names(ls.error) <- 1:n.group
        ls.error <- Filter(Negate(is.null), ls.error)
        attr(iidJack,"error") <- ls.error
    }
    return(iidJack)
}
    
#----------------------------------------------------------------------
### iidJack.R ends here