File: stabsel.R

package info (click to toggle)
r-cran-regsem 1.6.2+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 496 kB
  • sloc: cpp: 263; ansic: 15; makefile: 2
file content (167 lines) | stat: -rw-r--r-- 6,172 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#'
#'
#' Stability selection
#' @param data data frame
#' @param model lavaan syntax model.
#' @param det.range Whether to determine the range of penalization values for stability selection through bootstrapping. Default is FALSE, from and to arguments are then needed. If set to TRUE, then jump, times and detr.nlambda arguments will be needed.
#' @param from Minimum value of penalization values for stability selection.
#' @param to Maximum value of penalization values for stability selection.
#' @param times Number of bootstrapping sample used to determine the range. Default is 50.
#' @param jump Amount to increase penalization each iteration. Default is 0.01
#' @param detr.nlambda Number of penalization values to test for determing range.
#' @param n.lambda Number of penalization values to test for stability selection.
#' @param n.boot Number of bootstrap samples needed for stability selection.
#' @param det.thr Whether to determine the probability threshold value. Default is FALSE, p is then needed. If set to TRUE, p.from, p.to, p.method arguments will be needed.
#' @param p Probability threshold: above which selection probability is the path kept in the modle. Default value is 0.8.
#' @param p.from Lower bound of probability threshold to test. Default is 0.5.
#' @param p.to Upper bound of probability threshold to test. Default is 1.
#' @param p.jump Amount to increase threshold each iteration. Default is 0.05.
#' @param p.method Which fit index to use to choose a final model?
#' @param type Penalty type
#' @param pars_pen Parameter indicators to penalize.
#' @param ... Any additional arguments to pass to regsem() or cv_regsem().
#' @examples
#' \dontrun{
#' library(regsem)
#' # put variables on same scale for regsem
#' HS <- data.frame(scale(HolzingerSwineford1939[,7:15]))
#' mod <- '
#' f =~ 1*x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9
#' x1 ~~ r1*x2;x1 ~~ r2*x3;x1 ~~ r3*x4;x1 ~~ r4*x5
#' '
#' outt = cfa(mod, HS)
#'
#'stabsel.out = stabsel(data=HS,model=mod,det.range=T,detr.nlambda=20,n.lambda=5,
#'                     n.boot=10,p=0.9,type="alasso", p.method="aic",
#'                     pars_pen=c("r1","r2","r3","r4"))
#' stabsel.out$selection_results
#'
#' }
#' @export


stabsel<-function(data,
                  model,
                  det.range=FALSE,
                  from,
                  to,
                  times=50,
                  jump=0.01,
                  detr.nlambda=20,
                  n.lambda=40,
                  n.boot=100,
                  det.thr=FALSE,
                  p=0.8,
                  p.from=0.5,
                  p.to=1,
                  p.jump=0.05,
                  p.method="aic",
                  type="lasso",
                  pars_pen="regressions",
                  ...){
  rtn<-list()
  #determine range for lambda (by prestage or user specify)
  if (det.range==TRUE){#use a prestep to determine lambda range
    lam<-det_range(data,model,times,n.lambda=detr.nlambda,jump=jump,type=type,pars_pen=pars_pen,...)
    lb<-lam$lb;ub<-lam$ub
    #rtn$det.range<-lam
  }else{
    lb<-from;ub<-to
  }
  #determine lambda values:
  Lam<-seq(from=lb,to=ub,length.out=n.lambda)
  if (ub==0){
    return("determine range fails")
  }else if (lb==0){
    warning("removed lambda = 0")
    Lam<-Lam[Lam!=0]
  }


  nlambda<-length(Lam)
  #
  est_model <- sem(model, data)
  parT<-parTable(est_model)
  regsem.out.ss <- regsem(est_model,lambda =1e-10,type=type,pars_pen=pars_pen,...)
  pars.pen<-regsem.out.ss$pars_pen
  nm<-names(regsem.out.ss$coefficients)
  n.pen<-length(pars.pen)
  #stability selection:
  nsize<-dim(data)[1]
  p.select<-data.frame(matrix(NA,1,n.pen))#probabilities of being selected
  for (k in 1:nlambda){
    for(i in 1:n.boot){
      ids = sample(1:nsize,nsize,replace=T)
      datasub.ss <- data[ids,]
      data.hold.ss<-data[-unique(ids),]

      try(est_model_ss <- sem(model, data = datasub.ss))
      try(regsem.out.ss <- regsem(est_model_ss,lambda = Lam[k],type=type,pars_pen=pars_pen,...))
      coeff<-rep(NA,n.pen)
      try(coeff<-regsem.out.ss$coefficients[pars.pen])
      if (i==1){
        pars = coeff
      }else{
        pars = rbind(pars,coeff)
      }
    }
    pars.ss.1<-pars!=0
    p.select[k,]<-colSums(pars.ss.1,na.rm=T)/dim(na.omit(pars.ss.1))[1]
  }
  names(p.select)<-nm[pars.pen]

  #Selection results:

  if (det.thr==FALSE){
    #library(matrixStats)
    max.prob<-matrixStats::colMaxs(as.matrix(p.select))
    sel.res<-max.prob>=p#true false
    names(sel.res)<-colnames(pars.ss.1)
    sel.nm<-colnames(p.select)[sel.res]#names of selected paths
    sel.pars<-pars.pen[sel.res]#label of selected paths in number
    rm.path.nm<-colnames(p.select)[sel.res==0]#names of paths to be removed
    rm.path<-pars.pen[sel.res==0]

    #coefficients from relaxed lasso:
    new.mod<-pen_mod(est_model,nm,rm.path)
    new_est_mod<-sem(new.mod,data)
    coefficient<-coef(new_est_mod)

    #returns:
    rtn$data<-data
    rtn$model<-model
    rtn$sem_model<-est_model
    rtn$nm<-nm
    rtn$pars_pen<-pars.pen
    rtn$Lambda<-Lam
    rtn$probabilities<-p.select
    rtn$threshold<-p
    rtn$selection_results<-sel.res
    rtn$remove_path<-rm.path.nm
    rtn$new_model<-new.mod
    rtn$new_model_est<-new_est_mod
    rtn$coefficients<-coefficient
    rtn
  }else {
    thr<-stabsel_thr(data=data,model=model,est_model=est_model,prob=p.select,nm=nm,pars.pen=pars.pen,from=p.from,to=p.to,jump=p.jump,method=p.method)

    #returns:
    rtn$data<-data
    rtn$model<-model
    rtn$sem_model<-est_model
    rtn$nm<-names(regsem.out.ss$coefficients)
    rtn$pars_pen<-pars.pen
    rtn$Lambda<-Lam
    rtn$probabilities<-p.select
    rtn$test_thresholds<-thr$test_thresholds
    rtn$p.method<-thr$method
    rtn$fit<-thr$fit
    rtn$opt_threshold<-thr$opt_threshold
    rtn$opt_sel_results<-thr$opt_sel_results
    rtn$remove_path<-thr$remove_path
    rtn$new_model<-thr$opt_model
    rtn$new_model_est<-thr$opt_model_est
    rtn$coefficients<-thr$coefficients
    rtn
  }
}