1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
|
#'
#'
#' Function to performed exploratory mediation with continuous and categorical variables
#'
#' @param data Name of the dataset
#' @param iv Name of independent variable
#' @param mediators Name of mediators
#' @param dv Name of dependent variable
#' @param covariates Name of covariates to be included in model.
#' @param type What type of penalty. Options include lasso, ridge, and enet.
#' @param nfolds Number of cross-validation folds.
#' @param epsilon Threshold for determining whether effect is 0 or not.
#' @param seed Set seed to control CV results
#' @export
#' @examples
#' \dontrun{
#'# example
#'library(ISLR)
#'College1 = College[which(College$Private=="Yes"),]
#'Data = data.frame(scale(College1[c("Grad.Rate","Accept","Outstate","Room.Board","Books","Expend")]))
#'Data$Grad.Rate <- ifelse(Data$Grad.Rate > 0,1,0)
#'Data$Grad.Rate <- as.factor(Data$Grad.Rate)
#'#lavaan model with all mediators
#'model1 <-
#' ' # direct effect (c_prime)
#'Grad.Rate ~ c_prime*Accept
#'# mediators
#'Outstate ~ a1*Accept
#'Room.Board ~ a2*Accept
#'Books ~ a3*Accept
#'Expend ~ a6*Accept
#'Grad.Rate ~ b1*Outstate + b2*Room.Board + b3*Books + b6*Expend
#'# indirect effects (a*b)
#'a1b1 := a1*b1
#'a2b2 := a2*b2
#'a3b3 := a3*b3
#'a6b6 := a6*b6
#'# total effect (c)
#'c := c_prime + (a1*b1) + (a2*b2) + (a3*b3) + (a6*b6)
#''
#'#p-value approach using delta method standard errors
#'fit.delta = sem(model1,data=Data,fixed.x=TRUE,ordered="Grad.Rate")
#'summary(fit.delta)
#'
#'#xmed()
#'
#'iv <- "Accept"
#'dv <- "Grad.Rate"
#'mediators <- c("Outstate","Room.Board","Books","Expend")
#'
#'out <- xmed(Data,iv,mediators,dv)
#'out
#'}
xmed = function (data, iv, mediators, dv, covariates=NULL, type = "lasso", nfolds = 10,
epsilon = 0.001, seed = NULL)
{
res <- list()
Data <- data
if (type == "lasso") {
alpha = 1
}
else if (type == "ridge") {
alpha = 0
}
else if (type == "enet") {
alpha = 0.5
}
var.check = function(data) {
data = as.data.frame(data)
num.response.options = flag = integer(ncol(data))
for (i in 1:ncol(data)) {
num.response.options[i] = flag[i] = NA
num.response.options[i] = length(unique(data[, i]))
if (is.factor(data[, i]) & num.response.options[i] >
2) {
flag[i] = 2
}
else if (num.response.options[i] == 2) {
flag[i] = 1
}
else if (num.response.options[i] != 2) {
flag[i] = 0
}
}
return(flag)
}
check.out <- var.check(data[, c(iv, mediators, dv)])
if (any(check.out == 2)) {
stop("Factor variables with > 2 response options need to be recoded as integer or numeric variables")
}
data.proc <- caret::preProcess(Data[, c(iv, mediators, dv)])
data2 <- predict(data.proc, Data[, c(iv, mediators, dv)])
iv.mat <- as.matrix(data2[, iv])
mediators.mat <- as.matrix(data2[, mediators])
dv.mat <- as.matrix(data2[, dv])
if (sum(is.na(iv.mat)) > 0 | sum(is.na(mediators.mat)) > 0 | sum(is.na(dv.mat)) > 0) {
stop("Missing values are not allowed")
}
if (var.check(dv.mat) == 0) {
dv.class = "gaussian"
}
else if (var.check(dv.mat) == 1) {
dv.class = "binomial"
}
if(!is.null(seed)){
set.seed(seed)
}
b.cv.lasso = glmnet::cv.glmnet(mediators.mat, dv.mat, alpha = alpha, family = dv.class, standardize = FALSE,
lambda = exp(seq(log(0.001), log(5), length.out = 100)), nfolds=nfolds,
penalty.factor = c(rep(1, ncol(data) - 2), 0))
b.coefs = coef(b.cv.lasso, s = b.cv.lasso$lambda.min)[-1,1]
a.cv.lasso = a.fit.lasso = vector("list", ncol(mediators.mat))
a.lambda = numeric(ncol(mediators.mat))
for (i in 1:ncol(mediators.mat)) {
if (var.check(mediators.mat[, i]) == 0) {
med.class = "gaussian"
}
else if (var.check(mediators.mat[, i]) == 1) {
med.class = "binomial"
}
if(!is.null(seed)){
set.seed(seed)
}
a.cv.lasso[[i]] = glmnet::cv.glmnet(as.matrix(cbind(rnorm(nrow(data), 1, 1e-04), iv.mat)), mediators.mat[, i],
alpha = alpha, family = med.class, standardize = FALSE, nfolds=nfolds,
lambda = exp(seq(log(0.001), log(5), length.out = 100)),
intercept = F, penalty.factor = c(0, 1))
a.lambda[i] = a.cv.lasso[[i]]$lambda.min
}
a.coefs = numeric(length(b.coefs))
for (i in 1:length(a.coefs)) {
if (!is.null(a.cv.lasso[[i]])) {
a.coefs[i] = coef(a.cv.lasso[[i]], s = a.cv.lasso[[i]]$lambda.min)[-1,1][2]
}
}
names(a.coefs) = mediators
res$a.coefs <- a.coefs
res$b.coefs <- b.coefs
indirect = a.coefs * b.coefs
selected <- names(indirect[abs(indirect) > epsilon])
indirect <- as.data.frame(indirect)
indirect = round(indirect, 4)
indirect[abs(indirect) < epsilon] = 0
indirect <- t(indirect)
indirect[abs(indirect) >= epsilon] = as.numeric(indirect[abs(indirect) >= epsilon])
res$a.lambda = a.lambda
res$b.lambda = b.cv.lasso$lambda.min
res$selected = selected
res$indirect <- indirect
res$call <- match.call()
class(res) <- "xmed"
return(res)
}
|