1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
#Resampling optimism of discrimination and reliability of a logistic
#regression model
#B: # reps
#bw=T to incorporate backward stepdown (using fastbw) with params rule,type,sls
#pr=T to print results of each bootstrap rep
validate.lrm <- function(fit,method="boot",
B=40, bw=FALSE, rule="aic", type="residual",
sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE,
kint,
Dxy.method=if(k==1)"somers2" else "lrm",
emax.lim=c(0,1), ...)
{
k <- fit$non.slopes
y <- fit$y
if(length(y)==0) stop("fit did not use x=TRUE,y=TRUE")
if(!is.factor(y)) y <- factor(y) ## was category 11Apr02
fit$y <- unclass(y) - 1 #mainly for Brier score (B)
if(missing(kint)) kint <- floor((k+1)/2)
penalty.matrix <- fit$penalty.matrix
discrim <- function(x, y, fit, iter, evalfit=FALSE, pr=FALSE,
Dxy.method="somers2",
penalty.matrix, kint, ...)
{
if(evalfit) { # Fit was for bootstrap sample
stats <- fit$stats
lr <- stats["Model L.R."]
Dxy <- if(Dxy.method=="lrm") stats["Dxy"] else
somers2(x,y)["Dxy"]
intercept <- 0
shrink <- 1
n <- stats["Obs"]
D <- (lr - 1)/n
U <- -2/n
Q <- D - U
R2 <- stats["R2"]
g <- stats['g']
gp <- stats['gp']
}
else {
k <- fit$non.slopes
null.model <- length(fit$coefficients)==k
refit <- if(null.model) lrm.fit(y=y) else lrm.fit(x,y,tol=1e-13)
kr <- refit$non.slopes
## Model with no variables = null model
stats <- refit$stats
lr <- stats["Model L.R."]
Dxy <- if(Dxy.method=="lrm") stats["Dxy"] else
somers2(x,y)["Dxy"]
intercept <- refit$coefficients[kint]
shrink <- if(null.model) 1 else refit$coefficients[kr + 1]
n <- stats["Obs"]
D <- (lr-1)/n
L01 <- -2 * sum( (y >= kint)*x - logb(1 + exp(x)), na.rm=TRUE)
U <- (L01 - refit$deviance[2] - 2)/n
Q <- D - U
R2 <- stats["R2"]
g <- GiniMd(shrink*x)
gp <- GiniMd(plogis(intercept + shrink*x))
}
P <- plogis(x) # 1/(1+exp(-x))
B <- sum(((y >= kint) - P)^2)/n
z <- c(Dxy, R2, intercept, shrink, D, U, Q, B, g, gp)
names(z) <- c("Dxy", "R2", "Intercept", "Slope", "D", "U", "Q", "B",
"g", "gp")
z
}
lrmfit <- function(x, y, maxit=12, tol=1e-7, penalty.matrix=NULL,
xcol=NULL, ...)
{
if(length(xcol) && length(penalty.matrix) > 0)
penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE]
lrm.fit(x, y, maxit=maxit, penalty.matrix=penalty.matrix, tol=tol)
}
z <- predab.resample(fit, method=method, fit=lrmfit, measure=discrim, pr=pr,
B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics,
force=force, estimates=estimates, Dxy.method=Dxy.method,
non.slopes.in.x=FALSE,
penalty.matrix=penalty.matrix, kint=kint, ...)
kept <- attr(z, 'kept')
calib <- z[3:4,5]
p <- seq(emax.lim[1],emax.lim[2],.0005)
L <- logb(p/(1-p))
P <- plogis(calib[1]+calib[2]*L) # 1/(1+exp(-calib[1]-calib[2]*L))
emax <- max(abs(p-P), na.rm=TRUE)
z <- rbind(z[1:4,],c(0,0,emax,emax,emax,z[1,6]),z[5:nrow(z),])
dimnames(z) <- list(c("Dxy", "R2","Intercept", "Slope", "Emax", "D", "U", "Q",
"B", "g", "gp"),
c("index.orig","training","test","optimism",
"index.corrected","n"))
structure(z, class='validate', kept=kept)
}
validate.orm <- function(fit, method="boot",
B=40, bw=FALSE, rule="aic", type="residual",
sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, ...)
{
k <- fit$non.slopes
y <- fit$y
if(length(y)==0) stop("fit did not use x=TRUE, y=TRUE")
if(!is.factor(y)) y <- factor(y)
penalty.matrix <- fit$penalty.matrix
discrim <- function(x, y, fit, iter, evalfit=FALSE, pr=FALSE,
penalty.matrix, ...)
{
if(evalfit) { # Fit was for bootstrap sample
stats <- fit$stats
lr <- stats["Model L.R."]
rho <- stats["rho"]
shrink <- 1
n <- stats["Obs"]
R2 <- stats["R2"]
g <- stats['g']
pdm <- stats['pdm']
}
else {
k <- fit$non.slopes
null.model <- length(fit$coefficients)==k
refit <- if(null.model) ormfit2(y=y) else ormfit2(x, y, tol=1e-13)
kr <- refit$non.slopes
## Model with no variables = null model
stats <- refit$stats
lr <- stats["Model L.R."]
rho <- stats['rho']
shrink <- if(null.model) 1 else refit$coefficients[kr + 1]
n <- stats["Obs"]
R2 <- stats["R2"]
g <- GiniMd(shrink*x)
pdm <- stats['pdm']
}
z <- c(rho, R2, shrink, g, pdm)
names(z) <- c("rho", "R2", "Slope", "g", "pdm")
z
}
ormfit2 <- function(x, y, maxit=12, tol=1e-7, penalty.matrix=NULL,
xcol=NULL, ...)
{
if(length(xcol) && length(penalty.matrix) > 0)
penalty.matrix <- penalty.matrix[xcol, xcol, drop=FALSE]
# x has names() like y >= ... - DROP ??
# predab.resample is getting constant x as if an intercept
orm.fit(x, y, maxit=maxit, penalty.matrix=penalty.matrix, tol=tol)
}
z <- predab.resample(fit, method=method, fit=ormfit2, measure=discrim, pr=pr,
B=B, bw=bw, rule=rule, type=type, sls=sls, aics=aics,
force=force, estimates=estimates,
non.slopes.in.x=FALSE,
penalty.matrix=penalty.matrix,
allow.varying.intercepts=TRUE, ...)
kept <- attr(z, 'kept')
dimnames(z) <- list(c("rho", "R2", "Slope", "g", "pdm"),
c("index.orig","training","test","optimism",
"index.corrected","n"))
structure(z, class='validate', kept=kept)
}
|