1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cutp.R
\name{cutp}
\alias{cutp}
\alias{cutp.coxph}
\alias{cutp.survfit}
\title{\bold{cut p}oint for a continuous variable in a
model fit with \code{coxph} or \code{survfit}.}
\usage{
cutp(x, ...)
\method{cutp}{coxph}(x, ..., defCont = 3)
\method{cutp}{survfit}(x, ..., defCont = 3)
}
\arguments{
\item{x}{A \code{survfit} or \code{coxph} object}
\item{...}{Additional arguments (not implemented).}
\item{defCont}{\bold{def}inition of a \bold{cont}inuous variable.
\cr
If the variable has \eqn{>} \code{defCont} unique values, it
is treated as continuous and a cut point is determined.}
}
\value{
A \code{list} of \code{data.table}s.
\cr
There is one list element per continuous variable.
\cr
Each has a column with possible values of the cut point
(i.e. unique values of the variable), and the
additional columns:
\item{U}{The score (log-rank) test for a model with the variable 'cut'
into those \eqn{\geq}{>=} the cutpoint and those below.}
\item{Q}{The test statistic.}
\item{p}{The \eqn{p}-value.}
The tables are ordered by \eqn{p}-value, lowest first.
}
\description{
\bold{cut p}oint for a continuous variable in a
model fit with \code{coxph} or \code{survfit}.
Determine the optimal cut point for a continuous variable
in a \code{coxph} or \code{survfit} model.
}
\details{
For a cut point \eqn{\mu}{mu}, of a predictor \eqn{K},
the variable is split
into two groups, those \eqn{\geq \mu}{>= mu} and
those \eqn{< \mu}{< mu}.
\cr
The score (or log-rank) statistic, \eqn{sc},
is calculated for each unique element
\eqn{k} in \eqn{K} and uses
\itemize{
\item \eqn{e_i^+}{e1[i]} the number of events
\item \eqn{n_i^+}{n1[i]} the number at risk
}
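in those at or above the cut point, respectively,
together with \eqn{e_i}{e[i]} and \eqn{n_i}{n[i]}, the
corresponding totals across both groups.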
\cr
The basic statistic is
\deqn{sc_k = \sum_{i=1}^D ( e_i^+ - n_i^+ \frac{e_i}{n_i} )}{
sc[k] = sum (e1[i] - n1[i] * e[i] / n[i])}
\cr
The sum is taken across times with observed events, to \eqn{D},
the largest of these.
\cr
It is normalized (standardized), in the case of censoring,
by finding \eqn{\sigma^2}{s^2} which is:
\deqn{\sigma^2 = \frac{1}{D - 1}
\sum_{i=1}^D (1 - \sum_{j=1}^i \frac{1}{D + 1 - j})^2}{
s^2 = (1 / (D - 1)) *
sum[i:D]((1 - sum[j:i](1 / (D - j + 1)))^2)}
The test statistic is then
\deqn{Q = \frac{\max |sc_k|}{\sigma \sqrt{D-1}}}{
Q = max(abs(sc[k])) / (s * sqrt(D - 1))}
Under the null hypothesis that the chosen cut point
does \emph{not} predict survival,
\eqn{Q} has a limiting distribution which
is the supremum of the
absolute value of a Brownian bridge:
\deqn{p = Pr(\sup Q \geq q) = 2 \sum_{i=1}^{\infty}
(-1)^{i + 1} \exp (-2 i^2 q^2)}{
p= P(Q >= q) = 2 sum[i:Inf](-1)^(i + 1) * e^(-2 * i^2 *q^2)}
}
\examples{
## Mandrekar et al. above
data("bmt", package="KMsurv")
b1 <- bmt[bmt$group==1, ] # ALL patients
c1 <- coxph(Surv(t2, d3) ~ z1, data=b1) # z1=age
c1 <- cutp(c1)$z1
data.table::setorder(c1, "z1")
## [] below is used to print data.table to console
c1[]
\dontrun{
## compare to output from survival::coxph
matrix(
unlist(
lapply(26:30,
function(i) c(i, summary(coxph(Surv(t2, d3) ~ z1 >= i, data=b1))$sctest))),
ncol=5,
dimnames=list(c("age", "score_test", "df", "p")))
cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==2, ]))$z1[]
cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==3, ]))[[1]][]
## K&M. Example 8.3, pg 273-274.
data("kidtran", package="KMsurv")
k1 <- kidtran
## patients who are male and black
k2 <- k1[k1$gender==1 & k1$race==2, ]
c2 <- coxph(Surv(time, delta) ~ age, data=k2)
print(cutp(c2))
## check significance of computed value
summary(coxph(Surv(time, delta) ~ age >= 58, data=k2))
k3 <- k1[k1$gender==2 & k1$race==2, ]
c3 <- coxph(Surv(time, delta) ~ age, data=k3)
print(cutp(c3))
## doesn't apply to binary variables e.g. gender
print(cutp(coxph(Surv(time, delta) ~ age + gender, data=k1)))
}
}
\references{
Contal C, O'Quigley J, 1999.
An application of changepoint methods in studying the
effect of age on survival in breast cancer.
\emph{Computational Statistics & Data Analysis} \bold{30}(3):253--70.
\doi{10.1016/S0167-9473(98)00096-6}
Mandrekar JN, Mandrekar SJ, Cha SS, 2003.
Cutpoint Determination Methods in Survival Analysis using SAS.
\emph{Proceedings of the 28th SAS Users Group International Conference (SUGI)}. Paper 261-28.
\href{https://support.sas.com/resources/papers/proceedings/proceedings/sugi28/261-28.pdf}{SAS (free)}
}
|