File: cutp.Rd

package info (click to toggle)
r-cran-survmisc 0.5.6-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 504 kB
  • sloc: makefile: 5
file content (135 lines) | stat: -rwxr--r-- 4,544 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cutp.R
\name{cutp}
\alias{cutp}
\alias{cutp.coxph}
\alias{cutp.survfit}
\title{\bold{cut p}oint for a continuous variable in a
model fit with \code{coxph} or \code{survfit}.}
\usage{
cutp(x, ...)

\method{cutp}{coxph}(x, ..., defCont = 3)

\method{cutp}{survfit}(x, ..., defCont = 3)
}
\arguments{
\item{x}{A \code{survfit} or \code{coxph} object}

\item{...}{Additional arguments (not implemented).}

\item{defCont}{\bold{def}inition of a \bold{cont}inuous variable.
\cr
If the variable has \eqn{>} \code{defCont} unique values, it
is treated as continuous and a cut point is determined.}
}
\value{
A \code{list} of \code{data.table}s.
 \cr
There is one list element per continuous variable.
 \cr
Each has a column with possible values of the cut point
 (i.e. unique values of the variable), and the
additional columns:
 \item{U}{The score (log-rank) test for a model with the variable 'cut'
  into those \eqn{\geq}{>=} the cutpoint and those below.}
 \item{Q}{The test statistic.}
 \item{p}{The \eqn{p}-value.}
The tables are ordered by \eqn{p}-value, lowest first.
}
\description{
\bold{cut p}oint for a continuous variable in a
model fit with \code{coxph} or \code{survfit}.

Determine the optimal cut point for a continuous variable
in a \code{coxph} or \code{survfit} model.
}
\details{
For a cut point \eqn{\mu}{mu}, of a predictor \eqn{K},
the variable is split
into two groups, those \eqn{\geq \mu}{>= mu} and
those \eqn{< \mu}{< mu}.
 \cr
The score (or log-rank) statistic, \eqn{sc},
is calculated for each unique element
\eqn{k} in \eqn{K} and uses
\itemize{
 \item \eqn{e_i^+}{e1[i]} the number of events
 \item \eqn{n_i^+}{n1[i]} the number at risk
}
in those above the cut point, respectively.
 \cr
The basic statistic is 
\deqn{sc_k = \sum_{i=1}^D ( e_i^+ - n_i^+ \frac{e_i}{n_i} )}{
      sc[k] = sum (e1[i] - n1[i] * e[i] / n[i])}
 \cr
The sum is taken across times with observed events, to \eqn{D},
the largest of these.
 \cr
It is normalized (standardized), in the case of censoring,
by finding \eqn{\sigma^2}{s^2} which is:
\deqn{\sigma^2 = \frac{1}{D - 1}
                 \sum_{i=1}^D (1 - \sum_{j=1}^i \frac{1}{D+ 1 - j})^2}{
      s^2 = (1 / (D - 1)) *
            sum[i:D]((1 - sum[j:i](1 / (D - j + 1)))^2)}
The test statistic is then
\deqn{Q = \frac{\max |sc_k|}{\sigma \sqrt{D-1}}}{
      Q = max(abs(sc[k])) / (s * sqrt(D - 1))}
Under the null hypothesis that the chosen cut point
does \emph{not} predict survival,
the statistic \eqn{Q} has a limiting distribution which
is the supremum of the
absolute value of a Brownian bridge:
\deqn{p = Pr(\sup Q \geq q) = 2 \sum_{i=1}^{\infty}
                             (-1)^{i + 1} \exp (-2 i^2 q^2)}{
      p= P(Q >= q) = 2 sum[i:Inf](-1)^(i + 1) * e^(-2 * i^2 *q^2)}
}
\examples{
## Mandrekar et al. above
data("bmt", package="KMsurv")
b1 <- bmt[bmt$group==1, ] # ALL patients
c1 <- coxph(Surv(t2, d3) ~ z1, data=b1) # z1=age
c1 <- cutp(c1)$z1
data.table::setorder(c1, "z1")
## [] below is used to print data.table to console
c1[]

\dontrun{
## compare to output from survival::coxph
matrix(
    unlist(
        lapply(26:30,
               function(i) c(i, summary(coxph(Surv(t2, d3) ~ z1 >= i, data=b1))$sctest))),
    ncol=5,
    dimnames=list(c("age", "score_test", "df", "p")))
cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==2, ]))$z1[]
cutp(coxph(Surv(t2, d3) ~ z1, data=bmt[bmt$group==3, ]))[[1]][]
## K&M. Example 8.3, pg 273-274.
data("kidtran", package="KMsurv")
k1 <- kidtran
## patients who are male and black
k2 <- k1[k1$gender==1 & k1$race==2, ]
c2 <- coxph(Surv(time, delta) ~ age, data=k2)
print(cutp(c2))
## check significance of computed value
summary(coxph(Surv(time, delta) ~ age >= 58, data=k2))
k3 <- k1[k1$gender==2 & k1$race==2, ]
c3 <- coxph(Surv(time, delta) ~ age, data=k3)
print(cutp(c3))
## doesn't apply to binary variables e.g. gender
print(cutp(coxph(Surv(time, delta) ~ age + gender, data=k1)))
}

}
\references{
Contal C, O'Quigley J, 1999.
An application of changepoint methods in studying the
effect of age on survival in breast cancer.
\emph{Computational Statistics & Data Analysis} \bold{30}(3):253--70.
\doi{10.1016/S0167-9473(98)00096-6}

Mandrekar JN, Mandrekar SJ, Cha SS, 2003.
Cutpoint Determination Methods in Survival Analysis using SAS.
\emph{Proceedings of the 28th SAS Users Group International Conference (SUGI)}. Paper 261-28.
\href{https://support.sas.com/resources/papers/proceedings/proceedings/sugi28/261-28.pdf}{SAS (free)}
}