File: survtab_ag.Rd

package info (click to toggle)
r-cran-popepi 0.4.13%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,656 kB
  • sloc: sh: 13; makefile: 2
file content (422 lines) | stat: -rw-r--r-- 16,734 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/survival_aggregated.R
\name{survtab_ag}
\alias{survtab_ag}
\title{Estimate Survival Time Functions}
\usage{
survtab_ag(
  formula = NULL,
  data,
  adjust = NULL,
  weights = NULL,
  surv.breaks = NULL,
  n = "at.risk",
  d = "from0to1",
  n.cens = "from0to0",
  pyrs = "pyrs",
  d.exp = "d.exp",
  n.pp = NULL,
  d.pp = "d.pp",
  d.pp.2 = "d.pp.2",
  n.cens.pp = "n.cens.pp",
  pyrs.pp = "pyrs.pp",
  d.exp.pp = "d.exp.pp",
  surv.type = "surv.rel",
  surv.method = "hazard",
  relsurv.method = "e2",
  subset = NULL,
  conf.level = 0.95,
  conf.type = "log-log",
  verbose = FALSE
)
}
\arguments{
\item{formula}{a \code{formula}; the response
must be the time scale to compute survival time function estimates
over, e.g. \code{fot ~ sex}. Variables on the right-hand side of the formula
separated by \code{+} are considered stratifying variables, for which
estimates are computed separately. May contain usage of \code{adjust()}
--- see Details and Examples.}

\item{data}{since popEpi 0.4.0, a \code{data.frame}
containing variables used in \code{formula} and other arguments.
\code{aggre} objects are recommended as they contain information on any
time scales and are therefore safer; for creating \code{aggre} objects see
\verb{[as.aggre]} when your data is already aggregated and \code{aggre}
for aggregating split \code{Lexis} objects.}

\item{adjust}{can be used as an alternative to passing variables to
argument \code{formula} within a call to \code{adjust()}; e.g.
\code{adjust = "agegr"}. \link[=flexible_argument]{Flexible input}.}

\item{weights}{typically a list of weights or a \code{character} string
specifying an age group standardization scheme; see
the \link[=direct_standardization]{dedicated help page}
and examples. NOTE: \code{weights = "internal"} is based on the counts
of persons in follow-up at the start of follow-up (typically T = 0)}

\item{surv.breaks}{a vector of breaks on the
survival time scale. Optional if \code{data} is an \code{aggre} object
and mandatory otherwise. Must define each intended interval;
e.g. \code{surv.breaks = 0:5} when data has intervals defined by
breaks \code{seq(0, 5, 1/12)} will aggregate to wider intervals first.
It is generally recommended (and sufficient;
see Seppa, Dyba and Hakulinen (2015)) to use monthly
intervals where applicable.}

\item{n}{variable containing counts of subjects at-risk at the start of a
time interval; e.g. \code{n = "at.risk"}.
Required when \code{surv.method = "lifetable"}.
\link[=flexible_argument]{Flexible input}.}

\item{d}{variable(s) containing counts of subjects experiencing an event.
With only one type of event, e.g. \code{d = "deaths"}. With multiple types of
events (for CIF or cause-specific survival estimation), supply e.g.
\code{d = c("canD", "othD")}. If the survival time function to be estimated
does not use multiple types of events, supplying more than one variable
to \code{d} simply causes the variables to be added together.
Always required. \link[=flexible_argument]{Flexible input}.}

\item{n.cens}{variable containing counts of subjects censored during a
survival time interval; E.g. \code{n.cens = "alive"}.
Required when \code{surv.method = "lifetable"}.
\link[=flexible_argument]{Flexible input}.}

\item{pyrs}{variable containing total subject-time accumulated within a
survival time interval; E.g. \code{pyrs = "pyrs"}.
Required when \code{surv.method = "hazard"}. Flexible input.}

\item{d.exp}{variable denoting total "expected numbers of events"
(typically computed \code{pyrs * pop.haz}, where
\code{pop.haz} is the expected hazard level)
accumulated within a survival time interval; E.g. \code{d.exp = "d.exp"}.
Required when computing EdererII relative survivals or
CIFs based on excess counts of events. Flexible input.}

\item{n.pp}{variable containing total Pohar-Perme weighted counts of
subjects at risk in an interval,
supplied as argument \code{n} is supplied.
Computed originally on the subject
level as analogous to \code{pp * as.integer(status == "at-risk")}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{d.pp}{variable(s) containing Pohar-Perme weighted counts of events,
supplied as argument \code{d} is supplied. Computed originally on the subject
level as analogous to \code{pp * as.integer(status == some_event)}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{d.pp.2}{variable(s) containing total Pohar-Perme
"double-weighted" counts of events,
supplied as argument \code{d} is supplied. Computed originally on the subject
level as analogous to \code{pp * pp * as.integer(status == some_event)}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{n.cens.pp}{variable containing total Pohar-Perme weighted counts
of censorings,
supplied as argument \code{n.cens} is supplied.
Computed originally on the subject
level as analogous to \code{pp * as.integer(status == "censored")}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{pyrs.pp}{variable containing total Pohar-Perme weighted subject-times,
supplied as argument \code{pyrs} is supplied.
Computed originally on the subject
level as analogous to \code{pp * pyrs}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{d.exp.pp}{variable containing total Pohar-Perme weighted counts
of expected events,
supplied as argument \code{d.exp} is supplied.
Computed originally on the subject
level as analogous to \code{pp * d.exp}.
Required when \code{relsurv.method = "pp"}. Flexible input.}

\item{surv.type}{one of \code{'surv.obs'},
\code{'surv.cause'}, \code{'surv.rel'},
\code{'cif.obs'} or \code{'cif.rel'};
defines what kind of survival time function(s) is/are estimated; see Details}

\item{surv.method}{either \code{'lifetable'} or \code{'hazard'}; determines
the method of calculating survival time functions, where the former computes
ratios such as \code{p = d/(n - n.cens)}
and the latter utilizes subject-times
(typically person-years) for hazard estimates such as \code{d/pyrs}
which are used to compute survival time function estimates.
The former method requires argument \code{n.cens} and the latter
argument \code{pyrs} to be supplied.}

\item{relsurv.method}{either \code{'e2'} or \code{'pp'};
defines whether to compute relative survival using the
EdererII method or using Pohar-Perme weighting;
ignored if \code{surv.type != "surv.rel"}}

\item{subset}{a logical condition; e.g. \code{subset = sex == 1};
subsets the data before computations}

\item{conf.level}{confidence level used in confidence intervals;
e.g. \code{0.95} for 95 percent confidence intervals}

\item{conf.type}{character string; must be one of \code{"plain"},
\code{"log-log"} and \code{"log"};
defines the transformation used on the survival time
function to yield confidence
intervals via the delta method}

\item{verbose}{logical; if \code{TRUE}, the function is chatty and
returns some messages and timings along the process}
}
\value{
Returns a table of survival time function values and other
information with survival intervals as rows.
Returns some of the following estimates of survival time functions:

\itemize{
\item \code{surv.obs} - observed (raw, overall) survival
\item \code{surv.obs.K} - observed cause-specific survival for cause K
\item \code{CIF_k} - cumulative incidence function for cause \code{k}
\item \code{CIF.rel} - cumulative incidence function using excess cases
\item \code{r.e2} -  relative survival, EdererII
\item \code{r.pp} -  relative survival, Pohar-Perme weighted
}
The suffix \code{.as} implies adjusted estimates, and \code{.lo} and
\code{.hi} imply lower and upper confidence limits, respectively.
The prefix \code{SE.} stands for standard error.
}
\description{
This function estimates survival time functions: survival,
relative/net survival, and crude/absolute risk functions (CIF).
}
\section{Basics}{


This function computes interval-based estimates of survival time functions,
where the intervals are set by the user. For product-limit-based
estimation see packages \pkg{survival} and \pkg{relsurv}.

If \code{surv.type = 'surv.obs'}, only 'raw' observed survival
is estimated over the chosen time intervals. With
\code{surv.type = 'surv.rel'}, also relative survival estimates
are supplied in addition to observed survival figures.

\code{surv.type = 'cif.obs'} requests cumulative incidence functions (CIF)
to be estimated.
CIFs are estimated for each competing risk based
on a survival-interval-specific proportional hazards
assumption as described by Chiang (1968).
With \code{surv.type = 'cif.rel'}, a CIF is estimated using
excess cases as the ''cause-specific'' cases. Finally, with
\code{surv.type = 'surv.cause'}, cause-specific survivals are
estimated separately for each separate type of event.

In hazard-based estimation (\code{surv.method = "hazard"}) survival
time functions are transformations of the estimated corresponding hazard
in the intervals. The hazard itself is estimated using counts of events
(or excess events) and total subject-time in the interval. Life table
(\code{surv.method = "lifetable"}) estimates are constructed as transformations
of probabilities computed using counts of events and counts of subjects
at risk.

The vignette \href{../doc/survtab_examples.html}{survtab_examples}
has some practical examples.
}

\section{Relative survival}{


When \code{surv.type = 'surv.rel'}, the user can choose
\code{relsurv.method = 'pp'}, whereupon Pohar-Perme weighting is used.
By default \code{relsurv.method = 'e2'}, i.e. the Ederer II method
is used to estimate relative survival.
}

\section{Adjusted estimates}{


Adjusted estimates in this context mean computing estimates separately
by the levels of adjusting variables and returning weighted averages
of the estimates. For example, computing estimates separately by
age groups and returning a weighted average estimate (age-adjusted estimate).

Adjusting requires specification of both the adjusting variables and
the weights for all the levels of the adjusting variables. The former can be
accomplished by using \code{adjust()} with the argument \code{formula},
or by supplying variables directly to argument \code{adjust}. E.g. the
following are all equivalent:

\code{formula = fot ~ sex + adjust(agegr) + adjust(area)}

\code{formula = fot ~ sex + adjust(agegr, area)}

\verb{formula  = fot ~ sex, adjust = c("agegr", "area")}

\verb{formula  = fot ~ sex, adjust = list(agegr, area)}

The adjusting variables must match with the variable names in the
argument \code{weights};
see the \link[=direct_standardization]{dedicated help page}.
Typically weights are supplied as a \code{list} or
a \code{data.frame}. The former can be done by e.g.

\code{weights = list(agegr = VEC1, area = VEC2)},

where \code{VEC1} and \code{VEC2} are vectors of weights (which do not
have to add up to one). See
\href{../doc/survtab_examples.html}{survtab_examples}
for an example of using a \code{data.frame} to pass weights.
}

\section{Period analysis and other data selection schemes}{


To calculate e.g. period analysis (delayed entry) estimates,
limit the data when/before supplying to this function. See
\href{../doc/survtab_examples.html}{survtab_examples}.
}

\section{Data requirements}{


\code{survtab_ag} computes estimates of survival time functions using
pre-aggregated data. For using subject-level data directly, use
\verb{[survtab]}. For aggregating data, see \verb{[lexpand]}
and \verb{[aggre]}.

By default, and if data is an \code{aggre} object (not mandatory),
\code{survtab_ag} makes use of the exact same breaks that were used in
splitting the original data (with e.g. \code{lexpand}), so it is not
necessary to specify any \code{surv.breaks}. If specified, the
\code{surv.breaks} must be a subset of the pertinent
pre-existing breaks. When data is not an \code{aggre} object, breaks
must always be specified. Interval lengths (\code{delta} in output) are
also calculated based on whichever breaks are used,
so the upper limit of the breaks should
therefore be meaningful and never e.g. \code{Inf}.
}

\examples{
## see more examples with explanations in vignette("survtab_examples")

#### survtab_ag usage

data("sire", package = "popEpi")
## prepare data for e.g. 5-year "period analysis" for 2008-2012
## note: sire is a simulated cohort integrated into popEpi.
BL <- list(fot=seq(0, 5, by = 1/12),
           per = c("2008-01-01", "2013-01-01"))
x <- lexpand(sire, birth = bi_date, entry = dg_date, exit = ex_date,
             status = status \%in\% 1:2,
             breaks = BL,
             pophaz = popmort,
             aggre = list(fot))

## calculate relative EdererII period method
## NOTE: x is an aggre object here, so surv.breaks are deduced
## automatically
st <- survtab_ag(fot ~ 1, data = x)

summary(st, t = 1:5) ## annual estimates
summary(st, q = list(r.e2 = 0.75)) ## 1st interval where r.e2 < 0.75 at end
\donttest{
plot(st)


## non-aggre data: a plain data.frame is not an aggre object, so
## surv.breaks is mandatory; the first (commented-out) call would fail
df <- data.frame(x)
# st <- survtab_ag(fot ~ 1, data = df)
st <- survtab_ag(fot ~ 1, data = df, surv.breaks = BL$fot)

## calculate age-standardised 5-year relative survival ratio using
## Ederer II method and period approach

sire$agegr <- cut(sire$dg_age,c(0,45,55,65,75,Inf),right=FALSE)
BL <- list(fot=seq(0, 5, by = 1/12),
           per = c("2008-01-01", "2013-01-01"))
x <- lexpand(sire, birth = bi_date, entry = dg_date, exit = ex_date,
             status = status \%in\% 1:2,
             breaks = BL,
             pophaz = popmort,
             aggre = list(agegr, fot))

## age standardisation using internal weights (age distribution of
## patients diagnosed within the period window)
## (NOTE: what is done here is equivalent to using weights = "internal")
w <- aggregate(at.risk ~ agegr, data = x[x$fot == 0], FUN = sum)
names(w) <- c("agegr", "weights")

st <- survtab_ag(fot ~ adjust(agegr), data = x, weights = w)
plot(st, y = "r.e2.as", col = c("blue"))

## age standardisation using ICSS1 weights
data(ICSS)
## same age group limits as used for sire$agegr above; named age_breaks
## so that the base function cut is not masked by a local variable
age_breaks <- c(0, 45, 55, 65, 75, Inf)
agegr <- cut(ICSS$age, age_breaks, right = FALSE)
w <- aggregate(ICSS1~agegr, data = ICSS, FUN = sum)
names(w) <- c("agegr", "weights")

st <- survtab_ag(fot ~ adjust(agegr), data = x, weights = w)
lines(st, y = "r.e2.as", col = c("red"))


## cause-specific survival
sire$stat <- factor(sire$status, 0:2, c("alive", "canD", "othD"))
x <- lexpand(sire, birth = bi_date, entry = dg_date, exit = ex_date,
             status = stat,
             breaks = BL,
             pophaz = popmort,
             aggre = list(agegr, fot))
## one event-count column per cause of death is passed to d
st <- survtab_ag(fot ~ adjust(agegr), data = x, weights = w,
                 d = c("fromalivetocanD", "fromalivetoothD"),
                 surv.type = "surv.cause")
plot(st, y = "surv.obs.fromalivetocanD.as")
lines(st, y = "surv.obs.fromalivetoothD.as", col = "red")


}
}
\references{
Perme, Maja Pohar, Janez Stare, and Jacques Esteve.
"On estimation in relative survival." Biometrics 68.1 (2012): 113-120.
\doi{10.1111/j.1541-0420.2011.01640.x}

Hakulinen, Timo, Karri Seppa, and Paul C. Lambert.
"Choosing the relative survival method for cancer survival estimation."
European Journal of Cancer 47.14 (2011): 2202-2210.
\doi{10.1016/j.ejca.2011.03.011}

Seppa, Karri, Timo Hakulinen, and Arun Pokhrel.
"Choosing the net survival method for cancer survival estimation."
European Journal of Cancer (2013).
\doi{10.1016/j.ejca.2013.09.019}

CHIANG, Chin Long. Introduction to stochastic processes in biostatistics.
1968. ISBN-13: 978-0471155003

Seppa K., Dyba T. and Hakulinen T.: Cancer Survival,
Reference Module in Biomedical Sciences. Elsevier. 08-Jan-2015.
\doi{10.1016/B978-0-12-801238-3.02745-8}
}
\seealso{
\verb{[splitMulti]}, \verb{[lexpand]},
\verb{[ICSS]}, \verb{[sire]},
\href{../doc/survtab_examples.html}{The survtab_examples vignette}

Other main functions: 
\code{\link{Surv}()},
\code{\link{rate}()},
\code{\link{relpois}()},
\code{\link{relpois_ag}()},
\code{\link{sir}()},
\code{\link{sirspline}()},
\code{\link{survmean}()},
\code{\link{survtab}()}

Other survtab functions: 
\code{\link{Surv}()},
\code{\link{lines.survtab}()},
\code{\link{plot.survtab}()},
\code{\link{print.survtab}()},
\code{\link{summary.survtab}()},
\code{\link{survtab}()}
}
\concept{main functions}
\concept{survtab functions}