1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/movStats.r
\name{movStats}
\alias{movStats}
\title{movStats}
\usage{
movStats(
formula,
stat = NULL,
discrete = FALSE,
space = c("n", "x"),
eps = if (space == "n") 15,
varyeps = FALSE,
nignore = 10,
xinc = NULL,
xlim = NULL,
times = NULL,
tunits = "year",
msmooth = c("smoothed", "raw", "both"),
tsmooth = c("supsmu", "lowess"),
bass = 8,
span = 1/4,
maxdim = 6,
penalty = NULL,
trans = function(x) x,
itrans = function(x) x,
loess = FALSE,
ols = FALSE,
qreg = FALSE,
lrm = FALSE,
orm = FALSE,
hare = FALSE,
ordsurv = FALSE,
lrm_args = NULL,
family = "logistic",
k = 5,
tau = (1:3)/4,
melt = FALSE,
data = environment(formula),
pr = c("none", "kable", "plain", "margin")
)
}
\arguments{
\item{formula}{a formula with the analysis variable on the left and the x-variable on the right, followed by optional stratification variables}
\item{stat}{function of one argument that returns a named list of computed values. Defaults to computing mean and quartiles + N except when y is binary in which case it computes moving proportions. If y has two columns the default statistics are Kaplan-Meier estimates of cumulative incidence at a vector of \code{times}.}
\item{discrete}{set to \code{TRUE} if x-axis variable is discrete and no intervals should be created for windows}
\item{space}{defines whether intervals use fixed width or fixed sample size}
\item{eps}{tolerance for window (half width of window). For \code{space='x'} is in data units, otherwise is the sample size for half the window, not counting the middle target point.}
\item{varyeps}{applies to \code{space='n'} and causes a smaller \code{eps} to be used in strata with too few observations for the requested \code{eps}, so as to arrive at three x points}
\item{nignore}{see description, default is to exclude \code{nignore=10} points on the left and right tails from estimation and plotting}
\item{xinc}{increment in x to evaluate stats, default is xlim range/100 for \code{space='x'}. For \code{space='n'} \code{xinc} defaults to m observations, where m = max(n/200, 1).}
\item{xlim}{2-vector of limits to evaluate if \code{space='x'} (default is \code{nignore} smallest to \code{nignore} largest)}
\item{times}{vector of times for evaluating one minus Kaplan-Meier estimates}
\item{tunits}{time units when \code{times} is given}
\item{msmooth}{set to \code{'smoothed'} or \code{'both'} to compute \code{lowess}-smooth moving estimates. \code{msmooth='both'} will display both. \code{'raw'} will display only the moving statistics. \code{msmooth='smoothed'} (the default) will display only the smoothed moving estimates.}
\item{tsmooth}{defaults to the super-smoother \code{'supsmu'} for after-moving smoothing. Use \code{tsmooth='lowess'} to instead use \code{lowess}.}
\item{bass}{the \code{supsmu} \code{bass} parameter used to smooth the moving statistics if \code{tsmooth='supsmu'}. The default of 8 represents quite heavy smoothing.}
\item{span}{the \code{lowess} \code{span} used to smooth the moving statistics}
\item{maxdim}{passed to \code{hare}, default is 6}
\item{penalty}{passed to \code{hare}, default is to use BIC. Specify 2 to use AIC.}
\item{trans}{transformation to apply to x}
\item{itrans}{inverse transformation}
\item{loess}{set to TRUE to also compute loess estimates}
\item{ols}{set to TRUE to include rcspline estimate of mean using ols}
\item{qreg}{set to TRUE to include quantile regression estimates w rcspline}
\item{lrm}{set to TRUE to include logistic regression estimates w rcspline}
\item{orm}{set to TRUE to include ordinal logistic regression estimates w rcspline (mean + quantiles in \code{tau})}
\item{hare}{set to TRUE to include hazard regression estimates of incidence at \code{times}, using the \code{polspline} package}
\item{ordsurv}{set to TRUE to include ordinal regression estimates of incidence at \code{times}, using the \code{rms} package \code{adapt_orm} and \code{survest.orm} functions}
\item{lrm_args}{a \code{list} of optional arguments to pass to \code{lrm} when \code{lrm=TRUE}, e.g., \code{list(maxit=20)}}
\item{family}{link function for ordinal regression (see \code{rms::orm})}
\item{k}{number of knots to use for ols, lrm, qreg restricted cubic splines. Linearity is forced for binary \code{y} when the minimum of the number of events and number of non-events is below 10 for a by-group. For \code{ordsurv=TRUE} is the maximum number of knots tried and is passed as argument \code{maxk} to the \code{rms} \code{adapt_orm} function.}
\item{tau}{quantile numbers to estimate with quantile regression}
\item{melt}{set to TRUE to melt data table and derive Type and Statistic}
\item{data}{data.table or data.frame, default is calling frame}
\item{pr}{defaults to no printing of window information. Use \code{pr='plain'} to print in the ordinary way, \code{pr='kable'} to convert the object to \code{knitr::kable} and print, or \code{pr='margin'} to convert to \code{kable} and place in the \code{Quarto} right margin. For the latter two \code{results='asis'} must be in the chunk header.}
}
\value{
a data table, with attribute \code{infon} which is a data frame with rows corresponding to strata and columns \code{N}, \code{Wmean}, \code{Wmin}, \code{Wmax} if \code{stat} computed \code{N}. These summarize the number of observations used in the windows. If \code{varyeps=TRUE} there is an additional column \code{eps} with the computed per-stratum \code{eps}. When \code{space='n'} and \code{xinc} is not given, the computed \code{xinc} also appears as a column. An additional attribute \code{info} is a \code{kable} object ready for printing to describe the window characteristics.
}
\description{
Moving Estimates Using Overlapping Windows
}
\details{
Function to compute moving averages and other statistics as a function
of a continuous variable, possibly stratified by other variables.
Estimates are made by creating overlapping moving windows and
computing the statistics defined in the stat function for each window.
The default method, \code{space='n'}, creates varying-width intervals each having a sample size of \code{2*eps + 1}, and the smooth estimates are made every \code{xinc} observations. Outer intervals are not symmetric in sample size (but the mean x in those intervals will reflect that) unless \code{eps=nignore}, as outer intervals are centered at observations \code{nignore} and \code{n - nignore + 1} where the default for \code{nignore} is 10. The mean x-variable within each window is taken to represent that window. If \code{trans} and \code{itrans} are given, x means are computed on the \code{trans(x)} scale and then \code{itrans}'d. For \code{space='x'}, by default estimates are made from the \code{nignore} smallest to the \code{nignore} largest
observed values of the x variable to avoid extrapolation and to
help get the moving statistics off to an adequate start for
the left tail. Also by default the moving estimates are smoothed using \code{supsmu}.
When \code{melt=TRUE} you can feed the result into \code{ggplot} like this:
\verb{ggplot(w, aes(x=age, y=crea, col=Type)) + geom_line() +}
\code{facet_wrap(~ Statistic)}
See \href{https://hbiostat.org/rflow/analysis.html#sec-analysis-assoc}{here} for several examples.
}
\author{
Frank Harrell
}
|