File: which.influence.Rd

package info (click to toggle)
design 2.3-0-2
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 1,756 kB
  • ctags: 1,113
  • sloc: asm: 15,221; ansic: 5,245; fortran: 627; makefile: 1
file content (92 lines) | stat: -rw-r--r-- 2,623 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
\name{which.influence}
\alias{which.influence}
\alias{show.influence}
\title{
Which Observations are Influential
}
\description{
Creates a list with a component for
each factor in the model.  The names of the components are the factor
names.  Each component contains the observation identifiers of all
observations that are \dQuote{overly influential} with respect to that factor,
meaning that \eqn{|dfbetas| > u} for at least one \eqn{\beta_i}{beta i}
associated with that factor, where \eqn{u} is the given \code{cutoff}.  The default \code{cutoff}
is \code{.2}.  The fit must come from a function that has
\code{resid(fit, type="dfbetas")} defined. 


\code{show.influence}, written by Jens Oehlschlaegel-Akiyoshi, applies the
result of \code{which.influence} to a data frame, usually the one used to
fit the model, to report the results.
}
\usage{
which.influence(fit, cutoff=.2)

show.influence(object, dframe, report=NULL, sig=NULL, id=NULL)
}
\arguments{
\item{fit}{
a fit object created by a function for which
\code{resid(fit, type="dfbetas")} is defined
}
\item{object}{
the result of \code{which.influence}
}
\item{dframe}{
data frame containing observations pertinent to the model fit
}
\item{cutoff}{
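threshold applied to the absolute value of the dfbetas; an observation is
flagged as influential for a factor when this threshold is exceeded for at
least one coefficient associated with that factor.  The default is \code{.2}.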
}
\item{report}{
other columns of the data frame to report besides those corresponding
to predictors that are influential for some observations
}
\item{sig}{
runs results through \code{signif} with \code{sig} digits if \code{sig} is given
}
\item{id}{
a character vector that labels rows of \code{dframe} if \code{row.names} were
not used
}}
\value{
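\code{which.influence} returns a list with a component for each factor in the
model, named by factor, each containing the identifiers of the observations
that are influential with respect to that factor.
\code{show.influence} returns a marked data frame with the first column being
a count of influence values.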
}
\author{
Frank Harrell\cr
Department of Biostatistics, Vanderbilt University\cr
f.harrell@vanderbilt.edu
\cr

Jens Oehlschlaegel-Akiyoshi\cr
Center for Psychotherapy Research\cr
Christian-Belser-Strasse 79a\cr
D-70597 Stuttgart Germany\cr
oehl@psyres-stuttgart.de
}
\seealso{
\code{\link{residuals.lrm}}, \code{\link{residuals.cph}}, \code{\link{residuals.ols}}, \code{\link{Design}}, \code{\link{lrm}}, \code{\link{ols}}, \code{\link{cph}}
}
\examples{
# Print the observations in the data frame that are influential,
# separately for each factor in the model
x1 <- 1:20
x2 <- abs(x1-10)
x3 <- factor(rep(0:2,length.out=20))
y  <- c(rep(0:1,8),1,1,1,1)
f  <- lrm(y ~ rcs(x1,3) + x2 + x3, x=TRUE,y=TRUE)
w <- which.influence(f, .55)   # cutoff of .55 instead of the default .2
nam <- names(w)
d   <- data.frame(x1,x2,x3,y)
# seq_along() gives an empty sequence when w has no components, whereas
# 1:length(nam) would iterate over c(1, 0) and index w out of bounds
for(i in seq_along(nam)) {
 print(paste("Influential observations for effect of ",nam[i]),quote=FALSE)
 print(d[w[[i]],])
}


show.influence(w, d)  # better way to show results
}
\keyword{models}
\keyword{regression}
\keyword{survival}
\concept{logistic regression model}