File: somers2.Rd

package info (click to toggle)
hmisc 4.2-0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 3,332 kB
  • sloc: asm: 27,116; fortran: 606; ansic: 411; xml: 160; makefile: 2
file content (77 lines) | stat: -rw-r--r-- 2,052 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
\name{somers2}
\alias{somers2}
\title{
Somers' Dxy Rank Correlation
}
\description{
Computes Somers' Dxy rank correlation between a variable \code{x} and a
binary (0-1) variable \code{y}, and the corresponding receiver operating
characteristic curve area \code{C}. Note that \code{Dxy = 2(C - 0.5)}.
\code{somers2} allows for a \code{weights} variable, which specifies frequencies
to associate with each observation.
}
\usage{
somers2(x, y, weights=NULL, normwt=FALSE, na.rm=TRUE)
}
\arguments{
\item{x}{
typically a predictor variable. \code{NA}s are allowed.
}
\item{y}{
a numeric outcome variable coded \code{0-1}. \code{NA}s are allowed.
}
\item{weights}{
a numeric vector of observation weights (usually frequencies).  Omit
or specify a zero-length vector to do an unweighted analysis.
}
\item{normwt}{
set to \code{TRUE} to make \code{weights} sum to the actual number of non-missing
observations.
}
\item{na.rm}{
set to \code{FALSE} to suppress checking for \code{NA}s.
}}
\value{
a vector with the named elements \code{C}, \code{Dxy}, \code{n} (number of non-missing
pairs), and \code{Missing}. Uses the formula 
\code{C = (mean(rank(x)[y == 1]) - (n1 + 1)/2)/(n - n1)}, where \code{n1} is the
frequency of \code{y=1}.
}
\details{
The \code{rcorr.cens} function, although slower than \code{somers2} for large
sample sizes, can also be used to obtain Dxy for non-censored binary
\code{y}, and it has the advantage of computing the standard deviation of
the correlation index.
}
\author{
Frank Harrell
\cr
Department of Biostatistics
\cr
Vanderbilt University School of Medicine
\cr
\email{f.harrell@vanderbilt.edu}
}
\seealso{
\code{\link{rcorr.cens}}, \code{\link{rank}}, \code{\link{wtd.rank}}
}
\examples{
set.seed(1)
predicted <- runif(200)
dead      <- sample(0:1, 200, TRUE)
roc.area <- somers2(predicted, dead)["C"]

# Check weights
x <- 1:6
y <- c(0,0,1,0,1,1)
f <- c(3,2,2,3,2,1)
somers2(x, y)
somers2(rep(x, f), rep(y, f))
somers2(x, y, f)
}
\keyword{nonparametric}
\concept{logistic regression model}
\concept{predictive accuracy}