File: mChoice.Rd

package info (click to toggle)
hmisc 4.2-0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 3,332 kB
  • sloc: asm: 27,116; fortran: 606; ansic: 411; xml: 160; makefile: 2
file content (250 lines) | stat: -rw-r--r-- 8,199 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
\name{mChoice}
\alias{mChoice}
\alias{format.mChoice}
\alias{print.mChoice}
\alias{summary.mChoice}
\alias{as.character.mChoice}
\alias{as.double.mChoice}
\alias{inmChoice}
\alias{match.mChoice}
\alias{[.mChoice}
\alias{print.summary.mChoice}
\alias{is.mChoice}
\alias{Math.mChoice}
\alias{Ops.mChoice}
\alias{Summary.mChoice}
\title{Methods for Storing and Analyzing Multiple Choice Variables}
\description{
  \code{mChoice} is a function that is useful for grouping 
  variables that represent
  individual choices on a multiple choice question.  These choices are
  typically factor or character values but may be of any type.  Levels
  of component factor variables need not be the same; all unique levels
  (or unique character values) are collected over all of the multiple
  variables.  Then a new character vector is formed with integer choice
  numbers separated by semicolons.  Optimally, a database system would
  have exported the semicolon-separated character strings with a
  \code{levels} attribute containing strings defining value labels
  corresponding to the integer choice numbers.  \code{mChoice} is a
  function for creating a multiple-choice variable after the fact.
  \code{mChoice} variables are explicitly handled by the \code{describe}
  and \code{summary.formula} functions. \code{NA}s or blanks in input
  variables are ignored. 

  \code{format.mChoice} will convert the multiple choice representation
  to text form by substituting \code{levels} for integer codes.
  \code{as.double.mChoice} converts the \code{mChoice} object to a
  binary numeric matrix, one column per used level (or all levels if
  \code{drop=FALSE}).  This is called by
  the user by invoking \code{as.numeric}.  There is a
  \code{print} method and a \code{summary} method, and a \code{print}
  method for the \code{summary.mChoice} object.  The \code{summary}
  method computes frequencies of all two-way choice combinations, the
  frequencies of the top 5 combinations, information about which other
  choices are present when each given choice is present, and the
  frequency distribution of the number of choices per observation.  This
  \code{summary} output is used in the \code{describe} function.

  \code{inmChoice} creates a logical vector the same length as \code{x}
  whose elements are \code{TRUE} when the observation in \code{x}
  contains at least one of the codes or value labels in the second
  argument.

  \code{match.mChoice} creates an integer vector of the indexes of all
  elements in \code{table} which contain any of the specified levels.

  \code{is.mChoice} returns \code{TRUE} if the argument is a multiple
  choice variable.
}
\usage{
mChoice(\dots, label='',
        sort.levels=c('original','alphabetic'), 
        add.none=FALSE, drop=TRUE)

\method{format}{mChoice}(x, minlength=NULL, sep=";", \dots)

\method{as.double}{mChoice}(x, drop=FALSE, ...)

\method{print}{mChoice}(x, quote=FALSE, max.levels=NULL,
       width=getOption("width"), ...)

\method{as.character}{mChoice}(x, ...)

\method{summary}{mChoice}(object, ncombos=5, minlength=NULL, drop=TRUE, ...)

\method{print}{summary.mChoice}(x, prlabel=TRUE, ...)

\method{[}{mChoice}(x, ..., drop=FALSE)

match.mChoice(x, table, nomatch=NA, incomparables=FALSE)

inmChoice(x, values)

is.mChoice(x)

\method{Summary}{mChoice}(..., na.rm)
}
\arguments{
  \item{na.rm}{
    Logical: remove \code{NA}'s from data
  }
  \item{table}{
    a vector (mChoice) of values to be matched against.
  }
  \item{nomatch}{
    value to return if a value for \code{x} does not exist in
    \code{table}.
  }
  \item{incomparables}{
    logical whether incomparable values should be compared.
  }
  \item{...}{
    a series of vectors
  }
  \item{sort.}{
    By default, choice codes are sorted in ascending numeric
    order.  Set \code{sort=FALSE} to preserve the original left to right
    ordering from the input variables.
  }
  \item{label}{
    a character string \code{label} attribute to attach to the matrix created
    by \code{mChoice}
  }
  \item{sort.levels}{
    set \code{sort.levels="alphabetic"} to sort the columns of the matrix
    created by \code{mChoice} alphabetically by category rather than by the
    original order of levels in component factor variables (if there were
    any input variables that were factors)
  }
  \item{add.none}{
    Set \code{add.none} to \code{TRUE} to make a new category
    \code{'none'} if it doesn't already exist and if there is an
    observation with no choices selected.
  }
  \item{drop}{
    set \code{drop=FALSE} to keep unused factor levels as columns of the matrix
    produced by \code{mChoice}
  }
  \item{x}{
    an object of class \code{"mChoice"} such as that created by
    \code{mChoice}.  For \code{is.mChoice}, \code{x} may be any object.
  }
  \item{object}{
    an object of class \code{"mChoice"} such as that created by
    \code{mChoice}
  }
  \item{ncombos}{
    maximum number of combos.
  }
  \item{width}{
    Width of a line of text to be formatted
  }
  \item{quote}{
    quote the output
  }
  \item{max.levels}{max levels to be displayed}
  \item{minlength}{
    By default no abbreviation of levels is done in
    \code{format} and \code{summary}.  Specify a positive integer to use
    abbreviation in those functions.  See \code{\link{abbreviate}}.
  }
  \item{sep}{
    character to use to separate levels when formatting
  }
  \item{prlabel}{
    set to \code{FALSE} to keep
    \code{print.summary.mChoice} from printing the variable label and
    number of unique values
  }
  \item{values}{
    a scalar or vector.  If \code{values} is integer, it is
    the choice codes, and if it is a character vector, it is assumed to
    be value labels.
  }
}
\value{
  \code{mChoice} returns a character vector of class \code{"mChoice"}
  plus attributes \code{"levels"} and \code{"label"}.
  \code{summary.mChoice} returns an object of class
  \code{"summary.mChoice"}.  \code{inmChoice} returns a logical vector.
  \code{format.mChoice} returns a character vector, and
  \code{as.double.mChoice} returns a binary numeric matrix.
}
\author{
  Frank Harrell
  \cr
  Department of Biostatistics
  \cr
  Vanderbilt University
  \cr
  \email{f.harrell@vanderbilt.edu}
}
\seealso{
  \code{\link{label}}
}
\examples{
# Worked example: build a multiple-choice variable from three symptom
# columns, query it with inmChoice, and use it in summary formulas.
options(digits=3)
set.seed(3)
n <- 20
# 'replace' is spelled out in full instead of relying on partial
# matching of the abbreviated argument name 'rep'
sex <- factor(sample(c("m","f"), n, replace=TRUE))
age <- rnorm(n, 50, 5)
treatment <- factor(sample(c("Drug","Placebo"), n, replace=TRUE))


# Generate a 3-choice variable; each of 3 variables has 5 possible levels
symp <- c('Headache','Stomach Ache','Hangnail',
          'Muscle Ache','Depressed')
symptom1 <- sample(symp, n, replace=TRUE)
symptom2 <- sample(symp, n, replace=TRUE)
symptom3 <- sample(symp, n, replace=TRUE)
cbind(symptom1, symptom2, symptom3)[1:5,]
Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
Symptoms
print(Symptoms, long=TRUE)
format(Symptoms[1:5])
# inmChoice accepts either value labels (character) or choice codes (integer)
inmChoice(Symptoms,'Headache')
levels(Symptoms)
inmChoice(Symptoms, 3)
inmChoice(Symptoms, c('Headache','Hangnail'))
# Note: In this example, some subjects have the same symptom checked
# multiple times; in practice these redundant selections would be NAs
# mChoice will ignore these redundant selections

# Mean age and count of subjects reporting each symptom.  The number of
# choice codes is taken from the levels of Symptoms rather than hard-coded.
lev <- levels(Symptoms)
meanage <- N <- numeric(length(lev))
for(j in seq_along(lev)) {
 has.j <- inmChoice(Symptoms, j)
 meanage[j] <- mean(age[has.j])
 N[j] <- sum(has.j)
}
names(meanage) <- names(N) <- lev
meanage
N

# Manually compute mean age for 2 symptoms
mean(age[symptom1=='Headache' | symptom2=='Headache' | symptom3=='Headache'])
mean(age[symptom1=='Hangnail' | symptom2=='Hangnail' | symptom3=='Hangnail'])

summary(Symptoms)

#Frequency table sex*treatment, sex*Symptoms
summary(sex ~ treatment + Symptoms, fun=table)
# Check:
ma <- inmChoice(Symptoms, 'Muscle Ache')
table(sex[ma])

# could also do:
# summary(sex ~ treatment + mChoice(symptom1,symptom2,symptom3), fun=table)

#Compute mean age, separately by 3 variables
summary(age ~ sex + treatment + Symptoms)


summary(age ~ sex + treatment + Symptoms, method="cross")

f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE)
f
# trio of numbers represent 25th, 50th, 75th percentile
print(f, long=TRUE)
}
\keyword{category}
\keyword{manip}
\concept{multiple choice}