File: mChoice.Rd

package info (click to toggle)
hmisc 5.2-5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,044 kB
  • sloc: asm: 28,907; f90: 590; ansic: 415; xml: 160; fortran: 75; makefile: 2
file content (282 lines) | stat: -rw-r--r-- 9,904 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
\name{mChoice}
\alias{mChoice}
\alias{format.mChoice}
\alias{print.mChoice}
\alias{summary.mChoice}
\alias{as.character.mChoice}
\alias{as.double.mChoice}
\alias{inmChoice}
\alias{inmChoicelike}
\alias{nmChoice}
\alias{match.mChoice}
\alias{[.mChoice}
\alias{print.summary.mChoice}
\alias{is.mChoice}
\alias{Math.mChoice}
\alias{Ops.mChoice}
\alias{Summary.mChoice}
\title{Methods for Storing and Analyzing Multiple Choice Variables}
\description{
  \code{mChoice} is a function that is useful for grouping 
  variables that represent
  individual choices on a multiple choice question.  These choices are
  typically factor or character values but may be of any type.  Levels
  of component factor variables need not be the same; all unique levels
  (or unique character values) are collected over all of the multiple
  variables.  Then a new character vector is formed with integer choice
  numbers separated by semicolons.  Optimally, a database system would
  have exported the semicolon-separated character strings with a
  \code{levels} attribute containing strings defining value labels
  corresponding to the integer choice numbers.  \code{mChoice} is a
  function for creating a multiple-choice variable after the fact.
  \code{mChoice} variables are explicitly handled by the \code{describe}
  and \code{summary.formula} functions. \code{NA}s or blanks in input
  variables are ignored. 

  \code{format.mChoice} will convert the multiple choice representation
  to text form by substituting \code{levels} for integer codes.
  \code{as.double.mChoice} converts the \code{mChoice} object to a
  binary numeric matrix, one column per used level (or all levels if
  \code{drop=FALSE}).  This is called by
  the user by invoking \code{as.numeric}.  There is a
  \code{print} method and a \code{summary} method, and a \code{print}
  method for the \code{summary.mChoice} object.  The \code{summary}
  method computes frequencies of all two-way choice combinations, the
  frequencies of the top 5 combinations, information about which other
  choices are present when each given choice is present, and the
  frequency distribution of the number of choices per observation.  This
  \code{summary} output is used in the \code{describe} function.  When
  \code{options(prType='html')} is in effect, the \code{print} method
  returns an html character string if \code{render=FALSE} and
  renders the html otherwise.  This is used by \code{print.describe} and
  is most effective when \code{short=TRUE} is specified to \code{summary}.

  \code{inmChoice} creates a logical vector the same length as \code{x}
  whose elements are \code{TRUE} when the observation in \code{x}
  contains at least one of the codes or value labels in the second
  argument.

  \code{match.mChoice} creates an integer vector of the indexes of all
  elements in \code{table} which contain any of the specified levels.

  \code{nmChoice} returns an integer vector of the number of choices
  that were made.

  \code{is.mChoice} returns \code{TRUE} if the argument is a multiple
  choice variable.
}
\usage{
mChoice(\dots, label='',
        sort.levels=c('original','alphabetic'), 
        add.none=FALSE, drop=TRUE, ignoreNA=TRUE)

\method{format}{mChoice}(x, minlength=NULL, sep=";", \dots)

\method{as.double}{mChoice}(x, drop=FALSE, ...)

\method{print}{mChoice}(x, quote=FALSE, max.levels=NULL,
       width=getOption("width"), ...)

\method{as.character}{mChoice}(x, ...)

\method{summary}{mChoice}(object, ncombos=5, minlength=NULL,
  drop=TRUE, short=FALSE, ...)

\method{print}{summary.mChoice}(x, prlabel=TRUE, render=TRUE, ...)

\method{[}{mChoice}(x, ..., drop=FALSE)

match.mChoice(x, table, nomatch=NA, incomparables=FALSE)

inmChoice(x, values, condition=c('any', 'all'))

inmChoicelike(x, values, condition=c('any', 'all'),
              ignore.case=FALSE, fixed=FALSE)

nmChoice(object)

is.mChoice(x)

\method{Summary}{mChoice}(..., na.rm)
}
\arguments{
  \item{na.rm}{
    Logical: remove \code{NA}'s from data
  }
  \item{table}{
    a vector (mChoice) of values to be matched against.
  }
  \item{nomatch}{
    value to return if a value for \code{x} does not exist in
    \code{table}.
  }
  \item{incomparables}{
    logical; whether incomparable values should be compared.
  }
  \item{...}{
    a series of vectors
  }
  \item{label}{
    a character string \code{label} attribute to attach to the matrix created
    by \code{mChoice}
  }
  \item{sort.levels}{
    set \code{sort.levels="alphabetic"} to sort the columns of the matrix
    created by \code{mChoice} alphabetically by category rather than by the
    original order of levels in component factor variables (if there were
    any input variables that were factors)
  }
  \item{add.none}{
    Set \code{add.none} to \code{TRUE} to make a new category
    \code{'none'} if it doesn't already exist and if there is an
    observation with no choices selected.
  }
  \item{drop}{
    set \code{drop=FALSE} to keep unused factor levels as columns of the matrix
    produced by \code{mChoice}
  }
  \item{ignoreNA}{set to \code{FALSE} to keep any \code{NA}s present in
data as a real level.  Prior to Hmisc 4.7-2 \code{FALSE} was the
default.}
  \item{x}{
    an object of class \code{"mChoice"} such as that created by
    \code{mChoice}.  For \code{is.mChoice}, \code{x} is any object.
  }
  \item{object}{
    an object of class \code{"mChoice"} such as that created by
    \code{mChoice}
  }
  \item{ncombos}{
    maximum number of combos.
  }
  \item{width}{
    Width of a line of text to be formatted
  }
  \item{quote}{
    quote the output
  }
  \item{max.levels}{max levels to be displayed}
  \item{minlength}{
    By default no abbreviation of levels is done in
    \code{format} and \code{summary}.  Specify a positive integer to use
    abbreviation in those functions.  See \code{\link{abbreviate}}.
  }
  \item{short}{set to \code{TRUE} to have \code{summary.mChoice} use
integer choice numbers in its tables, and to print the choice level
definitions at the top}
  \item{sep}{character to use to separate levels when formatting}
  \item{prlabel}{
    set to \code{FALSE} to keep
    \code{print.summary.mChoice} from printing the variable label and
    number of unique values.  Ignored for html output.
  }
  \item{render}{applies if \code{options(prType='html')} is in
  effect. Set to \code{FALSE} to return the html text instead of
  rendering the html.}
  \item{values}{
    a scalar or vector.  If \code{values} is integer, it is
    the choice codes, and if it is a character vector, it is assumed to
    be value labels.  For \code{inmChoicelike} \code{values} must be
    character strings which are pieces of choice labels.
  }
  \item{condition}{set to \code{'all'} for \code{inmChoice} to require
that all choices in \code{values} be present instead of the default of
any of them present.}
  \item{ignore.case}{set to \code{TRUE} to have \code{inmChoicelike}
ignore case in the data when matching on \code{values}}
  \item{fixed}{see \code{grep}}
}
\value{
  \code{mChoice} returns a character vector of class \code{"mChoice"}
  plus attributes \code{"levels"} and \code{"label"}.
  \code{summary.mChoice} returns an object of class
  \code{"summary.mChoice"}.  \code{inmChoice} and \code{inmChoicelike}
  return a logical vector.
  \code{format.mChoice} returns a character vector, and
  \code{as.double.mChoice} returns a binary numeric matrix.
  \code{nmChoice} returns an integer vector.
  \code{print.summary.mChoice} returns an html character string if
  \code{options(prType='html')} is in effect.
}
\author{
  Frank Harrell
  \cr
  Department of Biostatistics
  \cr
  Vanderbilt University
  \cr
  \email{fh@fharrell.com}
}
\seealso{
  \code{\link{label}}, \code{\link{combplotp}}
}
\examples{
options(digits=3)
set.seed(3)
n <- 20
sex <- factor(sample(c("m","f"), n, rep=TRUE))
age <- rnorm(n, 50, 5)
treatment <- factor(sample(c("Drug","Placebo"), n, rep=TRUE))


# Generate a 3-choice variable; each of 3 variables has 5 possible levels
symp <- c('Headache','Stomach Ache','Hangnail',
          'Muscle Ache','Depressed')
symptom1 <- sample(symp, n, TRUE)
symptom2 <- sample(symp, n, TRUE)
symptom3 <- sample(symp, n, TRUE)
cbind(symptom1, symptom2, symptom3)[1:5,]
Symptoms <- mChoice(symptom1, symptom2, symptom3, label='Primary Symptoms')
Symptoms
print(Symptoms, long=TRUE)
format(Symptoms[1:5])
inmChoice(Symptoms,'Headache')
inmChoicelike(Symptoms, 'head', ignore.case=TRUE)
levels(Symptoms)
inmChoice(Symptoms, 3)
# Find all subjects with either of two symptoms
inmChoice(Symptoms, c('Headache','Hangnail'))
# Note: In this example, some subjects have the same symptom checked
# multiple times; in practice these redundant selections would be NAs
# mChoice will ignore these redundant selections
# Find all subjects with both symptoms
inmChoice(Symptoms, c('Headache', 'Hangnail'), condition='all')

meanage <- N <- numeric(5)
for(j in 1:5) {
 meanage[j] <- mean(age[inmChoice(Symptoms,j)])
 N[j] <- sum(inmChoice(Symptoms,j))
}
names(meanage) <- names(N) <- levels(Symptoms)
meanage
N

# Manually compute mean age for 2 symptoms
mean(age[symptom1=='Headache' | symptom2=='Headache' | symptom3=='Headache'])
mean(age[symptom1=='Hangnail' | symptom2=='Hangnail' | symptom3=='Hangnail'])

summary(Symptoms)

#Frequency table sex*treatment, sex*Symptoms
summary(sex ~ treatment + Symptoms, fun=table)
# Check:
ma <- inmChoice(Symptoms, 'Muscle Ache')
table(sex[ma])

# could also do:
# summary(sex ~ treatment + mChoice(symptom1,symptom2,symptom3), fun=table)

#Compute mean age, separately by 3 variables
summary(age ~ sex + treatment + Symptoms)


summary(age ~ sex + treatment + Symptoms, method="cross")

f <- summary(treatment ~ age + sex + Symptoms, method="reverse", test=TRUE)
f
# trio of numbers represent 25th, 50th, 75th percentile
print(f, long=TRUE)
}
\keyword{category}
\keyword{manip}
\concept{multiple choice}