File: cut2.Rd

package info (click to toggle)
hmisc 4.2-0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 3,332 kB
  • sloc: asm: 27,116; fortran: 606; ansic: 411; xml: 160; makefile: 2
file content (80 lines) | stat: -rw-r--r-- 2,463 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
\name{cut2}
\alias{cut2}
\title{Cut a Numeric Variable into Intervals}
\description{
Function like \code{cut} but left endpoints are inclusive and labels are of
the form \code{[lower, upper)}, except that the last interval is \code{[lower, upper]}.
If cuts are given, will by default make sure that cuts include entire
range of \code{x}.
Also, if cuts are not given, will cut \code{x} into quantile groups 
(\code{g} given) or groups
with a given minimum number of observations (\code{m}).  Whereas \code{cut} creates a
category object, \code{cut2} creates a factor object.
}
\usage{
cut2(x, cuts, m=150, g, levels.mean=FALSE, digits, minmax=TRUE,
oneval=TRUE, onlycuts=FALSE, formatfun=format, \dots)
}
\arguments{
\item{x}{
numeric vector to classify into intervals
}
\item{cuts}{
cut points
}
\item{m}{
desired minimum number of observations in a group.  The algorithm does
not guarantee that all groups will have at least \code{m} observations.
}
\item{g}{
number of quantile groups
}
\item{levels.mean}{
set to \code{TRUE} to make the new categorical vector have a levels attribute
consisting of the group means of \code{x} instead of interval endpoint labels
}
\item{digits}{
number of significant digits to use in constructing levels.  Default is 3
(5 if \code{levels.mean=TRUE})
}
\item{minmax}{
if \code{cuts} is specified but \code{min(x)<min(cuts)} or \code{max(x)>max(cuts)}, augments
\code{cuts} to include min and max \code{x}
}
\item{oneval}{
if an interval contains only one unique value, the interval will be
labeled with the formatted version of that value instead of the
interval endpoints, unless \code{oneval=FALSE}
}
\item{onlycuts}{
  set to \code{TRUE} to only return the vector of computed cuts.  This
  consists of the interior values plus outer ranges.
}
\item{formatfun}{
  formatting function, supports formula notation (if \code{rlang} is installed)
}
\item{\dots}{
  additional arguments passed to \code{formatfun}
}
}
\value{
a factor variable with levels of the form \code{[a,b)} or formatted means
(character strings) unless \code{onlycuts} is \code{TRUE} in which case
a numeric vector is returned
}
\seealso{
\code{\link{cut}}, \code{\link{quantile}}
}
\examples{
set.seed(1)
x <- runif(1000, 0, 100)
z <- cut2(x, c(10,20,30))
table(z)
table(cut2(x, g=10))      # quantile groups
table(cut2(x, m=50))      # group x into intervals with at least 50 obs.
}
\keyword{category}
\keyword{nonparametric}
\concept{grouping}
\concept{categorization}
\concept{discretization}