File: cut2.Rd

package info (click to toggle)
hmisc 3.14-5-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,792 kB
  • ctags: 701
  • sloc: asm: 23,440; fortran: 600; ansic: 375; xml: 160; makefile: 1
file content (75 lines) | stat: -rw-r--r-- 2,264 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
\name{cut2}
\alias{cut2}
\title{
Cut a Numeric Variable into Intervals
}
\description{
Function like \code{cut} but left endpoints are inclusive and labels are of
the form \code{[lower, upper)}, except that the last interval is \code{[lower,upper]}.
If \code{cuts} are given, \code{cut2} will by default make sure that the
cuts include the entire range of \code{x}.
Also, if \code{cuts} are not given, \code{cut2} will cut \code{x} into quantile groups
(\code{g} given) or groups
with a given minimum number of observations (\code{m}).  Whereas \code{cut} creates a
category object, \code{cut2} creates a factor object.
}
\usage{
cut2(x, cuts, m, g, levels.mean, digits, minmax=TRUE, oneval=TRUE, onlycuts=FALSE)
}
\arguments{
\item{x}{
numeric vector to classify into intervals
}
\item{cuts}{
cut points
}
\item{m}{
desired minimum number of observations in a group.  The algorithm does
not guarantee that all groups will have at least \code{m} observations.
}
\item{g}{
number of quantile groups
}
\item{levels.mean}{
set to \code{TRUE} to make the new categorical vector have a \code{levels} attribute
that is the group means of \code{x} instead of interval endpoint labels
}
\item{digits}{
number of significant digits to use in constructing levels.  Default is 3
(5 if \code{levels.mean=TRUE})
}
\item{minmax}{
if \code{cuts} is specified but \code{min(x)<min(cuts)} or \code{max(x)>max(cuts)}, augments
\code{cuts} to include the minimum and maximum of \code{x}
}
\item{oneval}{
if an interval contains only one unique value, the interval will be
labeled with the formatted version of that value instead of the
interval endpoints, unless \code{oneval=FALSE}
}
\item{onlycuts}{
  set to \code{TRUE} to only return the vector of computed cuts.  This
  consists of the interior values plus outer ranges.
}
}
\value{
a factor variable with levels of the form \code{[a,b)} or formatted means
(character strings) unless \code{onlycuts} is \code{TRUE} in which case
a numeric vector is returned
}
\seealso{
\code{\link{cut}}, \code{\link{quantile}}
}
\examples{
set.seed(1)
x <- runif(1000, 0, 100)
z <- cut2(x, c(10,20,30))
table(z)
table(cut2(x, g=10))      # quantile groups
table(cut2(x, m=50))      # group x into intervals with at least 50 obs.
}
\keyword{category}
\keyword{nonparametric}
\concept{grouping}
\concept{categorization}
\concept{discretization}