File: ggfreqScatter.Rd

package info (click to toggle)
hmisc 4.2-0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 3,332 kB
  • sloc: asm: 27,116; fortran: 606; ansic: 411; xml: 160; makefile: 2
file content (96 lines) | stat: -rw-r--r-- 3,925 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
\name{ggfreqScatter}
\alias{ggfreqScatter}
\title{Frequency Scatterplot}
\description{
Uses \code{ggplot2} to plot a scatterplot or dot-like chart for the case
where there is a very large number of overlapping values.  This works
for continuous and categorical \code{x} and \code{y}.  For continuous
variables it serves the same purpose as hexagonal binning.  Counts for
overlapping points are grouped into quantile groups, and the level of
transparency and rainbow colors are used to provide count information.

The result can also be passed to \code{ggplotly}.  Actual cell
frequencies are added to the hover text in that case.
}
\usage{
ggfreqScatter(x, y, bins=50, g=10, cuts=NULL,
              xtrans = function(x) x,
              ytrans = function(y) y,
              xbreaks = pretty(x, 10),
              ybreaks = pretty(y, 10),
              xminor  = NULL, yminor = NULL,
              xlab = as.character(substitute(x)),
              ylab = as.character(substitute(y)),
              fcolors = viridis::viridis(10), nsize=FALSE,
              html=FALSE, prfreq=FALSE, \dots)
}
\arguments{
\item{x}{x-variable}
\item{y}{y-variable}
\item{bins}{for continuous \code{x} or \code{y} is the number of bins to
	create by rounding.  Ignored for categorical variables.  If a
	2-vector, the first element corresponds to \code{x} and the second to
	\code{y}.}
\item{g}{number of quantile groups to make for frequency counts.  Use
	\code{g=0} to use frequencies continuously for color and alpha
	coding.  This is recommended only when using \code{plotly}.}
\item{cuts}{instead of using \code{g}, specify \code{cuts} to provide
	the vector of cuts for categorizing frequencies for assignment to colors}
\item{xtrans,ytrans}{functions specifying transformations to be made
	before binning and plotting}
\item{xbreaks,ybreaks}{vectors of values to label on axis, on original
	scale}
\item{xminor,yminor}{values at which to put minor tick marks, on
	original scale}
\item{xlab,ylab}{axis labels.  If not specified and variable has a
	\code{label}, that label will be used.}
\item{fcolors}{\code{colors} argument to pass to
	\code{scale_color_gradientn} to color code frequencies}
\item{nsize}{set to \code{TRUE} to not vary color or transparency but
	instead to size the symbols in relation to the number of points.  Best
	when both \code{x} and \code{y} are discrete.  \code{ggplot2}
	\code{size} is taken as the fourth root of the frequency.  If there
	are 15 or fewer unique frequencies all the unique frequencies are used,
	otherwise \code{g} quantile groups of frequencies are used.}
\item{html}{set to \code{TRUE} to use html in axis labels instead of
	plotmath}
\item{prfreq}{set to \code{TRUE} to print the frequency distributions of
	the binned coordinate frequencies}
\item{\dots}{arguments to pass to \code{geom_point} such as \code{shape}
	and \code{size}}
}
\value{a \code{ggplot} object}
\author{Frank Harrell}
\seealso{\code{\link[Hmisc]{cut2}}}
\examples{
set.seed(1)
x <- rnorm(1000)
y <- rnorm(1000)
count <- sample(1:100, 1000, TRUE)
x <- rep(x, count)
y <- rep(y, count)
# color=alpha=NULL below makes loess smooth over all points
g <- ggfreqScatter(x, y) +   # might add g=0 if using plotly
      geom_smooth(aes(color=NULL, alpha=NULL), se=FALSE) +
      ggtitle("Using Deciles of Frequency Counts, 2500 Bins")
g
# plotly::ggplotly(g, tooltip='label')  # use plotly, hover text = freq. only
# Plotly makes it somewhat interactive, with hover text tooltips

# Try with x categorical
x1 <- sample(c('cat', 'dog', 'giraffe'), length(x), TRUE)
ggfreqScatter(x1, y)

# Try with y categorical
y1 <- sample(LETTERS[1:10], length(x), TRUE)
ggfreqScatter(x, y1)

# Both categorical, larger point symbols, box instead of circle
ggfreqScatter(x1, y1, shape=15, size=7)
# Vary box size instead
ggfreqScatter(x1, y1, nsize=TRUE, shape=15)
}
\keyword{hplot}
\concept{grouping}
\concept{categorization}
\concept{discretization}