File: sjPlotCorr.R

package info (click to toggle)
r-cran-sjplot 2.8.17%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,596 kB
  • sloc: sh: 13; makefile: 2
file content (236 lines) | stat: -rw-r--r-- 10,460 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#' @title Plot correlation matrix
#' @name sjp.corr
#'
#' @description Plot correlation matrix as ellipses or tiles.
#'
#' @param data Matrix with correlation coefficients as returned by the
#'          \code{\link{cor}}-function, or a \code{data.frame} of variables where
#'          correlations between columns should be computed.
#' @param sort.corr Logical, if \code{TRUE} (default), the axis labels are sorted
#'          according to the correlation strength. If \code{FALSE}, axis labels
#'          appear in order of how variables were included in the cor-computation or
#'          data frame.
#' @param decimals Indicates how many decimal values after comma are printed when
#'          the values labels are shown. Default is 3. Only applies when
#'          \code{show.values = TRUE}.
#' @param na.deletion Indicates how missing values are treated. May be either
#'          \code{"listwise"} (default) or \code{"pairwise"}. May be
#'          abbreviated.
#' @param corr.method Indicates the correlation computation method. May be one of
#'          \code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}.
#'          May be abbreviated.
#' @param p.numeric Logical, if \code{TRUE}, the p-values are printed
#'          as numbers. If \code{FALSE} (default), asterisks are used.
#'
#' @inheritParams plot_grpfrq
#' @inheritParams plot_gpt
#'
#' @return (Insisibily) returns the ggplot-object with the complete plot (\code{plot}) as well as the data frame that
#'           was used for setting up the ggplot-object (\code{df}) and the original correlation matrix
#'           (\code{corr.matrix}).
#'
#' @note If \code{data} is a matrix with correlation coefficients as returned by
#'       the \code{\link{cor}}-function, p-values can't be computed.
#'       Thus, \code{show.p} and \code{p.numeric}
#'       only have an effect if \code{data} is a \code{\link{data.frame}}.
#'
#' @details Required argument is either a \code{\link{data.frame}} or a matrix with correlation coefficients
#'            as returned by the \code{\link{cor}}-function. In case of ellipses, the
#'            ellipses size indicates the strength of the correlation. Furthermore,
#'            blue and red colors indicate positive or negative correlations, where
#'            stronger correlations are darker.
#'
#' @import ggplot2
#' @importFrom tidyr gather
#' @importFrom scales brewer_pal grey_pal
#' @importFrom stats cor cor.test na.omit
#' @export
sjp.corr <- function(data,
                     title = NULL,
                     axis.labels = NULL,
                     sort.corr = TRUE,
                     decimals = 3,
                     na.deletion = c("listwise", "pairwise"),
                     corr.method = c("pearson", "spearman", "kendall"),
                     geom.colors = "RdBu",
                     wrap.title = 50,
                     wrap.labels = 20,
                     show.legend = FALSE,
                     legend.title = NULL,
                     show.values = TRUE,
                     show.p = TRUE,
                     p.numeric = FALSE) {
  .Deprecated(msg = "'sjp.corr' is deprecated. Please use 'correlation::correlation()' and its related plot()-method.")

  # --------------------------------------------------------
  # check p-value-style option
  # --------------------------------------------------------
  opt <- getOption("p_zero")
  if (is.null(opt) || opt == FALSE) {
    p_zero <- ""
  } else {
    p_zero <- "0"
  }
  # --------------------------------------------------------
  # check args
  # --------------------------------------------------------
  na.deletion <- match.arg(na.deletion)
  corr.method <- match.arg(corr.method)
  # --------------------------------------------------------
  # try to automatically set labels is not passed as argument
  # --------------------------------------------------------
  if (is.null(axis.labels) && is.data.frame(data)) {
    axis.labels <- unname(sjlabelled::get_label(data, def.value = colnames(data)))
  }
  # ----------------------------
  # set color palette
  # ----------------------------
  if (is.brewer.pal(geom.colors[1])) {
    geom.colors <- scales::brewer_pal(palette = geom.colors[1])(5)
  } else if (geom.colors[1] == "gs") {
    geom.colors <- scales::grey_pal()(5)
  }
  # ----------------------------
  # check if user has passed a data frame
  # or a pca object
  # ----------------------------
  if (any(class(data) == "matrix")) {
    corr <- data
    cpvalues <- NULL
  } else {
    # missing deletion corresponds to
    # SPSS listwise
    if (na.deletion == "listwise") {
      data <- stats::na.omit(data)
      corr <- stats::cor(data, method = corr.method)
    }
    # missing deletion corresponds to
    # SPSS pairwise
    else {
      corr <- stats::cor(data, method = corr.method, use = "pairwise.complete.obs")
    }
    #---------------------------------------
    # if we have a data frame as argument,
    # compute p-values of significances
    #---------------------------------------
    computePValues <- function(df) {
      cp <- c()
      for (i in seq_len(ncol(df))) {
        pv <- c()
        for (j in seq_len(ncol(df))) {
          test <- suppressWarnings(stats::cor.test(df[[i]], df[[j]],
                                                   alternative = "two.sided",
                                                   method = corr.method))
          pv <- c(pv, round(test$p.value, 4))
        }
        cp <- rbind(cp, pv)
      }
      return(cp)
    }
    cpvalues <- computePValues(data)
  }
  # ----------------------------
  # check if user defined labels have been supplied
  # if not, use variable names from data frame
  # ----------------------------
  if (is.null(axis.labels)) axis.labels <- row.names(corr)
  # ----------------------------
  # Prepare length of title and labels
  # ----------------------------
  # check length of diagram title and split longer string at into new lines
  if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title)
  # check length of x-axis-labels and split longer strings at into new lines
  if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels)
  # --------------------------------------------------------
  # order correlations from highest to lowest correlation coefficient
  # --------------------------------------------------------
  if (sort.corr) {
    neword <- order(corr[1, ])
    orderedCorr <- corr[neword, neword]
    # order variable labels as well
    axis.labels <- axis.labels[neword]
    if (!is.null(cpvalues)) cpvalues <- cpvalues[neword, neword]
  } else {
    cl <- ncol(corr)
    orderedCorr <- corr[cl:1, cl:1]
    axis.labels <- rev(axis.labels)
    if (!is.null(cpvalues)) cpvalues <- cpvalues[cl:1, cl:1]
  }
  # --------------------------------------------------------
  # prepare a ordering-index-column needed for the data frame
  # that is passed to the ggplot
  # --------------------------------------------------------
  yo <- c()
  for (i in seq_len(nrow(corr))) {
    yo <- c(yo, rep(i, nrow(corr)))
  }
  # --------------------------------------------------------
  # melt correlation matrix and create data frame
  # --------------------------------------------------------
  orderedCorr <- tidyr::gather(data.frame(orderedCorr), "var", "value",
                               !! seq_len(ncol(orderedCorr)), factor_key = TRUE)
  # orderedCorr <- melt(orderedCorr)
  if (!is.null(cpvalues))
    cpvalues <- tidyr::gather(data.frame(cpvalues), "var", "value",
                              !! seq_len(ncol(cpvalues)), factor_key = TRUE)
  # if (!is.null(cpvalues)) cpvalues <- melt(cpvalues)
  # bind additional information like order for x- and y-axis
  # as well as the size of plotted points
  orderedCorr <- cbind(orderedCorr, ordx = seq_len(nrow(corr)), ordy = yo)
  # --------------------------------------------------------
  # add column with significance value
  # --------------------------------------------------------
  if (!is.null(cpvalues)) {
    if (!p.numeric) {
      cpv <- sapply(cpvalues$value, get_p_stars)
    } else {
      cpv <- sapply(cpvalues$value, function(x) {
        if (x < 0.001)
          x <- sprintf("\n(< %s.001)", p_zero)
        else
          x <- sub("0", p_zero, sprintf("\n(%.*f)", decimals, x))
      })
    }
  } else {
    cpv <- ""
  }
  orderedCorr$ps <- cpv
  # --------------------------------------------------------
  # set visibility of labels
  # --------------------------------------------------------
  if (!show.values) {
    orderedCorr$val.labels <- ""
  } else {
    if (show.p) {
      orderedCorr$val.labels <- sprintf("%.*f%s", decimals, orderedCorr$value, orderedCorr$ps)
    } else {
      orderedCorr$val.labels <- sprintf("%.*f", decimals, orderedCorr$value)
    }
  }
  orderedCorr$val.labels[orderedCorr$ordx >= orderedCorr$ordy] <- NA

  orderedCorr$ordx <- as.factor(orderedCorr$ordx)
  orderedCorr$ordy <- as.factor(orderedCorr$ordy)
  message(sprintf("Computing correlation using %s-method with %s-deletion...", corr.method, na.deletion))
  # --------------------------------------------------------
  # start with base plot object here
  # --------------------------------------------------------
  corrPlot <- ggplot(orderedCorr, aes_string(x = "ordx", y = "ordy", fill = "value", label = "val.labels")) +
    geom_tile(size = 0, colour = "black") +
  # fill gradient colour from distinct color brewer palette. negative correlations are dark
  # red, positive corr. are dark blue, and they become lighter the closer they are to a
  # correlation coefficient of zero
    scale_x_discrete(labels = axis.labels, breaks = seq_len(length(axis.labels))) +
    scale_y_discrete(labels = axis.labels, breaks = seq_len(length(axis.labels))) +
    # set limits to (-1,1) to make sure the whole color palette is used. this
    # is the colour scale for geoms
    scale_fill_gradientn(colours = geom.colors, limits = c(-1,1)) +
    geom_text(size = 3.5, colour = "black") +
    labs(title = title, x = NULL, y = NULL)
  if (show.legend)
    corrPlot <- corrPlot + guides(fill = legend.title)
  else
    corrPlot <- corrPlot + guides(fill = "none")

  corrPlot
}