File: sjPlotAnova.R

package info (click to toggle)
r-cran-sjplot 2.8.17%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,596 kB
  • sloc: sh: 13; makefile: 2
file content (274 lines) | stat: -rw-r--r-- 12,109 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#' @title Plot One-Way-Anova tables
#' @name sjp.aov1
#'
#' @description Plot One-Way-Anova table sum of squares (SS) of each factor level (group)
#'                against the dependent variable. The SS of the factor variable against the
#'                dependent variable (variance within and between groups) is printed to
#'                the model summary.
#'
#' @param var.dep Dependent variable. Will be used with following formula:
#'          \code{aov(var.dep ~ var.grp)}
#' @param var.grp Factor with the cross-classifying variable, where \code{var.dep}
#'          is grouped into the categories represented by \code{var.grp}.
#' @param meansums Logical, if \code{TRUE}, the values reported are the true group mean values.
#'          If \code{FALSE} (default), the values are reported in the standard way, i.e. the values indicate the difference of
#'          the group mean in relation to the intercept (reference group).
#' @param string.interc Character vector that indicates the reference group (intercept), that is appended to
#'          the value label of the grouping variable. Default is \code{"(Intercept)"}.
#'
#' @inheritParams plot_grpfrq
#' @inheritParams plot_xtab
#' @inheritParams plot_gpt
#' @inheritParams plot_model
#'
#' @return A ggplot-object.
#'
#' @examples
#' data(efc)
#' # note: "var.grp" does not need to be a factor.
#' # coercion to factor is done by the function
#' sjp.aov1(efc$c12hour, efc$e42dep)
#'
#'
#' @import ggplot2
#' @importFrom sjmisc trim word_wrap to_value
#' @importFrom stats confint aov summary.lm
#' @importFrom rlang .data
#' @importFrom sjlabelled get_label get_labels
#' @export
sjp.aov1 <- function(var.dep,
                     var.grp,
                     meansums = FALSE,
                     title = NULL,
                     axis.labels = NULL,
                     rev.order = FALSE,
                     string.interc = "(Intercept)",
                     axis.title = "",
                     axis.lim = NULL,
                     geom.colors = c("#3366a0", "#aa3333"),
                     geom.size = 3,
                     wrap.title = 50,
                     wrap.labels = 25,
                     grid.breaks = NULL,
                     show.values = TRUE,
                     digits = 2,
                     y.offset = .15,
                     show.p = TRUE,
                     show.summary = FALSE) {
  # --------------------------------------------------------
  # get variable name
  # --------------------------------------------------------
  var.grp.name <- get_var_name(deparse(substitute(var.grp)))
  var.dep.name <- get_var_name(deparse(substitute(var.dep)))
  # --------------------------------------------------------
  # try to automatically set labels is not passed as parameter
  # --------------------------------------------------------
  if (is.null(axis.labels)) axis.labels <- sjlabelled::get_labels(var.grp,
                                                              attr.only = F,
                                                              values = NULL,
                                                              non.labelled = T)
  if (is.null(axis.title)) axis.title <- sjlabelled::get_label(var.dep, def.value = var.dep.name)
  if (is.null(title)) {
    t1 <- sjlabelled::get_label(var.grp, def.value = var.grp.name)
    t2 <- sjlabelled::get_label(var.dep, def.value = var.dep.name)
    if (!is.null(t1) && !is.null(t2)) title <- paste0(t1, " by ", t2)
  }
  # --------------------------------------------------------
  # remove titles if empty
  # --------------------------------------------------------
  if (!is.null(axis.labels) && length(axis.labels) == 1 && axis.labels == "") axis.labels <- NULL
  if (!is.null(axis.title) && length(axis.title) == 1 && axis.title == "") axis.title <- NULL
  if (!is.null(title) && length(title) == 1 && title == "") title <- NULL
  # --------------------------------------------------------
  # unlist labels
  # --------------------------------------------------------
  if (!is.null(axis.labels)) {
    # append "intercept" string, to mark the reference category
    axis.labels[1] <- paste(axis.labels[1], string.interc)
  }
  # --------------------------------------------------------
  # Check if var.grp is factor. If not, convert to factor
  # --------------------------------------------------------
  if (!is.factor(var.grp)) var.grp <- as.factor(var.grp)
  # --------------------------------------------------------
  # check whether we have x-axis title. if not, use standard
  # value
  # --------------------------------------------------------
  # check length of diagram title and split longer string at into new lines
  # every 50 chars
  if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title)
  # check length of x-axis title and split longer string at into new lines
  # every 50 chars
  if (!is.null(axis.title)) axis.title <- sjmisc::word_wrap(axis.title, wrap.title)
  # check length of x-axis-labels and split longer strings at into new lines
  # every 10 chars, so labels don't overlap
  if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels)
  # ----------------------------
  # Calculate one-way-anova. Since we have
  # only one group variable, Type of SS does
  # not matter.
  # ----------------------------
  fit <- stats::aov(var.dep ~ var.grp)
  # coefficients (group mean)
  means <- stats::summary.lm(fit)$coefficients[, 1]
  # p-values of means
  means.p <- stats::summary.lm(fit)$coefficients[, 4]
  # lower confidence intervals of coefficients (group mean)
  means.lci <- stats::confint(fit)[, 1]
  # upper confidence intervals of coefficients (group mean)
  means.uci <- stats::confint(fit)[, 2]
  # ----------------------------
  # Check whether true group means should be reported
  # or the differences of group means in relation to the
  # intercept (reference group). The latter is the default.
  # ----------------------------
  if (meansums) {
    for (i in 2:length(means)) {
      means[i] <- means[i] + means[1]
      means.lci[i] <- means.lci[i] + means[1]
      means.uci[i] <- means.uci[i] + means[1]
    }
  }
  # ----------------------------
  # create expression with model summarys. used
  # for plotting in the diagram later
  # ----------------------------
  if (show.summary) {
    # sum of squares
    ss <- summary(fit)[[1]]['Sum Sq']
    # multiple r2
    r2 <- stats::summary.lm(fit)$r.squared
    # adj. r2
    r2.adj <- stats::summary.lm(fit)$adj.r.squared
    # get F-statistics
    fstat <- stats::summary.lm(fit)$fstatistic[1]
    # p-value for F-test
    pval <- summary(fit)[[1]]['Pr(>F)'][1, 1]
    # indicate significance level by stars
    pan <- get_p_stars(pval)
    # create mathematical term
    modsum <- as.character(as.expression(
      substitute(italic(SS[B]) == ssb * "," ~~ italic(SS[W]) == ssw * "," ~~ R^2 == mr2 * "," ~~ "adj." * R^2 == ar2 * "," ~~ "F" == f * panval,
                 list(ssb = sprintf("%.2f", ss[1, ]),
                      ssw = sprintf("%.2f", ss[2, ]),
                      mr2 = sprintf("%.3f", r2),
                      ar2 = sprintf("%.3f", r2.adj),
                      f = sprintf("%.2f", fstat),
                      panval = pan))))
  }
  # ----------------------------
  # print coefficients and p-values in plot
  # ----------------------------
  # init data column for p-values
  ps <- round(means, digits)
  # if no values should be shown, clear
  # vector now
  if (!show.values) ps <- rep("", length(ps))
  # --------------------------------------------------------
  # copy p-values into data column
  # --------------------------------------------------------
  if (show.p) {
    for (i in seq_len(length(means.p))) {
      ps[i] <- sjmisc::trim(paste(ps[i], get_p_stars(means.p[i])))
    }
  }
  # --------------------------------------------------------
  # check whether order of category items should be reversed
  # or not
  # --------------------------------------------------------
  if (rev.order)
    catorder <- length(means):1
  else
    catorder <- seq_len(length(means))
  # --------------------------------------------------------
  # create new data.frame, since ggplot requires data.frame as parameter
  # The data frame contains means, CI and p-values
  # --------------------------------------------------------
  df <- data_frame(
    means = means,     # Append coefficients
    lower = means.lci, # append CI
    upper = means.uci,
    p = means.p,   # append p-value
    pv = ps,
    xv = catorder
  )
  # --------------------------------------------------------
  # check if user defined labels have been supplied
  # if not, use variable names from data frame
  # --------------------------------------------------------
  if (is.null(axis.labels)) axis.labels <- row.names(df)
  # order labels
  axis.labels <- axis.labels[catorder]
  df$means <- sjmisc::to_value(df$means, keep.labels = F)
  df$lower <- sjmisc::to_value(df$lower, keep.labels = F)
  df$upper <- sjmisc::to_value(df$upper, keep.labels = F)
  df$p <- sjmisc::to_value(df$p, keep.labels = F)
  df$pv <- as.character(df$pv)
  df$xv <- as.factor(df$xv)
  # bind color values to data frame, because we cannot use several
  # different color aesthetics in ggplot
  df <- cbind(df, geocol = ifelse(df$means >= 0, geom.colors[1], geom.colors[2]))
  # --------------------------------------------------------
  # Calculate axis limits. The range is from lowest lower-CI
  # to highest upper-CI, or a user-defined range (if "axis.lim"
  # is not NULL)
  # --------------------------------------------------------
  if (is.null(axis.lim)) {
    # we have confindence intervals displayed, so
    # the range corresponds to the boundaries given by
    # the CI's
    maxval <- max(df$upper)
    minval <- min(df$lower)
    if (maxval > 0)
      limfac <- ifelse(abs(maxval) < 5, 5, 10)
    else
      limfac <- ifelse(abs(minval) < 5, 5, 10)
    upper_lim <- ifelse(maxval == 0, 0, limfac * ceiling((maxval + 1) / limfac))
    lower_lim <- ifelse(minval == 0, 0, limfac * floor(minval / limfac))
  } else {
    lower_lim <- axis.lim[1]
    upper_lim <- axis.lim[2]
  }
  # determine gridbreaks
  if (is.null(grid.breaks))
    ticks <- pretty(c(lower_lim, upper_lim))
  else
    ticks <- seq(lower_lim, upper_lim, by = grid.breaks)
  # --------------------------------------------------------
  # Set up plot padding (margins inside diagram)
  # --------------------------------------------------------
  scaley <- scale_y_continuous(
    limits = c(lower_lim, upper_lim),
    breaks = ticks,
    labels = ticks
  )
  # --------------------------------------------------------
  # Start plot here!
  # --------------------------------------------------------
  anovaplot <- ggplot(df, aes(y = .data$means, x = .data$xv)) +
    # print point
    geom_point(size = geom.size, colour = df$geocol) +
    # and error bar
    geom_errorbar(aes(ymin = .data$lower, ymax = .data$upper), colour = df$geocol, width = 0) +
    # Print p-values. With vertical adjustment, so
    # they don't overlap with the errorbars
    geom_text(aes(label = .data$pv, y = .data$means), nudge_x = y.offset, show.legend = FALSE) +
    # set y-scale-limits, breaks and tick labels
    scaley +
    # set value labels to x-axis
    scale_x_discrete(labels = axis.labels, limits = 1:length(axis.labels)) +
    # flip coordinates
    labs(title = title, x = NULL, y = axis.title) +
    coord_flip()

  # check whether modelsummary should be printed
  if (show.summary) {
    # add annotations with model summary
    # annotations include intercept-value and model's r-square
    anovaplot <- anovaplot +
      annotate("text", label = modsum, parse = TRUE, x = -Inf, y = Inf,
               hjust = "right", vjust = "bottom")
  }

  anovaplot
}