File: plot_frq.R

package info (click to toggle)
r-cran-sjplot 2.8.17%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,596 kB
  • sloc: sh: 13; makefile: 2
file content (715 lines) | stat: -rw-r--r-- 26,346 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
utils::globalVariables("density")

#' @title Plot frequencies of variables
#' @name plot_frq
#'
#' @description Plot frequencies of a variable as bar graph, histogram, box plot etc.
#'
#' @note This function only works with variables with integer values (or numeric
#'         factor levels), i.e. scales / centered variables
#'         with fractional part may result in unexpected behaviour.
#'
#' @param ... Optional, unquoted names of variables that should be selected for
#'   further processing. Required, if \code{data} is a data frame (and no
#'   vector) and only selected variables from \code{data} should be processed.
#'   You may also use functions like \code{:} or tidyselect's
#'   select_helpers.
#' @param sort.frq Determines whether categories should be sorted
#'          according to their frequencies or not. Default is \code{"none"}, so
#'          categories are not sorted by frequency. Use \code{"asc"} or
#'          \code{"desc"} for sorting categories ascending or descending order.
#' @param geom.colors User defined color for geoms, e.g. \code{geom.colors = "#0080ff"}.
#' @param errorbar.color Color of confidence interval bars (error bars).
#'          Only applies to \code{type = "bar"}. In case of dot plots, error bars
#'          will have same colors as dots (see \code{geom.colors}).
#' @param show.mean Logical, if \code{TRUE}, a vertical line in histograms
#'          is drawn to indicate the mean value of the variables. Only
#'          applies to histogram-charts.
#' @param show.mean.val Logical, if \code{TRUE} (default), the mean value
#'          is printed to the vertical line that indicates the variable's
#'          mean. Only applies to histogram-charts.
#' @param show.sd Logical, if \code{TRUE}, the standard deviation
#'          is annotated as shaded rectangle around the mean intercept
#'          line. Only applies to histogram-charts.
#' @param mean.line.type Numeric value, indicating the linetype of the mean
#'          intercept line. Only applies to histogram-charts and
#'          when \code{show.mean = TRUE}.
#' @param mean.line.size Numeric, size of the mean intercept line. Only
#'          applies to histogram-charts and when \code{show.mean = TRUE}.
#' @param normal.curve Logical, if \code{TRUE}, a normal curve, which is adjusted to the data,
#'          is plotted over the histogram or density plot. Default is
#'          \code{FALSE}. Only applies when histograms or density plots are plotted (see \code{type}).
#' @param normal.curve.color Color of the normal curve line. Only
#'          applies if \code{normal.curve = TRUE}.
#' @param normal.curve.size Numeric, size of the normal curve line. Only
#'          applies if \code{normal.curve = TRUE}.
#' @param normal.curve.alpha Transparancy level (alpha value) of the normal curve. Only
#'          applies if \code{normal.curve = TRUE}.
#' @param xlim Numeric vector of length two, defining lower and upper axis limits
#'          of the x scale. By default, this argument is set to \code{NULL}, i.e. the
#'          x-axis fits to the required range of the data.
#' @param axis.title Character vector of length one or two (depending on
#'          the plot function and type), used as title(s) for the x and y axis.
#'          If not specified, a default labelling  is chosen.
#'          \strong{Note:} Some plot types do not support this argument. In such
#'          cases, use the return value and add axis titles manually with
#'          \code{\link[ggplot2]{labs}}, e.g.: \code{$plot.list[[1]] + labs(x = ...)}
#'
#' @inheritParams plot_scatter
#' @inheritParams plot_grpfrq
#' @inheritParams tab_xtab
#'
#' @return A ggplot-object.
#'
#' @examples
#' library(sjlabelled)
#' data(efc)
#' data(iris)
#'
#' # simple plots, two different notations
#' plot_frq(iris, Species)
#' plot_frq(efc$tot_sc_e)
#'
#' # boxplot
#' plot_frq(efc$e17age, type = "box")
#'
#' if (require("dplyr")) {
#'   # histogram, pipe-workflow
#'   efc %>%
#'     dplyr::select(e17age, c160age) %>%
#'     plot_frq(type = "hist", show.mean = TRUE)
#'
#'   # bar plot(s)
#'   plot_frq(efc, e42dep, c172code)
#' }
#'
#' if (require("dplyr") && require("gridExtra")) {
#'   # grouped data frame, all panels in one plot
#'   efc %>%
#'     group_by(e42dep) %>%
#'     plot_frq(c161sex) %>%
#'     plot_grid()
#' }
#'
#' \donttest{
#' library(sjmisc)
#' # grouped variable
#' ageGrp <- group_var(efc$e17age)
#' ageGrpLab <- group_labels(efc$e17age)
#' plot_frq(ageGrp, title = get_label(efc$e17age), axis.labels = ageGrpLab)
#'
#' # plotting confidence intervals. expand grid and v/hjust for text labels
#' plot_frq(
#'   efc$e15relat, type = "dot", show.ci = TRUE, sort.frq = "desc",
#'   coord.flip = TRUE, expand.grid = TRUE, vjust = "bottom", hjust = "left"
#' )
#'
#' # histogram with overlayed normal curve
#' plot_frq(efc$c160age, type = "h", show.mean = TRUE, show.mean.val = TRUE,
#'         normal.curve = TRUE, show.sd = TRUE, normal.curve.color = "blue",
#'         normal.curve.size = 3, ylim = c(0,50))
#' }
#' @import ggplot2
#' @importFrom sjmisc group_labels group_var to_value frq
#' @importFrom sjlabelled set_labels drop_labels
#' @importFrom stats na.omit sd weighted.mean dnorm
#' @importFrom rlang .data
#' @export
plot_frq <- function(data,
                     ...,
                    title = "",
                    weight.by = NULL,
                    title.wtd.suffix = NULL,
                    sort.frq = c("none", "asc", "desc"),
                    type = c("bar", "dot", "histogram", "line", "density", "boxplot", "violin"),
                    geom.size = NULL,
                    geom.colors = "#336699",
                    errorbar.color = "darkred",
                    axis.title = NULL,
                    axis.labels = NULL,
                    xlim = NULL,
                    ylim = NULL,
                    wrap.title = 50,
                    wrap.labels = 20,
                    grid.breaks = NULL,
                    expand.grid = FALSE,
                    show.values = TRUE,
                    show.n = TRUE,
                    show.prc = TRUE,
                    show.axis.values = TRUE,
                    show.ci = FALSE,
                    show.na = FALSE,
                    show.mean = FALSE,
                    show.mean.val = TRUE,
                    show.sd = TRUE,
                    drop.empty = TRUE,
                    mean.line.type = 2,
                    mean.line.size = 0.5,
                    inner.box.width = 0.15,
                    inner.box.dotsize = 3,
                    normal.curve = FALSE,
                    normal.curve.color = "red",
                    normal.curve.size = 0.8,
                    normal.curve.alpha = 0.4,
                    auto.group = NULL,
                    coord.flip = FALSE,
                    vjust = "bottom",
                    hjust = "center",
                    y.offset = NULL) {

  # Match arguments -----
  type <- match.arg(type)
  sort.frq <- match.arg(sort.frq)

  plot_data  <- get_dplyr_dot_data(data, dplyr::quos(...))

  if (!is.data.frame(plot_data)) {
    plot_data <- data.frame(plot_data, stringsAsFactors = FALSE)
    colnames(plot_data) <- deparse(substitute(data))
  }

  pl <- NULL

  if (inherits(plot_data, "grouped_df")) {
    # get grouped data
    grps <- get_grouped_data(plot_data)

    # now plot everything
    for (i in seq_len(nrow(grps))) {
      # copy back labels to grouped data frame
      tmp <- sjlabelled::copy_labels(grps$data[[i]], data)

      # prepare argument list, including title
      tmp.title <- get_grouped_plottitle(plot_data, grps, i, sep = "\n")

      # plot

      plots <- lapply(colnames(tmp), function(.d) {
        plot_frq_helper(
          var.cnt = tmp[[.d]], title = tmp.title, weight.by = weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors,
          errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks,
          expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na,
          show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size,
          inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color,
          normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust,
          hjust, y.offset, var.name = .d
        )
      })

      # add plots, check for NULL results
      pl <- c(pl, plots)
    }
  } else {
    pl <- lapply(colnames(plot_data), function(.d) {
      plot_frq_helper(
        var.cnt = plot_data[[.d]], title, weight.by = weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors,
        errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks,
        expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na,
        show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size,
        inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color,
        normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust,
        hjust, y.offset, var.name = .d
      )
    })

    if (length(pl) == 1) pl <- pl[[1]]
  }

  pl
}


plot_frq_helper <- function(
  var.cnt, title, weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors,
  errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks,
  expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na,
  show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size,
  inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color,
  normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust,
  hjust, y.offset, var.name = NULL) {

  # remove empty value-labels
  if (drop.empty) {
    var.cnt <- sjlabelled::drop_labels(var.cnt)
  }


  # try to find some useful default offsets for textlabels,
  # depending on plot range and flipped coordinates
  if (is.null(y.offset)) {
    # get maximum y-pos
    y.offset <- ceiling(max(table(var.cnt)) / 100)
    if (coord.flip) {
      if (missing(vjust)) vjust <- "center"
      if (missing(hjust)) hjust <- "bottom"
      if (hjust == "bottom")
        y_offset <- y.offset
      else if (hjust == "top")
        y_offset <- -y.offset
      else
        y_offset <- 0
    } else {
      if (vjust == "bottom")
        y_offset <- y.offset
      else if (vjust == "top")
        y_offset <- -y.offset
      else
        y_offset <- 0
    }
  } else {
    y_offset <- y.offset
  }

  if (is.null(axis.title)) axis.title <- sjlabelled::get_label(var.cnt, def.value = var.name)
  if (is.null(title)) title <- sjlabelled::get_label(var.cnt, def.value = var.name)

  # remove titles if empty
  if (!is.null(axis.title) && axis.title == "") axis.title <- NULL
  if (!is.null(title) && title == "") title <- NULL

  # check color argument
  if (length(geom.colors) > 1) geom.colors <- geom.colors[1]

  # default grid-expansion
  if (isTRUE(expand.grid) || (missing(expand.grid) && type == "histogram")) {
    expand.grid <- waiver()
  } else {
    expand.grid <- c(0, 0)
  }

  # for histograms or density plots...
  xv <- sjmisc::to_value(stats::na.omit(var.cnt))

  # check for nice bin-width defaults
  if (type %in% c("histogram", "density") &&
      !is.null(geom.size) &&
      geom.size < round(diff(range(xv)) / 40))
    message("Using very small binwidth. Consider adjusting `geom.size` argument.")

  # create second data frame
  hist.dat <- data.frame(xv)

  # check default geom.size -----
  if (is.null(geom.size)) {
    geom.size <- dplyr::case_when(
      type == "bar" ~ .7,
      type == "dot" ~ 2.5,
      type == "density" ~ ceiling(diff(range(xv)) / 40),
      type == "histogram" ~ ceiling(diff(range(xv)) / 40),
      type == "line" ~ .8,
      type == "boxplot" ~ .3,
      type == "violin" ~ .3,
      TRUE ~ .7
    )
  }

  # check whether variable should be auto-grouped -----
  if (!is.null(auto.group) && length(unique(var.cnt)) >= auto.group) {
    message(sprintf(
      "`%s` has %i unique values and was grouped...",
      var.name,
      length(unique(var.cnt))
    ))
  }

  if (!is.null(weight.by)) {
    dat <- data.frame(
      var.cnt = var.cnt,
      weight.by = weight.by,
      stringsAsFactors = FALSE
    )
  } else {
    dat <- data.frame(
      var.cnt = var.cnt,
      stringsAsFactors = FALSE
    )
  }

  # create frequency data frame -----
  df.frq <- suppressMessages(sjmisc::frq(
    x = dat,
    "var.cnt",
    sort.frq = sort.frq,
    weights = "weight.by",
    auto.grp = auto.group,
    show.na = show.na
  ))

  mydat <- df.frq[[1]]
  # remove empty
  if (drop.empty) mydat <- mydat[mydat$frq > 0, ]

  # add confindence intervals for frequencies
  total_n = sum(mydat$frq)
  rel_frq <- as.numeric(mydat$frq / total_n)
  ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n))
  mydat$upper.ci <- total_n * (rel_frq + ci)
  mydat$lower.ci <- total_n * (rel_frq - ci)
  mydat$rel.upper.ci <- rel_frq + ci
  mydat$rel.lower.ci <- rel_frq - ci

  # any labels detected?
  if (!is.null(mydat$label) && is.null(axis.labels) && !all(stats::na.omit(mydat$label) == "<none>"))
    axis.labels <- mydat$label
  else if (is.null(axis.labels))
    axis.labels <- mydat$val

  # wrap labels
  axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels)

  # define text label position
  if (show.ci)
    mydat$label.pos <- mydat$upper.ci
  else
    mydat$label.pos <- mydat$frq

  # Trim labels and title to appropriate size -----
  # check length of diagram title and split longer string into new lines
  # every 50 chars
  if (!is.null(title)) {
    # if we have weighted values, say that in diagram's title
    if (!is.null(title.wtd.suffix)) title <- paste(title, title.wtd.suffix, sep = "")
    title <- sjmisc::word_wrap(title, wrap.title)
  }
  # check length of x-axis title and split longer string into new lines
  # every 50 chars
  if (!is.null(axis.title)) axis.title <- sjmisc::word_wrap(axis.title, wrap.title)

  # count variable may not be a factor!
  if (is.factor(var.cnt) || is.character(var.cnt)) {
    var.cnt <- sjmisc::to_value(var.cnt, keep.labels = F)
  }

  # If we have a histogram, caluclate means of groups
  if (is.null(weight.by)) {
    mittelwert <- mean(var.cnt, na.rm = TRUE)
    stddev <- stats::sd(var.cnt, na.rm = TRUE)
  } else {
    mittelwert <- stats::weighted.mean(var.cnt, weight.by, na.rm = TRUE)
    stddev <- datawizard::weighted_sd(var.cnt, weights = weight.by)
  }

  # If we have boxplots, use different data frame structure
  if (type == "boxplot" || type == "violin") {
    mydat <- stats::na.omit(data.frame(cbind(
      grp = 1,
      frq = var.cnt,
      val = var.cnt
    )))
    mydat$grp <- as.factor(mydat$grp)
  }

  # Prepare bar charts
  trimViolin <- FALSE
  lower_lim <- 0

  # calculate upper y-axis-range
  # if we have a fixed value, use this one here
  if (!is.null(ylim) && length(ylim) == 2) {
    lower_lim <- ylim[1]
    upper_lim <- ylim[2]
  } else {
    # if we have boxplots, we have different ranges, so we can adjust
    # the y axis
    if (type == "boxplot" || type == "violin") {
      # use an extra standard-deviation as limits for the y-axis when we have boxplots
      lower_lim <- min(var.cnt, na.rm = TRUE) - floor(stats::sd(var.cnt, na.rm = TRUE))
      upper_lim <- max(var.cnt, na.rm = TRUE) + ceiling(stats::sd(var.cnt, na.rm = TRUE))
      # make sure that the y-axis is not below zero
      if (lower_lim < 0) {
        lower_lim <- 0
        trimViolin <- TRUE
      }
    } else if (type == "histogram") {
      # what is the maximum values after binning for histograms?
      hist.grp.cnt <- ceiling(diff(range(var.cnt, na.rm = T)) / geom.size)
      # ... or the amount of max. answers per category
      # add 10% margin to upper limit
      upper_lim <- max(pretty(table(
        sjmisc::group_var(
          var.cnt,
          size = "auto",
          n = hist.grp.cnt,
          append = FALSE
        )
      ) * 1.1))
    } else {
      if (show.ci)
        upper_lim <- max(pretty(mydat$upper.ci * 1.1))
      else
        upper_lim <- max(pretty(mydat$frq * 1.1))
    }
  }

  # If we want to include NA, use raw percentages as valid percentages
  if (show.na) mydat$valid.prc <- mydat$raw.prc

  # don't display value labels when we have boxplots or violin plots
  if (type == "boxplot" || type == "violin") show.values <- FALSE
  if (show.values) {
    # here we have counts and percentages
    if (show.prc && show.n) {
      if (coord.flip) {
        ggvaluelabels <-
          geom_text(
            label = sprintf("%i (%.01f%%)", mydat$frq, mydat$valid.prc),
            hjust = hjust,
            vjust = vjust,
            aes(y = .data$label.pos + y_offset)
          )
      } else {
        ggvaluelabels <-
          geom_text(
            label = sprintf("%i\n(%.01f%%)", mydat$frq, mydat$valid.prc),
            hjust = hjust,
            vjust = vjust,
            aes(y = .data$label.pos + y_offset)
          )
      }
    } else if (show.n) {
      # here we have counts, without percentages
      ggvaluelabels <-  geom_text(
        label = sprintf("%i", mydat$frq),
        hjust = hjust,
        vjust = vjust,
        aes(y = .data$label.pos + y_offset)
      )
    } else if (show.prc) {
      # here we have counts, without percentages
      ggvaluelabels <-
        geom_text(
          label = sprintf("%.01f%%", mydat$valid.prc),
          hjust = hjust,
          vjust = vjust,
          aes(y = .data$label.pos + y_offset)
        )
    } else {
      # no labels
      ggvaluelabels <-  geom_text(aes(y = .data$frq), label = "")
    }
  } else {
    # no labels
    ggvaluelabels <-  geom_text(aes(y = .data$frq), label = "")
  }

  # Set up grid breaks
  maxx <- if (is.numeric(mydat$val))
    max(mydat$val) + 1
  else
    nrow(mydat)

  if (is.null(grid.breaks)) {
    gridbreaks <- waiver()
    histgridbreaks <- waiver()
  } else {
    gridbreaks <- c(seq(lower_lim, upper_lim, by = grid.breaks))
    histgridbreaks <- c(seq(lower_lim, maxx, by = grid.breaks))
  }

  # set Y-axis, depending on the calculated upper y-range.
  # It either corresponds to the maximum amount of cases in the data set
  # (length of var) or to the highest count of var's categories.
  if (show.axis.values) {
    yscale <- scale_y_continuous(
      limits = c(lower_lim, upper_lim),
      expand = expand.grid,
      breaks = gridbreaks
    )
  } else {
    yscale <- scale_y_continuous(
      limits = c(lower_lim, upper_lim),
      expand = expand.grid,
      breaks = gridbreaks,
      labels = NULL
    )
  }

  # bar and dot plot start here! -----
  if (type == "bar" || type == "dot") {
    # define geom
    if (type == "bar") {
      geob <- geom_bar(stat = "identity", width = geom.size, fill = geom.colors)
    } else if (type == "dot") {
      geob <- geom_point(size = geom.size, colour = geom.colors)
    }

    # as factor, but preserve order
    mydat$val <- factor(mydat$val, levels = unique(mydat$val))

    # mydat is a data frame that only contains one variable (var).
    # Must be declared as factor, so the bars are central aligned to
    # each x-axis-break.
    baseplot <- ggplot(mydat, aes(x = .data$val, y = .data$frq)) +
      geob +
      yscale +
      # remove guide / legend
      guides(fill = "none") +
      # show absolute and percentage value of each bar.
      ggvaluelabels +
      # print value labels to the x-axis.
      # If argument "axis.labels" is NULL, the category numbers (1 to ...)
      # appear on the x-axis
      scale_x_discrete(labels = axis.labels)

    # add error bars
    if (show.ci) {
      ebcol <- ifelse(type == "dot", geom.colors, errorbar.color)
      # print confidence intervalls (error bars)
      baseplot <- baseplot +
        geom_errorbar(aes_string(ymin = "lower.ci", ymax = "upper.ci"), colour = ebcol, width = 0)
    }

    # check whether coordinates should be flipped, i.e.
    # swap x and y axis
    if (coord.flip) baseplot <- baseplot + coord_flip()

  # Start box plot here -----
  } else if (type == "boxplot" || type == "violin") {
    # setup base plot
    baseplot <- ggplot(mydat, aes_string(x = "grp", y = "frq"))
    # and x-axis
    scalex <- scale_x_discrete(labels = "")
    if (type == "boxplot") {
      baseplot <- baseplot +
        geom_boxplot(width = geom.size, fill = geom.colors, notch = show.ci)
    } else {
      baseplot <- baseplot +
        geom_violin(trim = trimViolin, width = geom.size, fill = geom.colors)
      # if we have a violin plot, add an additional boxplot inside to show
      # more information
      if (show.ci) {
        baseplot <- baseplot +
          geom_boxplot(width = inner.box.width, fill = "white", notch = TRUE)
      } else {
        baseplot <- baseplot +
          geom_boxplot(width = inner.box.width, fill = "white")
      }
    }

    # if we have boxplots or violon plots, also add a point that indicates
    # the mean value
    # different fill colours, because violin boxplots have white background
    fcsp <- ifelse(type == "boxplot", "white", "black")
    baseplot <- baseplot +
      stat_summary(fun = "mean", geom = "point", shape = 21,
                   size = inner.box.dotsize, fill = fcsp)
    # no additional labels for the x- and y-axis, only diagram title
    baseplot <- baseplot + yscale + scalex

  # Start density plot here -----
  } else if (type == "density") {
    # First, plot histogram with density curve
    baseplot <- ggplot(hist.dat, aes(x = .data$xv)) +
      geom_histogram(aes(y = stat(density)), binwidth = geom.size, fill = geom.colors) +
      # transparent density curve above bars
      geom_density(aes(y = stat(density)), fill = "cornsilk", alpha = 0.3) +
      # remove margins from left and right diagram side
      scale_x_continuous(expand = expand.grid, breaks = histgridbreaks, limits = xlim)

    # check whether user wants to overlay the histogram
    # with a normal curve
    if (normal.curve) {
      baseplot <- baseplot +
        stat_function(
          fun = dnorm,
          args = list(
            mean = mean(hist.dat$xv),
            sd = stats::sd(hist.dat$xv)
          ),
          colour = normal.curve.color,
          size = normal.curve.size,
          alpha = normal.curve.alpha
        )
    }
  } else {
    # Since the density curve shows no absolute numbers (counts) on the
    # y-axis, have also the opportunity to plot "real" histrograms with
    # counts on the y-axis
    if (type == "histogram") {
      # original data needed for normal curve
      baseplot <- ggplot(mydat) +
        # second data frame mapped to the histogram geom
        geom_histogram(data = hist.dat, aes(x = .data$xv), binwidth = geom.size, fill = geom.colors)
    } else {
      baseplot <- ggplot(mydat, aes(x = .data$val, y = .data$frq)) +
        geom_area(alpha = 0.3) +
        geom_line(linewidth = geom.size, colour = geom.colors) +
        ggvaluelabels
    }
    # check whether user wants to overlay the histogram
    # with a normal curve
    if (normal.curve) {
      baseplot <- baseplot +
        stat_function(
          fun = function(xx, mean, sd, n) {
            n * stats::dnorm(x = xx, mean = mean, sd = sd)
          },
          args = with(mydat, c(
            mean = mittelwert,
            sd = stddev,
            n = length(var.cnt)
          )),
          colour = normal.curve.color,
          size = normal.curve.size,
          alpha = normal.curve.alpha
        )
    }
    # if we have a histogram, add mean-lines
    if (show.mean) {
      baseplot <- baseplot +
        # vertical lines indicating the mean
        geom_vline(xintercept = mittelwert, linetype = mean.line.type, size = mean.line.size)
      # check whether meanvalue should be shown.
      if (show.mean.val) {
        baseplot <- baseplot +
          # use annotation instead of geomtext, because we need mean value only printed once
          annotate(
            "text",
            x = mittelwert,
            y = upper_lim,
            parse = TRUE,
            label = paste(
              "italic(bar(x)) == ",
              round(mittelwert, 1),
              "~~italic(s) == ",
              round(stddev, 1)
            ),
            vjust = "top",
            hjust = "top"
          )
      }
      # check whether the user wants to plot standard deviation area
      if (show.sd) {
        baseplot <- baseplot +
          # first draw shaded rectangle. these are by default in grey colour with very high transparancy
          annotate("rect",
                   xmin = mittelwert - stddev,
                   xmax = mittelwert + stddev,
                   ymin = 0,
                   ymax = c(upper_lim),
                   fill = "grey70",
                   alpha = 0.2) +
          # draw border-lines for shaded rectangle
          geom_vline(xintercept = mittelwert - stddev,
                     linetype = 3,
                     size = mean.line.size,
                     alpha = 0.7) +
          geom_vline(xintercept = mittelwert + stddev,
                     linetype = 3,
                     size = mean.line.size,
                     alpha = 0.7)
      }
    }

    # show absolute and percentage value of each bar.
    baseplot <- baseplot + yscale +
      # continuous x-scale for histograms
      scale_x_continuous(limits = xlim, expand = expand.grid, breaks = histgridbreaks)
  }

  # set axes text and
  baseplot <- baseplot + labs(title = title, x = axis.title, y = NULL)

  # Check whether ggplot object should be returned or plotted
  baseplot
}