File: BoxCoxTrans.R

package info (click to toggle)
r-cran-caret 7.0-1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,036 kB
  • sloc: ansic: 210; sh: 10; makefile: 2
file content (143 lines) | stat: -rw-r--r-- 5,498 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#' Box-Cox and Exponential Transformations
#'
#' @name BoxCoxTrans
#' @aliases BoxCoxTrans.default BoxCoxTrans predict.BoxCoxTrans expoTrans.default expoTrans predict.expoTrans
#'
#' @description These classes can be used to estimate transformations and apply them to existing and future data
#'
#' @param y a numeric vector of data to be transformed. For \code{BoxCoxTrans}, the data must be strictly positive.
#' @param x an optional dependent variable to be used in a linear model.
#' @param fudge a tolerance value: lambda values within +/-fudge will be coerced to 0 and within 1+/-fudge will be coerced to 1.
#' @param numUnique how many unique values should \code{y} have to estimate the transformation?
#' @param na.rm a logical value indicating whether \code{NA} values should be stripped from \code{y} and \code{x} before the computation proceeds.
#' @param newdata a numeric vector of values to transform.
#' @param digits minimal number of \emph{significant digits}.
#' @param object an object of class \code{BoxCoxTrans} or \code{expoTrans}.
#' @param \dots for \code{BoxCoxTrans}: options to pass to \code{\link[MASS]{boxcox}}. \code{plotit} should not be passed through. For \code{predict.BoxCoxTrans}, additional arguments are ignored.
#'
#' @details
#' \code{BoxCoxTrans} function is basically a wrapper for the \code{\link[MASS]{boxcox}} function in the MASS library. It can be used to estimate the transformation and apply it to new data.
#'
#' \code{expoTrans} estimates the exponential transformation of Manly (1976) but assumes a common mean for
#' the data. The transformation parameter is estimated by directly maximizing the likelihood.
#'
#' If \code{any(y <= 0)} or  if \code{length(unique(y)) < numUnique}, lambda is not estimated and no
#' transformation is applied.
#'
#' @return
#' Both functions returns a list of class of either \code{BoxCoxTrans} or \code{expoTrans} with
#' elements
#' \item{lambda }{estimated transformation value}
#' \item{fudge }{value of \code{fudge}}
#' \item{n }{number of data points used to estimate lambda}
#' \item{summary }{the results of \code{summary(y)}}
#' \item{ratio }{\code{max(y)/min(y)}}
#' \item{skewness }{sample skewness statistic}
#' \code{BoxCoxTrans} also returns: \item{fudge }{value of \code{fudge}}
#'
#' The \code{predict} functions returns numeric vectors of transformed values
#'
#' @references Box, G. E. P. and Cox, D. R. (1964) An analysis of transformations (with discussion). Journal of the Royal Statistical Society B, 26, 211-252.
#' Manly, B. L. (1976) Exponential data transformations. The Statistician, 25, 37 - 42.
#'
#' @author Max Author
#'
#' @seealso \code{\link[MASS]{boxcox}}, \code{\link{preProcess}}, \code{\link{optim}}
#'
#' @examples
#' data(BloodBrain)
#'
#' ratio <- exp(logBBB)
#' bc <- BoxCoxTrans(ratio)
#' bc
#'
#' predict(bc, ratio[1:5])
#'
#' ratio[5] <- NA
#' bc2 <- BoxCoxTrans(ratio, bbbDescr$tpsa, na.rm = TRUE)
#' bc2
#'
#' manly <- expoTrans(ratio)
#' manly
#'
#' @keywords utilities
#'
#' @export
BoxCoxTrans <- function(y, ...) UseMethod("BoxCoxTrans")


##TODO add an epsilon?
## TODO add exclusion list to preProc?
#' @rdname BoxCoxTrans
#' @export
BoxCoxTrans.default <- function(y, x = rep(1, length(y)), fudge = .2, numUnique = 3,  na.rm = FALSE, ...) {
  requireNamespaceQuietStop("MASS")
  requireNamespaceQuietStop("e1071")
  if(na.rm && (any(is.na(y)) | any(is.na(x)))) {
    rmv <- is.na(y) | is.na(x)
    y <- y[!rmv]
    x <- x[!rmv]
  }
  if(!is.numeric(y) | is.factor(y) | is.character(y)) stop("y must be numeric")

  if(any(y <= 0) | length(unique(y)) < numUnique) {
    out <- list(lambda = NA,
                summary = summary(y),
                ratio = NA,
                n = length(y))
  } else {
    bc <- MASS::boxcox(y~x, plotit = FALSE, ...)
    out <- list(lambda = bc$x[which.max(bc$y)])
  }
  out$fudge <- fudge
  out$n <- length(y)
  out$summary <- summary(y)
  out$ratio <- max(y)/min(y)
  out$skewness <- e1071::skewness(y)
  class(out) <- "BoxCoxTrans"
  out
}


#' @rdname BoxCoxTrans
#' @method print BoxCoxTrans
#' @export
print.BoxCoxTrans <- function(x, newdata, digits = 3, ...){
  cat("Box-Cox Transformation\n\n")

  cat(x$n, "data points used to estimate Lambda\n\n")
  cat("Input data summary:\n")
  print(x$summary)
  if(!is.na(x$lambda)) {
    cat("\nLargest/Smallest:", signif(x$ratio, digits), "\n")
    cat("Sample Skewness:", signif(x$skewness, digits), "\n")
    cat("\nEstimated Lambda:", signif(x$lambda, digits), "\n")
    if(x$lambda < x$fudge & x$lambda > -x$fudge)
      cat("With fudge factor, Lambda = 0 will be used for transformations\n")
    if(x$lambda < 1+x$fudge & x$lambda > 1-x$fudge)
      cat("With fudge factor, no transformation is applied\n")
  } else cat("\nLambda could not be estimated; no transformation is applied\n")
  cat("\n")
  invisible(x)
}


#' @rdname BoxCoxTrans
#' @method predict BoxCoxTrans
#' @export
predict.BoxCoxTrans <- function(object, newdata, ...) {
  if(!is.vector(newdata) || !is.numeric(newdata)) stop("newdata should be a numeric vector")
  if(is.na(object$lambda))  {
    out <- newdata
  } else {
    if(object$lambda < object$fudge & object$lambda > -object$fudge)  {
      if(any(newdata[!is.na(newdata)] <= 0)) warning("newdata should have values > 0")
      out <- log(newdata)
    } else {
      if(object$lambda < 1+object$fudge & object$lambda > 1-object$fudge) {
        out <- newdata
      } else out <- (newdata^object$lambda - 1)/object$lambda
    }
  }
  out
}