File: mca.R

package info (click to toggle)
vr 7.2.12-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 2,228 kB
  • ctags: 182
  • sloc: ansic: 2,393; makefile: 28; sh: 28
file content (96 lines) | stat: -rw-r--r-- 3,071 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# file MASS/mca.q
# copyright (C) 1994-9 W. N. Venables and B. D. Ripley
#
mca <- function(df, nf = 2, abbrev = FALSE)
{
  class.ind <- function(cl)
  {
    n <- length(cl); cl <- as.factor(cl)
    x <- matrix(0, n, length(levels(cl)))
    x[(1:n) + n * (unclass(cl) - 1)] <- 1
    dimnames(x) <- list(names(cl), levels(cl))
    x
  }
  if(!all(unlist(lapply(df, is.factor))))
    stop("all variables must be factors")
  n <- nrow(df); p <- length(df)
  G <- as.matrix(do.call("data.frame", c(lapply(df, class.ind),
                                         check.names=FALSE)))
  Dc <- drop((rep(1, n)) %*% G)
  X <- t(t(G)/(sqrt(p*Dc)))
  X.svd <- svd(X)
  sec <- 1 + (1:nf)
  rs <- X %*% X.svd$v[, sec]/p
  cs <- diag(1/(sqrt(p*Dc))) %*% X.svd$v[, sec]
  fs <- X.svd$u[, sec]/rep(p*X.svd$d[sec], rep(n, nf))
  dimnames(rs) <- list(row.names(df), as.character(1:nf))
  dimnames(fs) <- dimnames(rs)
  varnames <- if(abbrev) unlist(lapply(df, levels))
              else colnames(G)
  dimnames(cs) <- list(varnames, as.character(1:nf))
  structure(list(rs=rs, cs=cs, fs=fs, d=X.svd$d[sec], p=p,
                 call=match.call()), class="mca")
}

print.mca <- function(x, ...)
{
  if(!is.null(cl <- x$call)) {
    cat("Call:\n")
    dput(cl)
  }
  cat("\nMultiple correspondence analysis of",
            nrow(x$rs), "cases of", x$p,
            "factors\n")
  cat("\nCorrelations", format(round(x$d,3), ...))
  p <- 100 * cumsum(x$d)/(x$p - 1)
  cat("  cumulative % explained", format(round(p,2), ...), "\n")
  invisible(x)
}

plot.mca <- function(x, rows = TRUE,
                     col, cex = par("cex"), ...)
{
  if(length(cex) == 1) cex <- rep(cex, 2)
  eqscplot(x$cs, type="n", xlab="", ...)
  if(missing(col)) {
    col <- par("col")
    if (!is.numeric(col)) col <- match(col, palette())
    col <- c(col, col + 1)
  } else if(length(col) != 2) col <- rep(col, length = 2)
  if(rows) text(x$rs, cex=cex[1], col=col[1])
  text(x$cs, labels=dimnames(x$cs)[[1]], cex=cex[2], col=col[2])
  invisible(x)
}

predict.mca <- function(object, newdata, type=c("row", "factor"), ...)
{
  class.ind <- function(cl)
  {
    n <- length(cl); cl <- as.factor(cl)
    x <- matrix(0, n, length(levels(cl)))
    x[(1:n) + n * (unclass(cl) - 1)] <- 1
    dimnames(x) <- list(names(cl), levels(cl))
    x
  }

  type <- match.arg(type)
  if(is.null(abbrev <- object$call$abbrev)) abbrev <- FALSE
  if(!all(unlist(lapply(newdata, is.factor))))
    stop("All variables must be factors")
  G <- as.matrix(do.call("data.frame", c(lapply(newdata, class.ind),
                                         check.names=FALSE)))
  if(abbrev) colnames(G) <- unlist(lapply(newdata, levels))
  if(type == "row") {
    # predict new row(s)
    if(!all(colnames(G) == dimnames(object$cs)[[1]]))
       stop("factors in newdata do not match those for fit")
    G %*% object$cs/object$p
  } else {
    # predict positions of new factor(s)
    n <- nrow(G)
    Dc <- drop((rep(1, n)) %*% G)
    if(n != nrow(object$fs))
      stop("newdata is not of the right length")
    (t(G)/Dc) %*% object$fs
  }
}