1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
|
# Class that implements the standard NMF model
#
# Author: Renaud Gaujoux \email{renaud@@cbio.uct.ac.za}
###############################################################################
#' @include NMF-class.R
NULL
#' NMF Model - Standard model
#'
#' This class implements the standard model of Nonnegative Matrix
#' Factorization.
#' It provides a general structure and generic functions to manage
#' factorizations that follow the standard NMF model, as defined by
#' \cite{Lee2001}.
#'
#' Let \eqn{V} be a \eqn{n \times m} non-negative matrix and \eqn{r} a positive
#' integer. In its standard form (see references below), a NMF of \eqn{V} is
#' commonly defined as a pair of matrices \eqn{(W, H)} such that:
#'
#' \deqn{V \equiv W H,}
#'
#' where:
#' \itemize{
#' \item \eqn{W} and \eqn{H} are \eqn{n \times r} and \eqn{r
#' \times m} matrices respectively with non-negative entries;
#' \item \eqn{\equiv} is to be understood with respect to some loss function.
#' Common choices of loss functions are based on Frobenius norm or Kullback-Leibler
#' divergence.
#' }
#'
#' Integer \eqn{r} is called the \emph{factorization rank}.
#' Depending on the context of application of NMF, the columns of \eqn{W}
#' and \eqn{H} are given different names:
#' \describe{
#' \item{columns of \code{W}}{basis vector, metagenes, factors, source, image basis}
#' \item{columns of \code{H}}{mixture coefficients, metagene sample expression profiles, weights}
#' \item{rows of \code{H}}{basis profiles, metagene expression profiles}
#' }
#'
#' NMF approaches have been successfully applied to several fields.
#' The package NMF was implemented trying to use names as generic as possible
#' for objects and methods.
#'
#' The following terminology is used:
#' \describe{
#' \item{samples}{the columns of the target matrix \eqn{V}}
#' \item{features}{the rows of the target matrix \eqn{V}}
#' \item{basis matrix}{the first matrix factor \eqn{W}}
#' \item{basis vectors}{the columns of first matrix factor \eqn{W}}
#' \item{mixture matrix}{the second matrix factor \eqn{H}}
#' \item{mixture coefficients}{the columns of second matrix factor \eqn{H}}
#' }
#'
#' However, because the package NMF was primarily implemented to work with gene
#' expression microarray data, it also provides a layer to easily and
#' intuitively work with objects from the Bioconductor base framework.
#' See \link{bioc-NMF} for more details.
#'
#' @slot W A \code{matrix} that contains the basis matrix, i.e. the \emph{first}
#' matrix factor of the factorisation
#' @slot H A \code{matrix} that contains the coefficient matrix, i.e. the
#' \emph{second} matrix factor of the factorisation
#' @slot bterms a \code{data.frame} that contains the primary data that
#' define fixed basis terms. See \code{\link{bterms}}.
#' @slot ibterms integer vector that contains the indexes of the basis components
#' that are fixed, i.e. for which only the coefficients are estimated.
#'
#' IMPORTANT: This slot is set on construction of an NMF model via
#' \code{\link[=nmfModel,formula,ANY-method]{nmfModel}} and should
#' not be subsequently changed by the end-user.
#' @slot cterms a \code{data.frame} that contains the primary data that
#' define fixed coefficient terms. See \code{\link{cterms}}.
#' @slot icterms integer vector that contains the indexes of the basis components
#' that have fixed coefficients, i.e. for which only the basis vectors are estimated.
#'
#' IMPORTANT: This slot is set on construction of an NMF model via
#' \code{\link[=nmfModel,formula,ANY-method]{nmfModel}} and should
#' not be subsequently changed by the end-user.
#'
#' @export
#' @family NMF-model
#' @examples
#' # create a completely empty NMFstd object
#' new('NMFstd')
#'
#' # create an NMF object based on one random matrix: the missing matrix is deduced
#' # Note this only works when using the factory method nmfModel
#' n <- 50; r <- 3;
#' w <- rmatrix(n, r)
#' nmfModel(W=w)
#'
#' # create a NMF object based on random (compatible) matrices
#' p <- 20
#' h <- rmatrix(r, p)
#' nmfModel(W=w, H=h)
#'
#' # create a NMF object based on incompatible matrices: generate an error
#' h <- rmatrix(r+1, p)
#' try( new('NMFstd', W=w, H=h) )
#' try( nmfModel(w, h) )
#'
#' # Giving target dimensions to the factory method allows coping with dimension
#' # incompatibility (a warning is thrown in such a case)
#' nmfModel(r, W=w, H=h)
#'
setClass('NMFstd',
  representation(
    W = 'matrix',          # basis matrix (first factor)
    H = 'matrix',          # mixture coefficient matrix (second factor)
    bterms = 'data.frame', # fixed basis terms: nrow(bterms) = nrow(x)
    ibterms = 'integer',   # index of the fixed basis terms
    cterms = 'data.frame', # fixed coef terms: ncol(cterms) = ncol(x)
    icterms = 'integer'    # index of the fixed coefficient terms
  ),
  # by default both factors are empty (0 x 0) numeric matrices
  prototype = prototype(
    W = matrix(NA_real_, 0, 0),
    H = matrix(NA_real_, 0, 0)
  ),
  # Validity check: returns TRUE for a valid object, or a character string
  # describing the problem when W and H cannot be multiplied together.
  validity = function(object) {
    basis_ncol <- ncol(object@W)
    coef_nrow <- nrow(object@H)

    # W %*% H is only defined when ncol(W) equals nrow(H)
    if (basis_ncol != coef_nrow) {
      return(paste('Dimensions of W and H are not compatible [ncol(W)=', basis_ncol, '!= nrow(H)=', coef_nrow, ']'))
    }

    # Not an error, only a warning: a non-empty model whose rank exceeds
    # the (non-zero) number of columns of H looks suspicious.
    coef_ncol <- ncol(object@H)
    suspicious <- !is.empty.nmf(object) && coef_ncol > 0 && basis_ncol > coef_ncol
    if (suspicious) {
      warning(paste('Dimensions of W and H look strange [ncol(W)=', basis_ncol, '> ncol(H)=', coef_ncol, ']'))
    }

    # no blocking issue detected
    TRUE
  },
  contains = 'NMF'
)
#' Get the basis matrix in standard NMF models
#'
#' This function returns slot \code{W} of \code{object}.
#'
#' @examples
#' # random standard NMF model
#' x <- rnmf(3, 10, 5)
#' basis(x)
#' coef(x)
#'
#' # set matrix factors
#' basis(x) <- matrix(1, nrow(x), nbasis(x))
#' coef(x) <- matrix(1, nbasis(x), ncol(x))
#' # set random factors
#' basis(x) <- rmatrix(basis(x))
#' coef(x) <- rmatrix(coef(x))
#'
#' # incompatible matrices generate an error:
#' try( coef(x) <- matrix(1, nbasis(x)-1, nrow(x)) )
#' # but the low-level method allows it
#' .coef(x) <- matrix(1, nbasis(x)-1, nrow(x))
#' try( validObject(x) )
#'
setMethod('.basis', 'NMFstd',
  # Low-level getter: hands back slot W (the basis matrix) as stored,
  # without any further processing.
  function(object) object@W
)
#' Set the basis matrix in standard NMF models
#'
#' This function sets slot \code{W} of \code{object}.
setReplaceMethod('.basis', signature(object = 'NMFstd', value = 'matrix'),
  # Low-level setter: stores `value` in slot W and returns the updated
  # object. No validObject() call is made here.
  function(object, value) {
    object@W <- value
    return(object)
  }
)
#' Get the mixture coefficient matrix in standard NMF models
#'
#' This function returns slot \code{H} of \code{object}.
setMethod('.coef', 'NMFstd',
  # Low-level getter: hands back slot H (the mixture coefficient matrix)
  # as stored, without any further processing.
  function(object) object@H
)
#' Set the mixture coefficient matrix in standard NMF models
#'
#' This function sets slot \code{H} of \code{object}.
setReplaceMethod('.coef', signature(object = 'NMFstd', value = 'matrix'),
  # Low-level setter: stores `value` in slot H and returns the updated
  # object. No validObject() call is made here.
  function(object, value) {
    object@H <- value
    return(object)
  }
)
#' Compute the target matrix estimate in \emph{standard NMF models}.
#'
#' The estimate matrix is computed as the product of the two matrix slots
#' \code{W} and \code{H}:
#' \deqn{\hat{V} = W H}{V ~ W H}
#'
#' @param W a matrix to use in the computation as the basis matrix in place of
#' \code{basis(object)}.
#' It must be compatible with the coefficient matrix used
#' in the computation (i.e. number of columns in \code{W} = number of rows in \code{H}).
#' @param H a matrix to use in the computation as the coefficient matrix in place of
#' \code{coef(object)}.
#' It must be compatible with the basis matrix used
#' in the computation (i.e. number of rows in \code{H} = number of columns in \code{W}).
#'
#' @export
#' @inline
#'
#' @examples
#' # random standard NMF model
#' x <- rnmf(3, 10, 5)
#' all.equal(fitted(x), basis(x) %*% coef(x))
#'
#'
setMethod('fitted', signature(object = 'NMFstd'),
  # Estimate of the target matrix: the product of a basis matrix and a
  # coefficient matrix. Each factor defaults to the corresponding slot of
  # `object` when the caller does not supply it; `...` is accepted but unused.
  function(object, W, H, ...) {
    basis_factor <- if (missing(W)) object@W else W
    coef_factor <- if (missing(H)) object@H else H
    basis_factor %*% coef_factor
  }
)
|