File: rnmf.R

package info (click to toggle)
r-cran-nmf 0.23.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 3,344 kB
  • sloc: cpp: 680; ansic: 7; makefile: 2
file content (368 lines) | stat: -rw-r--r-- 13,669 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# Generation of Random NMF Models
# 
# Author: Renaud Gaujoux
# Creation: 03 Jul 2012
###############################################################################

#' @include nmfModel.R
NULL

.rnmf_fixed <- oneoffVariable('none')

#' Generates a random NMF model of the same class and rank as another NMF model.
#' 
#' This is the workhorse method that is eventually called by all other methods.
#' It generates an NMF model of the same class and rank as \code{x}, compatible with the 
#' dimensions specified in \code{target}, that can be a single or 2-length 
#' numeric vector, to specify a square or rectangular target matrix respectively.
#' 
#' The second dimension can also be passed via argument \code{ncol}, so that 
#' calling \code{rnmf(x, 20, 10, ...)} is equivalent to \code{rnmf(x, c(20, 10), ...)}, 
#' but easier to write.
#' 
#' The entries are uniformly drawn between \code{0} and \code{max} 
#' (optionally specified in \code{...}) that defaults to 1.
#' 
#' By default the dimnames of \code{x} are set on the returned NMF model. 
#' This behaviour is disabled with argument \code{keep.names=FALSE}. 
#' See \code{\link{nmfModel}}.
#' 
#' @param ncol single numeric value that specifies the number of columns of the
#' coefficient matrix. Only used when \code{target} is a single numeric value.
#' @param keep.names a logical that indicates if the dimension names of the 
#' original NMF object \code{x} should be conserved (\code{TRUE}) or discarded
#' (\code{FALSE}).
#' @param dist specification of the random distribution to use to draw the entries 
#' of the basis and coefficient matrices.
#' It may be specified as:
#' \itemize{
#' 
#' \item a \code{function} which must be a distribution function such as e.g. 
#' \code{\link{runif}} that is used to draw the entries of both the basis and 
#' coefficient matrices. It is passed in the \code{dist} argument of 
#' \code{\link{rmatrix}}.
#' 
#' \item a \code{list} of arguments that are passed internally to \code{\link{rmatrix}}, 
#' via \code{do.call('rmatrix', dist)}.   
#' 
#' \item a \code{character} string that is partially matched to \sQuote{basis} or 
#' \sQuote{coef}, that specifies which matrix in should be drawn randomly, the 
#' other remaining as in \code{x} -- unchanged.
#' 
#' \item a \code{list} with elements \sQuote{basis} and/or \sQuote{coef}, which 
#' specify the \code{dist} argument separately for the basis and coefficient 
#' matrix respectively.
#' 
#' These elements may be either a distribution function, or a list of arguments that  
#' are passed internally to \code{\link{rmatrix}}, via 
#' \code{do.call('rmatrix', dist$basis)} 
#' or \code{do.call('rmatrix', dist$coef)}.   
#' }
#' 
#' @inline
#' @examples 
#' 
#' ## random NMF of same class and rank as another model
#' 
#' x <- nmfModel(3, 10, 5)
#' x
#' rnmf(x, 20) # square
#' rnmf(x, 20, 13)
#' rnmf(x, c(20, 13))
#' 
#' # using another distribution
#' rnmf(x, 20, dist=rnorm) 
#' 
#' # other than standard model
#' y <- rnmf(3, 50, 10, model='NMFns')
#' y
#' \dontshow{ stopifnot( identical(dim(y), c(50L,10L,3L)) ) }
#' \dontshow{ stopifnot( is(y, 'NMFns') ) }
#' 
setMethod('rnmf', signature(x='NMF', target='numeric'), 
	function(x, target, ncol=NULL, keep.names=TRUE, dist=runif){
		
		# store original dimnames
		if( keep.names ) dn <- dimnames(x)
		
		# valid parameter 'target'
		if( length(target) != 1 && length(target) != 2 )
			stop('NMF::rnmf - invalid target dimensions [length must be 1 or 2. Here length = ', length(target) ,']')
		if( any(is.na(target)) ) 
			stop('NMF::rnmf - invalid target dimensions [NA values in element(s): ', paste(which(is.na(target)), collapse=' and '), ']')		
		# shortcut for symetric case: provide only one dimension
		if( length(target) == 1L ){
			ncol <- if( !is.null(ncol) ){
				if( !is.numeric(ncol) || length(ncol) != 1 || is.na(ncol) )
					stop("NMF::rnmf - invalid argument `ncol`: must be a single numeric value")
				ncol
			}else target
			target <- c(target, ncol)
		}
		
		# retrieve dimension of the target matrix
		n <- target[1]; m <- target[2];
		# retrieve the factorization rank					
		r <- nbasis(x)
		
		## draw basis and coef matrices
		# interpret argument dist
		if( length(dist) == 0L ) dist <- runif
		if( is.character(dist) ){
			dist <- match.arg(dist, c('basis', 'coef'))
			dist <- setNames(list(runif), dist)
		}
		if( is.function(dist) ){
			dist <- list(basis = list(x=n, y=r, dist=dist)
					, coef = list(x=r, y=m, dist=dist))
		}else if( is.list(dist) ){
			if( !all(names(dist) %in% c('basis', 'coef')) ){
				dist <- list(basis=c(list(x=n, y=r), dist)
							, coef=c(list(x=r, y=m), dist))
			}else{
				if( !is.null(dist$basis) )
					dist$basis <- c(list(x=n, y=r), dist$basis)
				if( !is.null(dist$coef) )
					dist$coef <- c(list(x=r, y=m), dist$coef)
			}
		}
		
		fixed <- .rnmf_fixed()
		#Vc# Initialize random matrix: W
		# NB: this will keep the values of fixed basis terms
		if( !is.null(dist$basis) && !('basis' %in% fixed) ){
			basis(x) <- do.call('rmatrix', dist$basis);
		}
		#Vc# Initialize random matrix: H
		# NB: this will keep the values of fixed coef terms
		if( !is.null(dist$coef) && !('coef' %in% fixed) ){
			coef(x) <- do.call('rmatrix', dist$coef);
		}
		
		# if one needs to keep the names (possibly or reducing/increasing) 
		if( keep.names && !is.null(dn) )
			dimnames(x) <- list(dn[[1]][1:n], dn[[2]][1:m], dn[[3]][1:r])
		
		# return the modified object
		x
	}
)

#' Generates a random NMF model compatible and consistent with a target matrix.
#' 
#' The entries are uniformly drawn between \code{0} and \code{max(target)}.
#' It is more or less a shortcut for:
#' \samp{ rnmf(x, dim(target), max=max(target), ...)} 
#' 
#' It returns an NMF model of the same class as \code{x}.
#' 
#' @param use.dimnames a logical that indicates whether the dimnames of the 
#' target matrix should be set on the returned NMF model. 
#' 
#' @inline
#' 
#' @examples
#' # random NMF compatible with a target matrix
#' x <- nmfModel(3, 10, 5)
#' y <- rmatrix(20, 13)
#' rnmf(x, y) # rank of x
#' rnmf(2, y) # rank 2
#' 
setMethod('rnmf', signature(x='ANY', target='matrix'), 
	function(x, target, ..., dist=list(max=max(max(target, na.rm=TRUE), 1)), use.dimnames=TRUE){	
				
		# build a random NMF with the dimensions of the target matrix upper-bounded by the target's maximum entry.
		res <- rnmf(x, dim(target), ..., dist=dist)
		# compute the upper-bound of the random entries and enforce it if possible
		no.na <- abs(target[!is.na(target)])
		if( length(no.na) > 0 ){
			m <- max(no.na)
			basis(res) <- pmin(basis(res), m)
			coef(res) <- pmin(coef(res), m)
		}
		
		# set the dimnames from the target matrix if necessary
		if( use.dimnames )
			dimnames(res) <- dimnames(target)
		
		# return result
		res
	}
)
#' Shortcut for \code{rnmf(x, as.matrix(target))}.
setMethod('rnmf', signature(x='ANY', target='data.frame'),
	function(x, target, ...){
		rnmf(x, as.matrix(target), ...)
	}
)
#' Generates a random NMF model of the same dimension as another NMF model.
#' 
#' It is a shortcut for \code{rnmf(x, nrow(x), ncol(x), ...)}, which returns
#' a random NMF model of the same class and dimensions as \code{x}.
#' 
#' @examples
#' ## random NMF from another model
#' 
#' a <- nmfModel(3, 100, 20)
#' b <- rnmf(a)
#' \dontshow{ stopifnot( !nmf.equal(a,b) ) }
#' 
setMethod('rnmf', signature(x='NMF', target='missing'), 
	function(x, target, ...){
		rnmf(x, c(nrow(x),ncol(x)), ...)
	}
)
#' Generates a random NMF model of a given rank, with known basis and/or 
#' coefficient matrices.  
#'
#' This methods allow to easily generate partially random NMF model, where one 
#' or both factors are known.
#' Although the later case might seems strange, it makes sense for NMF models that 
#' have fit extra data, other than the basis and coefficient matrices, that 
#' are drawn by an \code{rnmf} method defined for their own class, which should 
#' internally call \code{rnmf,NMF,numeric} and let it draw the basis and 
#' coefficient matrices.
#' (e.g. see \code{\linkS4class{NMFOffset}} and \code{\link{rnmf,NMFOffset,numeric-method}}).  
#' 
#' Depending on whether arguments \code{W} and/or \code{H} are missing, 
#' this method interprets \code{x} differently:
#' \itemize{
#' 
#' \item \code{W} provided, \code{H} missing: \code{x} is taken as the number of 
#' columns that must be drawn to build a random coefficient matrix 
#' (i.e. the number of columns in the target matrix).
#' 
#' \item \code{W} is missing, \code{H} is provided: \code{x} is taken as the number of 
#' rows that must be drawn to build a random basis matrix 
#' (i.e. the number of rows in the target matrix).
#' 
#' \item both \code{W} and \code{H} are provided: \code{x} is taken as the target 
#' rank of the model to generate.

#' \item Having both \code{W} and \code{H} missing produces an error, as the 
#' dimension of the model cannot be determined in this case. 
#' }
#'
#' The matrices \code{W} and \code{H} are reduced if necessary and possible 
#' to be consistent with this value of the rank, by the internal call to 
#' \code{\link{nmfModel}}.
#'  
#' All arguments in \code{...} are passed to the function \code{\link{nmfModel}} 
#' which is used to build an initial NMF model, that is in turn passed to 
#' \code{rnmf,NMF,numeric} with \code{dist=list(coef=dist)} or 
#' \code{dist=list(basis=dist)} when suitable.
#' The type of NMF model to generate can therefore be specified in argument 
#' \code{model} (see \code{\link{nmfModel}} for other possible arguments). 
#' 
#' The returned NMF model, has a basis matrix equal to \code{W} (if not missing) 
#' and a coefficient matrix equal to \code{H} (if not missing), or drawn 
#' according to the specification provided in argument \code{dist} 
#' (see method \code{rnmf,NMF,numeric} for details on the supported values for \code{dist}).
#' 
#' @examples
#' # random NMF model with known basis matrix
#' x <- rnmf(5, W=matrix(1:18, 6)) # 6 x 5 model with rank=3
#' basis(x) # fixed 
#' coef(x) # random
#' 
#' # random NMF model with known coefficient matrix
#' x <- rnmf(5, H=matrix(1:18, 3)) # 5 x 6 model with rank=3 
#' basis(x) # random
#' coef(x) # fixed
#' 
#' # random model other than standard NMF
#' x <- rnmf(5, H=matrix(1:18, 3), model='NMFOffset')
#' basis(x) # random
#' coef(x) # fixed
#' offset(x) # random
#' 
setMethod('rnmf', signature(x='numeric', target='missing'),
	function(x, target, ..., W, H, dist=runif){
		
		# get fixed matrices to restore on exit:
		# one must enforce honouring the fixed matrices to prevent the call to 
		# rnmf from a sub-class method to change them.  
		of <- .rnmf_fixed()
		on.exit( .rnmf_fixed(of) )
		
		if( !missing(W) && missing(H) ){ # fixed basis matrix: x = n samples
			# one must not change the values H
			.rnmf_fixed('basis')
			x <- nmfModel(ncol(W), nrow(W), x, W=W, ...)
			dist <- list(coef=dist)
		}else if( missing(W) && !missing(H) ){ # fixed coef matrix: x = n features
			# one must not change the values H
			.rnmf_fixed('coef')
			x <- nmfModel(nrow(H), x, ncol(H), H=H, ...)
			dist <- list(basis=dist)
		}else if( !missing(W) && !missing(H) ){ # fixed basis and coef: x = rank
			# one must not change the values of W and H
			.rnmf_fixed(c('basis', 'coef'))
			x <- nmfModel(x, nrow(W), ncol(H), W=W, H=H, ...)
		}else
			stop("NMF::rnmf - Missing both arguments `W` and/or `H`: at least one of them must be specified.")
		
		rnmf(x, dist=dist) 
	}
)
#' Generates a random NMF model with known basis and coefficient matrices.
#' 
#' This method is a shortcut for calling \code{rnmf,numeric,missing} with a 
#' suitable value for \code{x} (the rank), when both factors are known:
#' code{rnmf(min(ncol(W), nrow(H)), ..., W=W, H=H)}.
#' 
#' Arguments \code{W} and \code{H} are required.
#' Note that calling this method only makes sense for NMF models that contains 
#' data to fit other than the basis and coefficient matrices, 
#' e.g. \code{\linkS4class{NMFOffset}}. 
#' 
#' @examples
#' 
#' # random model other than standard NMF
#' x <- rnmf(W=matrix(1:18, 6), H=matrix(21:38, 3), model='NMFOffset')
#' basis(x) # fixed
#' coef(x) # fixed
#' offset(x) # random
#' 
setMethod('rnmf', signature(x='missing', target='missing'),
	function(x, target, ..., W, H){
		rnmf(min(ncol(W), nrow(H)), ..., W=W, H=H)
	}
)

#' Generates a random standard NMF model of given dimensions.
#' 
#' This is a shortcut for \code{rnmf(nmfModel(x, target, ncol, ...)), dist=dist)}.
#' It generates a standard NMF model compatible with the dimensions passed in 
#' \code{target}, that can be a single or 2-length numeric vector, to specify 
#' a square or rectangular target matrix respectively. 
#' See \code{\link{nmfModel}}.
#' 
#' @inheritParams nmfModel,numeric,numeric-method
#' 
#' @examples
#' 
#' ## random standard NMF of given dimensions
#' 
#' # generate a random NMF model with rank 3 that fits a 100x20 matrix  
#' rnmf(3, 100, 20)
#' \dontshow{ stopifnot( identical(dim(rnmf(3, 100, 20)), c(100L,20L,3L)) ) }
#' # generate a random NMF model with rank 3 that fits a 100x100 matrix
#' rnmf(3, 100)
#' \dontshow{ stopifnot( identical(dim(rnmf(3, 100)), c(100L,100L,3L)) ) }
#' 
setMethod('rnmf', signature(x='numeric', target='numeric'), 
		function(x, target, ncol=NULL, ..., dist=runif){		
			rnmf(nmfModel(x, target, ncol, ...), dist=dist)
		}
)
#' Generate a random formula-based NMF model, using the method 
#' \code{\link{nmfModel,formula,ANY-method}}.
setMethod('rnmf', signature(x='formula', target='ANY'), 
	function(x, target, ..., dist=runif){
		# missing target is NULL
		if( missing(target) ) target <- NULL
		rnmf(nmfModel(x, target, ...), dist=dist)
	}
)