1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
|
#ifndef NMF_EUCLIDEAN_H // include header only once
#define NMF_EUCLIDEAN_H
#include <R.h>
#include <Rdefines.h>
#include <R_ext/Error.h>
extern "C" {
// NMF from Lee and Seung (based on Euclidean norm)
SEXP euclidean_update_H ( SEXP v, SEXP w, SEXP h, SEXP eps, SEXP nbterms, SEXP ncterms, SEXP dup);
SEXP euclidean_update_W ( SEXP v, SEXP w, SEXP h, SEXP eps, SEXP weight, SEXP nbterms, SEXP ncterms, SEXP dup);
// NMF with offset
SEXP offset_euclidean_update_H ( SEXP v, SEXP w, SEXP h, SEXP offset, SEXP eps, SEXP dup);
SEXP offset_euclidean_update_W ( SEXP v, SEXP w, SEXP h, SEXP offset, SEXP eps, SEXP dup);
}
//////////////////////////////////
// STANDARD NMF: LEE
//////////////////////////////////
// include version for both double/integer storage.mode (defined as templates)
#include "euclidean.cpp"
// define the exported versions (for SEXP)
SEXP euclidean_update_H ( SEXP v, SEXP w, SEXP h, SEXP eps
, SEXP nbterms=ScalarInteger(0), SEXP ncterms=ScalarInteger(0)
, SEXP dup=ScalarLogical(1)){
if( TYPEOF(v) == REALSXP ){
return euclidean_update_H(NUMERIC_POINTER(v), w, h, eps
, *INTEGER(nbterms), *INTEGER(ncterms)
, *LOGICAL(dup));
}else{
return euclidean_update_H(INTEGER_POINTER(v), w, h, eps
, *INTEGER(nbterms), *INTEGER(ncterms)
, *LOGICAL(dup));
}
}
//////////////////////////////////
// NMF WITH WEIGHT
//////////////////////////////////
#define NMF_WITH_WEIGHT
#include "euclidean.cpp"
#undef NMF_WITH_WEIGHT
SEXP euclidean_update_W ( SEXP v, SEXP w, SEXP h, SEXP eps
, SEXP weight = R_NilValue
, SEXP nbterms=ScalarInteger(0), SEXP ncterms=ScalarInteger(0)
, SEXP dup=ScalarLogical(1)){
int nb = *INTEGER(nbterms), nc = *INTEGER(ncterms);
bool copy = *LOGICAL(dup);
if( TYPEOF(v) == REALSXP ){
if( isNull(weight) ){
return euclidean_update_W(NUMERIC_POINTER(v), w, h, eps, nb, nc, copy);
}else{
return weuclidean_update_W(NUMERIC_POINTER(v), w, h, eps, weight, nb, nc, copy);
}
}else{
if( isNull(weight) ){
return euclidean_update_W(INTEGER_POINTER(v), w, h, eps, nb, nc, copy);
}else{
return weuclidean_update_W(INTEGER_POINTER(v), w, h, eps, weight, nb, nc, copy);
}
}
}
//////////////////////////////////
// NMF WITH OFFSET
//////////////////////////////////
#define NMF_WITH_OFFSET
// include version for both double/integer storage.mode (defined as templates)
#include "euclidean.cpp"
#undef NMF_WITH_OFFSET
// define the exported versions (for SEXP)
SEXP offset_euclidean_update_H ( SEXP v, SEXP w, SEXP h, SEXP offset, SEXP eps, SEXP dup=ScalarLogical(1)){
if( TYPEOF(v) == REALSXP )
return offset_euclidean_update_H(NUMERIC_POINTER(v), w, h, offset, eps, *LOGICAL(dup));
else
return offset_euclidean_update_H(INTEGER_POINTER(v), w, h, offset, eps, *LOGICAL(dup));
}
SEXP offset_euclidean_update_W ( SEXP v, SEXP w, SEXP h, SEXP offset, SEXP eps, SEXP dup=ScalarLogical(1)){
if( TYPEOF(v) == REALSXP )
return offset_euclidean_update_W(NUMERIC_POINTER(v), w, h, offset, eps, *LOGICAL(dup));
else
return offset_euclidean_update_W(INTEGER_POINTER(v), w, h, offset, eps, *LOGICAL(dup));
}
#define NMF_EUCLIDEAN_DONE
#else // END OF NMF_EUCLIDEAN_H
#ifndef NMF_EUCLIDEAN_DONE // START DEFINITION OF FUNCTIONS
/**
* Euclidean norm based multiplicative update for the mixture coefficients matrix H
* from Lee and Seung.
* Also used in the NMF with Offset algorithm
*
* Note: for performance reason the dimension names are NOT conserved.
*/
#ifndef NMF_WITH_WEIGHT
template <typename T_Rnumeric>
static
#ifdef NMF_WITH_OFFSET
SEXP offset_euclidean_update_H (
#else
SEXP euclidean_update_H (
#endif
T_Rnumeric* pV, SEXP w, SEXP h
#ifdef NMF_WITH_OFFSET
, SEXP offset
#endif
, SEXP eps
#ifndef NMF_WITH_OFFSET
, int nbterms=0, int ncterms=0
#endif
, int dup=1)
{
SEXP res;
int nprotect = 0;
double eps_val = *NUMERIC_POINTER(eps);
// retrieve dimensions
int n = INTEGER(GET_DIM(w))[0];
int r = INTEGER(GET_DIM(w))[1];
int p = INTEGER(GET_DIM(h))[1];
// get number of non-fixed terms
int vr =
#ifdef NMF_WITH_OFFSET
r;
#else
r - ncterms;
#endif
// duplicate H (keeping attributes)
PROTECT( res = (dup != 0 ? duplicate(h) : h) ); nprotect++;
// define internal pointers
double* pW = NUMERIC_POINTER(w);
double* pH = NUMERIC_POINTER(h);
double* p_res = NUMERIC_POINTER(res);
double* pH_buffer = (double*) R_alloc(r, sizeof(double));
// extra variables in the case of an optional offset
#ifdef NMF_WITH_OFFSET
double *pOffset = NULL, *den_addon = NULL;
if( offset != R_NilValue ){
pOffset = NUMERIC_POINTER(offset);
den_addon = (double*) R_alloc(r, sizeof(double));
//memset(den_addon, 0, r);
}
#endif
// auxiliary temporary variable
double temp = 0.0;
// Pre-compute symmetric matrix t(W)W
// -> allocate internal memory as a upper triangular in column major
double* p_tWW = (double*) R_alloc((int) (r*(r+1))/2, sizeof(double));
double* p_row = NULL;
for( int i=r-1; i>=0; --i){
p_row = pW + i*n;
#ifdef NMF_WITH_OFFSET
den_addon[i] = 0.0;
#endif
for( int j=r-1; j>=0; --j){
temp = 0.0;
for( int u=n-1; u>=0; --u){
temp += p_row[u] * pW[u + j*n];
#ifdef NMF_WITH_OFFSET
if( pOffset != NULL && j==0 )
den_addon[i] += p_row[u] * pOffset[u];
#endif
}
p_tWW[((j+1)*j)/2 + i] = temp;
}
}
// H_au = H_au (W^T V)_au / (W^T W H)_au
// Compute update of H column by column
for (int j=p-1; j>=0; --j){
for(int i=vr-1; i>=0; --i){ // compute value for H_ij (only non-fixed entries)
// numerator
double numerator = 0.0;
for( int u=n-1; u>=0; --u)
numerator += pW[u + i*n] * pV[u + j*n];
double den = 0.0;
for( int l=r-1; l>=0; --l){ // use all entries (fixed and non-fixed)
// bufferize jth-column of H, as it can be changed at the end of the current i-loop
if( i==vr-1 )
pH_buffer[l] = pH[l + j*r];
den += p_tWW[i > l ? ((i+1)*i)/2 + l : ((l+1)*l)/2 + i] * pH_buffer[l];
}
// add offset addon if necessary
#ifdef NMF_WITH_OFFSET
if( pOffset != NULL )
den += den_addon[i];
#endif
// multiplicative update
p_res[i + j*r] = ((temp = pH_buffer[i] * numerator) > eps_val ? temp : eps_val) / (den + eps_val);
}
}
// return result
UNPROTECT(nprotect);
return res;
}
#endif
/**
* Euclidean norm based multiplicative update for the basis matrix W
* from Lee and Seung.
* Also used in the NMF with Offset algorithm
*
* Note: for performance reason the dimension names are NOT conserved.
*/
template <typename T_Rnumeric>
static SEXP
#ifdef NMF_WITH_OFFSET
offset_euclidean_update_W
#else
#ifdef NMF_WITH_WEIGHT
weuclidean_update_W
#else
euclidean_update_W
#endif
#endif
(T_Rnumeric* pV, SEXP w, SEXP h
#ifdef NMF_WITH_OFFSET
, SEXP offset
#endif
, SEXP eps
#ifdef NMF_WITH_WEIGHT
, SEXP weight
#endif
#ifndef NMF_WITH_OFFSET
, int nbterms=0, int ncterms=0
#endif
, int dup=1)
{
SEXP res;
int nprotect = 0;
// setup variables for enforcing a limit Inf on the entries
double limInf = *NUMERIC_POINTER(eps);
// retrieve dimensions
int n = INTEGER(GET_DIM(w))[0];
int r = INTEGER(GET_DIM(w))[1];
int p = INTEGER(GET_DIM(h))[1];
// duplicate H (keeping attributes)
//PROTECT(res = duplicate(w)); nprotect++;
PROTECT(res = (dup != 0 ? duplicate(w) : w) ); nprotect++;
// define internal pointers to data
double* pW = NUMERIC_POINTER(w);
double* pH = NUMERIC_POINTER(h);
double* p_res = NUMERIC_POINTER(res);
double* pW_buffer = (double*) R_alloc(r, sizeof(double));
// extra variables in the case of an optional offset
#ifdef NMF_WITH_OFFSET
double *pOffset = NULL, *rowSumsH = NULL;
if( offset != R_NilValue ){
pOffset = NUMERIC_POINTER(offset);
// pre-compute the row sums of H
rowSumsH = (double*) R_alloc(r, sizeof(double));
for( int i=r-1; i>=0; --i){
rowSumsH[i] = 0.0;
for( int j=p-1; j>=0; --j){
rowSumsH[i] += pH[i + j*r];
}
}
}
#endif
#ifdef NMF_WITH_WEIGHT
// take sample weights into account
double* p_weight = !isNull(weight) ? NUMERIC_POINTER(weight) : NULL;
double beta = -1.0;
if( p_weight == NULL ){// <=> no weights
beta = 1.0;
}
else if( length(weight) == 1 ){// all weighted are the same
// NB: theoretically this is equivalent to weight=1, but may be used
// to test it in practice (with the numerical adjustments via eps)
beta = *p_weight;
}
// fill weight vector with single value
if( beta > 0 ){
double* pw = p_weight = (double*) R_alloc(p, sizeof(double));
for(int i=0; i<p; ++i, ++pw){
*pw = beta;
}
}
#endif
// auxiliary temporary variable
double temp = 0.0;
// Pre-compute symetric matrix Ht(H)
// -> allocate internal memory as a lower triangular in column major
double* p_HtH = (double*) R_alloc((int) (r*(r+1))/2, sizeof(double));
for( int j=r-1; j>=0; --j){
for( int i=j; i<r; ++i){
temp = 0.0;
for( int u=p-1; u>=0; --u){
temp += pH[j + u*r] * pH[i + u*r]
#ifdef NMF_WITH_WEIGHT
* p_weight[u]
#endif
;
}
p_HtH[((i+1)*i)/2 + j] = temp;
}
}
// W_ia = W_ia (V H^T)_ia / (W H H^T)_ia and columns are rescaled after each iteration
// Compute update of W row by row
double numerator = 0.0;
double den = 0.0;
for(int i=n-1; i>=0; --i){
for (int j=r-1; j>=0; --j){// compute value for W_ij
// numerator
numerator = 0.0;
for( int u=p-1; u>=0; --u){
numerator += pV[i + u*n] * pH[j + u*r]
#ifdef NMF_WITH_WEIGHT
* p_weight[u]
#endif
;
}
den = 0.0;
for( int l=r-1; l>=0; --l){
// bufferize ith-row of W, as it can be changed at the end of the current j-loop
if( j==r-1 )
pW_buffer[l] = pW[i + l*n];
// compute index of the stored value of t(w)w_[iH,l] in column major
den += pW_buffer[l] * p_HtH[l < j ? ((j+1)*j)/2 + l : ((l+1)*l)/2 + j];
}
// add offset addon if necessary
#ifdef NMF_WITH_OFFSET
if( pOffset != NULL )
den += pOffset[i] * rowSumsH[j];
#endif
// multiplicative update
temp = pW_buffer[j] * numerator;
p_res[i + j*n] = ( temp < limInf ? limInf : temp ) / (den + limInf);
}
}
// return result
UNPROTECT(nprotect);
return res;
}
#endif //END ifndef NMF_EUCLIDEAN_DONE
#endif //END ifdef NMF_EUCLIDEAN_H
|