File: fff_GMM.h

package info (click to toggle)
nipy 0.1.2%2B20100526-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 11,992 kB
  • ctags: 13,434
  • sloc: python: 47,720; ansic: 41,334; makefile: 197
file content (386 lines) | stat: -rw-r--r-- 14,505 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
/*!
  \file fff_GMM.h
  \brief Gaussian Mixture Models
  \author Bertrand Thirion
  \date 2004-2006

  This library implements different kinds of GMM estimation techniques

  

  */

#ifndef fff_GMM
#define fff_GMM

#ifdef __cplusplus
extern "C" {
#endif

  
#include "fff_graphlib.h"

  typedef struct fff_GMM_{
	
    long k;            /* number of components */
    long dim;          /* feature dimension */
	int prec_type;     /* type of the precision: 0 = full, 1 = diagonal, 2 = homoscedastic diagonal (see fff_GMM_new) */
	fff_matrix * means;      /* cluster centroids */
	fff_matrix * precisions; /* cluster precisions; layout depends on prec_type */
	fff_vector * weights;    /* mixture weights, one per component */
    
  } fff_GMM_;



  typedef struct fff_Bayesian_GMM{
	
    long k;            /* number of components */
    long dim;          /* feature dimension */
	
	/* It is assumed that
	   1) the covariance is diagonal, so that
	   precision matrices and centroid matrices have the same size:
	   (k, dim),
	   where k is the number of components in the mixture
	   and dim is the number of dimensions of the data.
	   All the matrices in the model have this size.
	   All the vectors have length k.
	   2) Conjugate normal-Wishart priors are used.
	*/

	/* priors */
	fff_matrix * prior_means;       /* prior on the component means */
	fff_vector * prior_means_scale; /* scaling of the precision on the means */
	fff_matrix * prior_precisions;  /* prior precisions (Wishart scale) */
	fff_vector * prior_dof;         /* Wishart degrees of freedom */
	fff_vector * prior_weights;     /* Dirichlet prior on the mixture weights */

    /* current estimates (same layout as the corresponding priors) */
	fff_vector * means_scale;
	fff_matrix * means;
	fff_matrix * precisions;
	fff_vector * dof;
	fff_vector * weights;
    
  } fff_Bayesian_GMM;

  /*! 
    \brief Constructor for the GMM structure 
    \param k : number of components
    \param dim : the number of dimensions
	\param prec_type: (int) the parameterization of the model
	
	if prec_type==0 the model is assumed to be Gaussian,
	heteroscedastic with full covariance.
	if prec_type==1 the model is assumed to be Gaussian,
	heteroscedastic and diagonal.
	if prec_type==2 the model is assumed to be Gaussian,
	homoscedastic, diagonal and with equal weights.
  */
  extern fff_GMM_* fff_GMM_new( const long k, const long dim,const int prec_type );
  
  /*! 
    \brief Destructor for the GMM structure
    \param thisone the GMM to be deleted
  */
  extern int fff_GMM_delete( fff_GMM_* thisone );
  
     /*!
    \brief Labelling checking algorithm
    \param Label label vector
    \param k number of classes

    This algorithm quickly checks that for each i in [0,..,k-1]
    there exists an integer j so that Label[j] = i
    (i.e. the labelling is onto [0,..,k-1]).
    It returns 1 if yes, 0 if no.
  */
  extern int fff_clustering_OntoLabel(const fff_array * Label, const long k);

  /*!
    \brief GMM algorithm with selection of the number of clusters
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param Label data labels
    \param nbclust candidate numbers of clusters for which a mixture is searched
    \param maxiter maximum number of iterations
    \param delta small constant for control of convergence

    This function performs different initializations of the GMM clustering
    algorithm, one per value in nbclust, and keeps the best one according
    to a BIC criterion.

    The optimal number of clusters is returned.  It is assumed that Centers,
    Precision, and Weights have been allocated for max(nbclust)
    clusters.
  */
  int fff_clustering_gmm_select( fff_matrix* Centers, fff_matrix* Precision,  fff_vector *Weights, fff_array *Label, const fff_matrix* X, const fff_vector *nbclust, const int maxiter, const double delta);
  /*!
    \brief GMM algorithm with multiple initializations
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param Label data labels
    \param maxiter maximum number of iterations
    \param delta small constant for control of convergence
    \param ninit number of initializations

    This function performs ninit initializations of the GMM clustering
    algorithm, and keeps the best one. The average log-likelihood is
    returned.
    
  */
  extern double fff_clustering_gmm_ninit( fff_matrix* Centers, fff_matrix* Precision,  fff_vector *Weights, fff_array *Label, const fff_matrix* X, const int maxiter, const double delta, const int ninit );
 /*!
    \brief GMM algorithm
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param Label data labels
    \param maxiter maximum number of iterations
    \param delta small constant for control of convergence
    \param chunksize the number of features on which the GMM is performed
	\param verbose verbosity mode

    This algorithm performs a GMM of the data X.  Note that the data
    and cluster matrices should be dimensioned as (nb items * feature
    dimension) and (nb clusters * feature dimension).
    The precision is written in one of the following forms:
    - (nb clusters * sqf), where sqf is the square of the feature dimension.
    - (nb clusters * feature dimension): then it is assumed that it codes for a
    diagonal precision matrix, and an algorithm with diagonal
    covariance is implemented.
    - (1 * sqf): then the Gaussians are assumed to be homoscedastic. Only
    one diagonal variance is computed for all clusters, and the weights of
    the mixture are set to (1/nb clusters).
    
    chunksize has to be chosen in the interval [nb clusters, nb features].
    chunksize = nb features is a standard choice,
    but lowering chunksize will reduce computation time;
    it is advisable to keep chunksize < 10^6.
    
    The metric used in the algorithm is Euclidean.
    The returned Label vector is a hard membership function
    computed after convergence.
    The normalized log-likelihood is returned.
  */
  extern double fff_clustering_gmm( fff_matrix* Centers, fff_matrix* Precision,  fff_vector *Weights, fff_array *Label, const fff_matrix* X, const int maxiter, const double delta, const int chunksize, const int verbose );
 

  /*!
    \brief Evaluation of a GMM on a dataset
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param L average log-likelihood (output)

    This algorithm computes the average log-likelihood of a GMM on an
    empirical dataset. This number is returned.
    The algorithm adapts to the size of the precision matrix:
    - (k*(f*f)): precision is a full f*f matrix for all clusters
    - (k*f): precision is diagonal
    - (1*f): precision is diagonal and equal for all clusters
  */
  extern double fff_gmm_mean_eval(double* L, const fff_matrix* X, const fff_matrix* Centers, const fff_matrix* Precision, const fff_vector* Weights);

/*!
    \brief Relaxation of a previously estimated GMM on a dataset
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param LogLike log-likelihood of each datum (output)
	\param Labels final labelling (output)
	\param maxiter maximum number of iterations
	\param delta small constant for control of convergence

    This algorithm re-estimates the GMM parameters on an
    empirical dataset and computes the per-datum log-likelihood.
    The algorithm adapts to the size of the precision matrix:
    - (k*(f*f)): precision is a full f*f matrix for all clusters
    - (k*f): precision is diagonal
    - (1*f): precision is diagonal and equal for all clusters
  */
  extern int fff_gmm_relax( fff_vector* LogLike, fff_array* Labels, fff_matrix* Centers, fff_matrix* Precision, fff_vector* Weights, const fff_matrix* X, const int maxiter, const double delta);

  /*!
    \brief Evaluation of a GMM on a dataset and labelling of the dataset X
    \param X data matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights
    \param LogLike log-likelihood of each datum (output)
    \param Labels final labelling (output)

    This algorithm computes the average log-likelihood of a GMM on an
    empirical dataset. This number is returned.
    The algorithm adapts to the size of the precision matrix:
    - (k*(f*f)): precision is a full f*f matrix for all clusters
    - (k*f): precision is diagonal
    - (1*f): precision is diagonal and equal for all clusters
  */
  extern int fff_gmm_partition(fff_vector* LogLike, fff_array* Labels, const fff_matrix* X, const fff_matrix* Centers, const fff_matrix* Precision, const fff_vector* Weights);

    /*!
    \brief Representation of GMM membership with a graph structure
	\param G weighted graph (output)
	\param X data-defining matrix
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights

    This algorithm computes the probabilistic membership values
	of the data X with respect to the GMM model and stores this
	-- presumably sparse -- information in a graph structure.
	If G->E = 0, nothing is done with the graph.
	Otherwise, the membership is coded in the graph.
	The number of edges is returned.
	In practice, it is thus advised to call it once with an empty graph
	to get the number of edges, then
	to allocate the graph structure and call the function again.
  */
  extern int fff_gmm_membership(fff_graph* G, const fff_matrix* X, const fff_matrix* Centers, const fff_matrix* Precision, const fff_vector* Weights);


   /*!
    \brief Shifting the points in X toward points of larger likelihood
	\param X moving points (modified in place)
    \param Centers cluster centers
    \param Precision cluster precisions
    \param Weights mixture weights

  */
  extern int fff_gmm_shift(fff_matrix* X, const fff_matrix* Centers, const fff_matrix* Precision, const fff_vector* Weights);


/*!
    \brief Implementation of a Bayesian GMM model based on the Variational Bayes method.
    \param BGMM the structure that contains the Bayesian GMM
	\param Label final data labelling (output)
	\param X data matrix
	\param maxiter maximal number of iterations
    \param delta minimal relative increment of the likelihood to declare convergence

	The model comes from Penny, 2001 (research report).
	Given a dataset X, and priors on the cluster centers and precisions,
    the function computes a GMM model (Centers, Precision, Weights).

*/
  extern double fff_VB_gmm_estimate(fff_Bayesian_GMM* BGMM,  fff_array *Label, const fff_matrix* X, const int maxiter, const double delta);



   /*! 
    \brief Constructor for the Bayesian GMM structure 
    \param k : number of components
    \param dim : the number of dimensions
  */
  extern fff_Bayesian_GMM* fff_BGMM_new( const long k, const long dim );
  
  /*! 
    \brief Destructor for the Bayesian GMM structure
    \param thisone the BGMM to be deleted
  */
  extern int fff_BGMM_delete( fff_Bayesian_GMM* thisone );
  
  /*
	\brief Instantiation of the priors of the Bayesian GMM
	\param BG the Bayesian GMM to be instantiated
	\param prior_means the prior on the means
	\param prior_means_scale scaling factor on the precision of the means
	\param prior_precision prior precision in the Wishart model
	\param prior_dof number of degrees of freedom in the Wishart model
	\param prior_weights prior on the weights in the Dirichlet model
	
	Note that all the parameters are copied.
   */
  extern int fff_BGMM_set_priors(fff_Bayesian_GMM* BG, const fff_matrix * prior_means, const fff_vector *prior_means_scale, const fff_matrix * prior_precision, const fff_vector* prior_dof, const fff_vector *prior_weights );

   /*
	\brief Estimation of the BGMM using a Gibbs sampling technique
	\param membership the average membership variable across iterations (output)
	\param BG the BGMM to be estimated
	\param X the data used in the estimation of the model
	\param niter the number of iterations in the sampling
	\param method choice of a quick (purely normal) technique

	This function performs the estimation of the model.
	The membership (hidden) variable is randomly resampled at each step,
	while the parameters of the model are recomputed analytically
	for the sake of speed.
	The number of iterations is niter for both the burn-in
	and the estimation periods.
	The average data membership across iterations is returned in membership.
	The BGMM is re-instantiated with the average parameters
	during the estimation period.
   */

  extern int fff_BGMM_Gibbs_estimation(fff_matrix* membership, fff_Bayesian_GMM* BG, const fff_matrix *X, const int niter, const int method);

    
   /*
	\brief Sampling of the BGMM on a grid
	\param density the average density on the given grid (output)
	\param BG the BGMM to be sampled
	\param X the data used in the estimation of the model
	\param grid the grid on which the density is evaluated
	\param niter the number of iterations in the sampling
	\param method choice of a quick (purely normal) technique

   */
  extern int fff_BGMM_Gibbs_sampling(fff_vector* density, fff_Bayesian_GMM* BG, const fff_matrix *X, const fff_matrix *grid, const int niter, const int method);


  /*
	\brief Reading out the BGMM structure
	\param means current estimate of the cluster centers (output)
	\param means_scale current precision on the means matrices (output)
	\param precisions current precision estimate (output)
	\param dof current dof of the Wishart model (output)
	\param weights current weights of the components (output)
	\param BG BGMM model.

	Note that all the matrices are copied.
   */
  extern int fff_BGMM_get_model( fff_matrix * means, fff_vector * means_scale, fff_matrix * precisions, fff_vector* dof, fff_vector * weights, const fff_Bayesian_GMM* BG);

  /*
	\brief Writing the BGMM structure
	\param BG BGMM model (modified)
	\param means current estimate of the cluster centers
	\param means_scale current precision on the means matrices
	\param precisions current precision estimate
	\param dof current dof of the Wishart model
	\param weights current weights of the components

	Note that all the matrices are copied.
   */
  extern int fff_BGMM_set_model( fff_Bayesian_GMM* BG, const fff_matrix * means, const fff_vector * means_scale, const fff_matrix * precisions, const fff_vector* dof, const fff_vector * weights );


  /*!	
	\brief Sampling of a Bayesian GMM model based on Variational Bayes.
	\param density the posterior (output)
    \param BG the structure that contains the Bayesian GMM
	\param grid the sampling grid for the posterior

*/  
  extern int fff_BGMM_sampling(fff_vector * density, fff_Bayesian_GMM* BG, const fff_matrix *grid);

/*!	
	\brief Sampling of a Bayesian GMM model
	\param proba the posterior as a (nbitem, BG->k) matrix, where nbitem = X->size1 (output)
	\param X the sampling grid for the posterior
	\param BG the structure that contains the Bayesian GMM

*/  
  extern double fff_WNpval(fff_matrix * proba, const fff_matrix *X, const fff_Bayesian_GMM* BG);

#ifdef __cplusplus
}
#endif

#endif