File: em.h

package info (click to toggle)
phast 1.5%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 13,008 kB
  • sloc: ansic: 54,195; makefile: 358; sh: 337; perl: 321
file content (55 lines) | stat: -rw-r--r-- 2,690 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
/***************************************************************************
 * PHAST: PHylogenetic Analysis with Space/Time models
 * Copyright (c) 2002-2005 University of California, 2006-2010 Cornell 
 * University.  All rights reserved.
 *
 * This source code is distributed under a BSD-style license.  See the
 * file LICENSE.txt for details.
 ***************************************************************************/

/** @file em.h
    Function for training a phylo-HMM, including its models, via EM.
    @ingroup hmm
 */

#ifndef EM_H
#define EM_H

#include <hmm.h>
#include <tree_model.h>

/** Convergence threshold for EM training.
    The active value (0.1) is flagged as temporary; the earlier value (0.01)
    is preserved in the commented-out definition directly below.
    NOTE(review): presumably compared against the improvement in log
    likelihood between EM iterations -- the use site is not visible in this
    header, confirm in em.c. */
//#define EM_CONVERGENCE_THRESHOLD 0.01
#define EM_CONVERGENCE_THRESHOLD 0.1 /* TEMPORARY! */

/** Fit a Hidden Markov Model to training data with the EM algorithm.

    Requirements on the caller: the HMM and the models must already be
    initialized appropriately, and there must be one model for every state
    in the HMM.  When the sample size is 1, the emissions can be pre-computed.

    @param hmm Hidden Markov Model to train
    @param models State models, passed as an untyped pointer.
      NOTE(review): the original documentation left this parameter
      undescribed; presumably this is the per-state model collection
      mentioned in the requirements above -- confirm against em.c
    @param data Training data; the ith training sample in data must be of
      length 'sample_lens[i]'
    @param nsamples Number of samples
    @param sample_lens Lengths of each data sample
    @param pseudocounts Matrix of pseudocounts.
      NOTE(review): the original documentation does not say what the
      pseudocounts apply to -- confirm against em.c
    @param compute_emissions (Optional) Function to compute emissions; if
      NULL it is simply not called
    @param estimate_state_models (Optional) Function to estimate state
      models; if NULL a fully general parameterization is assumed
    @param estimate_transitions (Optional) Function to estimate transition
      probabilities (M step); if NULL a fully general parameterization is
      assumed
    @param get_observation_index Function to get observation index
    @param log_function Function to use for logging statistics
    @param emissions_alloc (Optional) Storage used for emission
      probabilities; must be large enough for the longest sample
    @param logf Log to save statistics to
    @result Log likelihood of the optimized model
    @warning This function is experimental
*/
double hmm_train_by_em(
    HMM *hmm,
    void *models,
    void *data,
    int nsamples,
    int *sample_lens,
    Matrix *pseudocounts,
    void (*compute_emissions)(double**, void**, int, void*, int, int),
    void (*estimate_state_models)(TreeModel**, int, void*, double**, int,
                                  FILE*),
    void (*estimate_transitions)(HMM*, void*, double**),
    int (*get_observation_index)(void*, int, int),
    void (*log_function)(FILE*, double, HMM*, void*, int),
    double **emissions_alloc,
    FILE *logf);

#endif