File: pbs_code.h

package info (click to toggle)
phast 1.4+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,412 kB
  • sloc: ansic: 54,180; makefile: 354; sh: 337; perl: 321
file content (66 lines) | stat: -rw-r--r-- 2,476 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/** discrete encodings of probabilistic biological sequences */

#ifndef PBS_CODE
#define PBS_CODE

#include <simplex_grid.h>
#include <lists.h>
#include <stringsplus.h>

/* Upper limit on the number of bytes used to store one encoded "letter"
   (see the nbytes field of PbsCode).  Needs to be <= sizeof(int). */
#define MAX_NBYTES 4

/* Compile-time enforcement of the constraint above: the array size
   becomes -1, and compilation fails, if MAX_NBYTES ever exceeds
   sizeof(int). */
typedef char pbs_max_nbytes_fits_in_int[(MAX_NBYTES <= sizeof(int)) ? 1 : -1];

/** Discrete code used to encode probabilistic biological sequences. */
typedef struct {
  /** Simplex grid */
  SimplexGrid *sg;

  /** Representative points, indexed by code index; the array has
      code_size elements */
  Vector **rp;

  /** Code indices grouped by simplex region: codes_by_region[i] is the
      list of code indices associated with simplex region i; the array
      has sg->nregs elements */
  List **codes_by_region;

  /** Bytes of storage per encoded "letter"; between 1 and MAX_NBYTES */
  unsigned nbytes;

  /** Maximum code size: 256^nbytes - 1 (the "- 1" is because of the
      reserved code for gaps) */
  unsigned max_size;

  /** Actual code size; at most max_size */
  unsigned code_size;

  /** Reserved code for gap characters */
  unsigned gap_code;
} PbsCode;

/** Bundle of data used in code estimation. */
typedef struct {
  /** Code being estimated */
  PbsCode *code;

  /** Raw training data: probability vectors */
  List *prob_vectors;

  /** Raw training data: counts */
  List *counts;

  /** Element i is the list of vectors for simplex region i */
  List **vectors_by_region;

  /** Element i is the list of counts for simplex region i */
  List **counts_by_region;

  /** Element i is the list of vectors for code index i */
  List **vectors_by_code;

  /** Element i is the list of counts for code index i */
  List **counts_by_code;

  /** Element i is the contribution of region i to the total
      approximation error */
  double *error_by_region;

  /** Element i is the contribution of code index i to the total
      approximation error */
  double *error_by_code;
} PbsCodeTrainingData;

/** Training mode, taken as the mode argument of pbs_estimate_from_data.
    NOTE(review): the meaning of each enumerator is not spelled out in
    this header -- presumably FULL runs the whole estimation procedure
    while NO_GREEDY and NO_TRAIN each leave out one stage; confirm
    against the implementation. */
typedef enum {
  FULL,
  NO_GREEDY,
  NO_TRAIN
} training_mode;

/* NOTE(review): only declarations are visible in this header; the
   descriptions below are inferred from names and signatures and should
   be confirmed against the definitions.
   NOTE(review): FILE is used below but <stdio.h> is not included
   directly by this header -- presumably it arrives through one of the
   headers included above; confirm. */

/* Construction and destruction.  nbytes presumably corresponds to the
   nbytes field of PbsCode and code_size to its code_size field; the
   roles of dim and nrows are not documented here -- TODO confirm. */
PbsCode *pbs_new(int dim, int nrows, int nbytes);
PbsCode *pbs_new_shell(int dim, int nrows, int nbytes, int code_size);
void pbs_free(PbsCode *code);

/* Stream-based reading and writing of a code (presumably a text
   representation, given the separate *_binary functions below). */
PbsCode *pbs_new_from_file(FILE *F);
void pbs_write(PbsCode *c, FILE *F, char *comment);

/* Operations on an existing code.  In pbs_get_index, error is a pointer
   to double and so looks like an output parameter -- TODO confirm. */
void pbs_assign_points(PbsCode *c);
unsigned pbs_get_index(PbsCode *code, Vector *p, double *error);

/* Code estimation from training data; prob_vectors and counts
   presumably match the like-named fields of PbsCodeTrainingData.  The
   meaning of the returned double is not documented here. */
double pbs_estimate_from_data(PbsCode *code, List *prob_vectors,
                              List *counts, FILE *logf,
                              training_mode mode);

/* Binary I/O of a single code index.  The meaning of the int returned
   by pbs_read_binary is not documented here -- TODO confirm. */
void pbs_write_binary(PbsCode *code, unsigned code_idx, FILE *F);
int pbs_read_binary(PbsCode *code, unsigned *code_idx, FILE *F);

#endif