File: Cds.h

package info (click to toggle)
theseus 3.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 6,152 kB
  • ctags: 2,447
  • sloc: ansic: 42,404; makefile: 250; sh: 131
file content (370 lines) | stat: -rw-r--r-- 16,553 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2014 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#ifndef COORDS_SEEN
#define COORDS_SEEN

#include <stdio.h>
#include "DLTmath.h"
#include "PDBCds.h"


/* Forward typedefs: give every struct in this header a short name up front so
   the definitions below can refer to each other (e.g. StCdsArray holds Cds
   pointers before struct Cds is defined). */
typedef struct Algorithm Algorithm;
typedef struct Statistics Statistics;
typedef struct StCds StCds;
typedef struct StCdsArray StCdsArray;
typedef struct CdsParams CdsParams;
typedef struct Params Params;
typedef struct Priors Priors;
typedef struct Cds Cds;
typedef struct CdsArray CdsArray;


/* Algorithm collects the run-time options and bookkeeping for one run:
   the command line and input files, the many int option flags, iteration
   limits and counters, and a few numeric parameters. */
struct Algorithm
{
    char            cmdline[1024]; /* copy of the command line */
    int             argc;
    char          **argv;
    char          **infiles; /* an array of the input files listed on the command line */
    int             filenum; /* number of input files */
    char            rootname[FILENAME_MAX];
    int             weight; /* weighting method */
    int             verbose;  /* lots of output */
    double          precision; /* requested relative precision to converge to */
    int             iterations; /* max # of iterations allowed for outer loop of MultiPose() */
    int             rounds; /* running counter of rounds of the outer loop of MultiPose() */
    int             innerrounds; /* running counter of rounds of the inner loop of MultiPose() */
    double          milliseconds; /* how long the calculation took */
    int             write_file; /* flag to write output pdb file, default = 1 = yes */
    int             atoms; /* flag for atom types to include in superposition, CA, CB, backbone, P, etc. */
    char           *selection; /* character array holding user input for residues/alignment columns to include */
    char           *atomslxn; /* character array holding user input for atom types to include */
    int             revsel; /* reverse the sense of the residues to select in selection above (i.e. exclude them) */
    int             embedave; /* flag to initialize the algorithm with an embedded average (distance geometry) */
    int             landmarks; /* # of landmarks (Dryden files) */
    int             writestats; /* flag to write out stat files */
    int             FragDist;
    int             random;
    int             pca; /* flag to do Principal Components Analysis on covariance matrix */
    int             fullpca;
    int             cormat; /* flag to do PCA with correlation matrix instead of covariance matrix */
    int             tenberge;
    int             morph;
    int             stats; /* calculate moment stats */
    double          constant; /* minimum variance allowed */
    int             info; /* just calculate stats for given pdb file and quit */
    int             princaxes; /* flag to align final superposition with principal axes of mean structure */
    int             nullrun;
    int             binary; /* flag to read and write binary structure files */
    int             mbias; /* flag to calculate bias-corrected mean */
    /* NOTE(review): the names domp/doave read as "do ..." while their comments
       say "don't ..." -- confirm the polarity against the code that sets them. */
    int             domp; /* don't initialize Bayes w/MultiPose ML */
    int             doave; /* don't calculate an average structure */
    int             dotrans;
    int             dorot;
    int             dohierarch;
    int             docovars;
    int             alignment; /* flag for superimposing based on a sequence alignment */
    int             covweight; /* flag to do atomic, row-wise covariance matrix weighting */
    int             varweight; /* flag to do variance weighting (i.e., a diagonal covariance matrix) */
    int             leastsquares; /* flag to do classical least squares, all variances equal, no covars */
    int             hierarch; /* flag to use hierarchical variances, e.g. inverse gamma distributed vars */
    int             fmodel; /* read only first or all models in a pdb file */
    int             noinnerloop; /* don't iterate the inner loop */
    int             fasta; /* flag to write out FASTA sequence files for each PDB model read in */
    int             olve; /* Olve Peersen's pet requests */
    int             abort;
    int             seed; /* random number seed, can be specified by user */
    int             mixture;
    int             threads; /* flag to run with pthreads */
    double          minc;
    int             printlogL;
    int             bfact;
    int             convlele; /* flag to convert Lele's formatted files */
    double          param[2]; /* Random generation of structures, params for inverse gamma */
    double          radii[3]; /* Random generation of structures, radii of gyration for generating mean forms */
    int             ssm;
    int             bayes;
    int             ipmat;
    int             missing;
    int             scale; /* calculate scale factors for each structure */
    int             instfile; /* print out PDB files in each internal round of the MultiPose algorithm */
    int             pu; /* for testing Pu's QCP rotation method */
    int             amber; /* switch for special treatment of AMBER8 PDB formatted files */
    int             atom_names;
    double          scalefactor; /* value to scale all structures by */
    int             morphfile;  /* flag to read and write morphometric .tps files */
    int             scaleanchor; /* model index for relative scaling -- scale of this model = 1 */
    int             randgibbs; /* Randomly initialize GibbsMet */
    double          covnu;
};


/* Statistics gathers the summary numbers reported for a superposition:
   RMSD-type fit measures (for the initial and the final superposition),
   likelihood statistics, residual moments, and the chi^2 results for the
   hierarchical variance fit.  One member per declaration; order is unchanged. */
struct Statistics
{
    double          stddev;             /* combined std. deviation over all atomic positions */
    double          var;
    double          phi;
    double          alpha;
    double          starting_paRMSD;    /* initial superposition stat */
    double          starting_pawRMSD;   /* initial superposition stat */
    double          starting_mlRMSD;
    double          starting_stddev;
    double          starting_logL;
    double          ave_paRMSD;         /* mean pairwise RMSD */
    double          ave_pawRMSD;        /* mean weighted pairwise RMSD */
    double          RMSD_from_mean;     /* mean RMSD measured from the mean structure */
    double          mlRMSD;             /* maximum-likelihood RMSD (really a sigma) */

    /* likelihood stats */
    double          logL;
    double          mlogL;
    double          AIC;
    double          BIC;
    double          nparams;
    double          ndata;
    double          chi2;

    double          skewness[4];        /* x, y, z residuals, then total */
    double          kurtosis[4];
    double          SES;                /* presumably std. error of skewness -- TODO confirm */
    double          SEK;                /* presumably std. error of kurtosis -- TODO confirm */

    int             median;             /* index of the structure nearest the mean */
    double          wtnorm;             /* normalizer for the atomic row-wise weight matrix */
    double          hierarch_p1;        /* hierarchical variance PDF parameter */
    double          hierarch_p2;        /* hierarchical variance PDF parameter */
    double          hierarch_chi2;      /* chi^2 for the hierarchical variance fit */
    double          hierarch_chi2_P;    /* its P-value */
    double          omnibus_chi2;       /* overall chi^2 (hierarchical plus overall fit) */
    double          omnibus_chi2_P;     /* its P-value */
    double          precision;          /* precision the algorithm actually converged to */
};


/* StCds: a static working set of coordinates (per-atom coordinate,
   occupancy and B-factor vectors plus their lengths). */
struct StCds
{
    int             model;  /* model number; not really used */
    int             vlen;   /* coordinate count */
    int             aalen;  /* count of real residues (gaps excluded), used for CA alignments */

    /* atomic coordinates, one vector per axis */
    double         *x;
    double         *y;
    double         *z;
    double         *o;      /* occupancy */
    double         *b;      /* B-factor */

    /* storage only -- do not access directly */
    char           *resName_space;
};


/* StCdsArray bundles an array of coordinate sets with their average and a
   few array-wide vectors/matrices.
   NOTE(review): despite the "St" prefix, the elements and the average are
   typed Cds, not StCds -- confirm this is intended. */
struct StCdsArray
{
    int             vlen;    /* number of coordinates */
    int             cnum;    /* number of Cds in array */

    Cds           **cds;     /* pointer to an array of pointers to Cds */
    Cds            *avecds;  /* average Cds of all in CdsArray */

    double         *evals;
    double         *samplevar3N; /* atomic sample variances */
    double        **CovMat;      /* the atomic, row-wise covariance matrix */
};


/* CdsParams holds the per-structure superposition parameters: prior
   variances, residuals, covariance-weighted coordinates, rotation matrices,
   centroid/translation vectors, RMSDs and the quaternion eigen-decomposition. */
struct CdsParams
{
    int             vlen;               /* coordinate count */

    double         *prvar;              /* prior variances */

    /* residuals, one vector per axis */
    double         *residual_x;
    double         *residual_y;
    double         *residual_z;

    /* covariance matrix weighted x,y,z cds */
    double         *covx;
    double         *covy;
    double         *covz;

    double        **matrix;             /* rotation matrix, 3x3 */
    double        **last_matrix;        /* temporary rotation matrix, 3x3 */
    double        **last_outer_matrix;  /* temporary rotation matrix, 3x3 */

    double          radgyr;             /* radius of gyration */
    double        **innerprod;          /* inner product matrix, vlen x vlen */
    double        **innerprod2;         /* inner product matrix, 3 x 3 */

    double          center[3];          /* weighted centroid of the coordinates */
    double          last_center[3];     /* temporary centroid of the coordinates */
    double          translation[3];     /* translation vector derived from the weighted center */
    double          RMSD_from_mean;     /* RMSD measured from the mean structure */
    double          wRMSD_from_mean;    /* weighted RMSD measured from the mean structure */
    double          ref_wRMSD_from_mean;
    double          evals[4];           /* quaternion eigenvalues (residual sums) */
    double        **evecs;              /* quaternion eigenvectors, 4x4 (rotation vectors) */
};


/* Params groups the array-wide quantities for a set of structures:
   per-structure CdsParams, average and target Cds, atomic weights and
   variances, residuals, distance/covariance/weight matrices and PCA results. */
struct Params
{
    int             vlen;       /* number of coordinates */
    int             cnum;       /* number of Cds in array */

    CdsParams    **cdsp;     /* array of coords parameters */

    Cds            *avecds;  /* average Cds of all in CdsArray */
    Cds            *tcds;    /* target Cds */

    double         *w;           /* diagonal atomic weights */
    double         *var;         /* atomic variance estimates */
    double         *evals;
    double         *samplevar3N; /* atomic sample variances */
    int            *df;          /* degrees of freedom for variances, for incomplete data alignments */
    double         *S2;          /* theoretical NMR order parameters */

    double         *residuals;  /* 3 x vlen x cnum vector of normalized residuals */

    double        **Var_matrix; /* the variances of the distances in distmat */
    double        **Dij_matrix; /* average distance matrix for the CdsArray */
    Matrix3D       *distmat;
    double        **CovMat;     /* the atomic, row-wise covariance matrix */
    double        **WtMat;      /* inverse of the CovMat */
    double        **FullCovMat;

    double        **pcamat;     /* vlen x vlen sized matrix for PC eigenvectors */
    double         *pcavals;    /* PCA eigenvalues */
};


/* Priors holds prior quantities for a set of structures: a mean Cds,
   prior variances, and a prior covariance matrix with its inverse. */
struct Priors
{
    int             vlen;     /* number of coordinates */
    int             cnum;     /* number of Cds in array */

    double          alpha;

    Cds            *meancds;  /* mean Cds of all in CdsArray */

    double         *prvar;    /* presumably prior variances, as for prvar in Cds -- TODO confirm */
    double         *prevals;
    double        **PrCovMat;    /* the atomic, row-wise covariance matrix */
    double        **PrInvCovMat; /* inverse of the PrCovMat */
};


/* Cds: one working set of coordinates.  Besides the coordinates it carries
   residue identification, missing-data flags, a static copy of the
   coordinates, covariance-weighted coordinates, and the superposition
   parameters (rotation, translation, RMSDs, quaternion eigen-decomposition)
   for this structure. */
struct Cds
{
    char            filename[FILENAME_MAX];
    int             model;  /* model number; not really used */
    int             vlen;   /* coordinate count */
    int             aalen;  /* count of real residues (gaps excluded), used for CA alignments */

    char          **resName;  /* residue name */
    char           *chainID;  /* chain ID */
    int            *resSeq;   /* residue number */

    double        **wc;  /* 3 x K matrix of working coordinates, aliased to x,y,z below */
    double         *x;   /* atomic x coordinates */
    double         *y;   /* atomic y coordinates */
    double         *z;   /* atomic z coordinates */
    double         *o;   /* occupancy */
    double         *b;   /* B-factor */

    /* binary flag vectors: nu marks present data, mu marks missing data */
    int            *nu;
    int            *mu;

    double        **sc;  /* 3 x K matrix of static coordinates, aliased to sx,sy,sz below */
    double         *sx;  /* atomic x coordinates */
    double         *sy;  /* atomic y coordinates */
    double         *sz;  /* atomic z coordinates */
    double         *so;  /* occupancy */
    double         *sb;  /* B-factor */

    double        **cc;  /* inverse covariance weighted coordinates */
    /* inverse covariance matrix weighted x,y,z cds */
    double         *covx;
    double         *covy;
    double         *covz;

    double         *prvar;  /* prior variances */

    double         *residual_x;
    double         *residual_y;
    double         *residual_z;

    double        **matrix;             /* rotation matrix, 3x3 */
    double        **last_matrix;        /* temporary rotation matrix, 3x3 */
    double        **last_outer_matrix;  /* temporary rotation matrix, 3x3 */

    double          radgyr;     /* radius of gyration */
    double        **outerprod;  /* outer product matrix, vlen x vlen */
    double        **innerprod;  /* inner product matrix, 3 x 3 */

    double          center[3];        /* weighted centroid of the coordinates */
    double          last_center[3];   /* temporary centroid of the coordinates */
    double          translation[3];   /* translation vector derived from the weighted center */
    double          RMSD_from_mean;   /* RMSD measured from the mean structure */
    double          wRMSD_from_mean;  /* weighted RMSD measured from the mean structure */
    double          evals[4];         /* quaternion eigenvalues (residual sums) */
    double        **evecs;            /* quaternion eigenvectors, 4x4 (rotation vectors) */

    double          bfact_c;
    double          scale;

    /* storage only -- do not access directly */
    char           *resName_space;
};


/* CdsArray: a family of Cds together with everything needed to run the
   ML superposition on that family (average/target structures, weights,
   variances, covariance and PCA matrices, and scratch space). */
struct CdsArray
{
    struct PDBCdsArray  *pdbA;      /* the PDBCdsArray this array is associated with */
    struct CdsArray     *scratchA;  /* associated scratch array of Cds */

    char            outfile_name[FILENAME_MAX];
    int             vlen;           /* coordinate count */
    int             cnum;           /* how many Cds the array holds */
    char           *anchorf_name;
    char           *mapfile_name;
    char           *msafile_name;

    Cds           **cds;            /* array of pointers to the member Cds */
    Cds            *avecds;         /* average over all Cds in this CdsArray */
    double        **ac;             /* matrix of average coords */
    Cds            *tcds;           /* target Cds */
    double        **tc;

    double         *w;              /* diagonal atomic weights */
    double         *var;            /* estimates of the atomic variances */
    double         *evals;
    double         *samplevar3N;    /* atomic sample variances */
    int            *df;             /* variance degrees of freedom, used for incomplete data alignments */
    double         *S2;             /* theoretical NMR order parameters */

    double         *residuals;      /* normalized residuals, a 3 x vlen x cnum vector */

    double        **Var_matrix;     /* variances of the distances stored in distmat */
    double        **Dij_matrix;     /* average distance matrix for the CdsArray */
    Matrix3D       *distmat;
    double        **CovMat;         /* atomic, row-wise covariance matrix */
    double        **WtMat;          /* normalized inverse of CovMat */
    double        **FullCovMat;

    double        **pcamat;         /* vlen x vlen matrix of principal component eigenvectors */
    double         *pcavals;        /* PCA eigenvalues */

    /* scratch space: take care that subroutines do not access these */
    double        **tmpmatKK1;
    double        **tmpmatKK2;
    double        **tmpmat3a;       /* 3x3 scratch matrix */
    double        **tmpmat3b;       /* 3x3 scratch matrix */
    double        **tmpmat3c;       /* 3x3 scratch matrix */
    double        **tmpmat3d;       /* 3x3 scratch matrix */
    double         *tmpvecK;
    double         *tmpvec3a;
};


/* Program-wide globals, declared extern here so that any file including this
   header can reach them (the original author believed leave() needs this).
   PDBCdsArray is not declared in this header -- presumably it comes from
   PDBCds.h; confirm. */
extern CdsArray        *baseA; /* main array of selected pdb cds, never modified */
extern PDBCdsArray     *pdbA;  /* array holding all of the pdb file coordinate info,
                              much of it unused in the actual calculations */
extern Algorithm       *algo;
extern Statistics      *stats;

#endif