1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
|
/*
Theseus - maximum likelihood superpositioning of macromolecular structures
Copyright (C) 2004-2014 Douglas L. Theobald
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the:
Free Software Foundation, Inc.,
59 Temple Place, Suite 330,
Boston, MA 02111-1307 USA
-/_|:|_|_\-
*/
#ifndef COORDS_SEEN
#define COORDS_SEEN
#include <stdio.h>
#include "DLTmath.h"
#include "PDBCds.h"
/* Forward typedefs: let every struct in this header be named without the
   `struct` keyword, independent of definition order. */

/* run configuration and summary statistics */
typedef struct Algorithm Algorithm;
typedef struct Statistics Statistics;

/* single coordinate sets */
typedef struct Cds Cds;
typedef struct StCds StCds;

/* families of coordinate sets */
typedef struct CdsArray CdsArray;
typedef struct StCdsArray StCdsArray;

/* parameter and prior containers */
typedef struct CdsParams CdsParams;
typedef struct Params Params;
typedef struct Priors Priors;
/* Algorithm collects the run-wide options, flags and running counters.
   Member order is part of the layout and must not be rearranged. */
struct Algorithm
{
    /* ---- command line and input files ---- */
    char cmdline[1024];             /* stored copy of the command line */
    int argc;
    char **argv;
    char **infiles;                 /* array of the input files named on the command line */
    int filenum;                    /* count of input files */
    char rootname[FILENAME_MAX];

    /* ---- general control, convergence, timing ---- */
    int weight;                     /* weighting method */
    int verbose;                    /* lots of output */
    double precision;               /* relative precision requested for convergence */
    int iterations;                 /* max # of iterations allowed for the outer loop of MultiPose() */
    int rounds;                     /* running count of outer-loop rounds in MultiPose() */
    int innerrounds;                /* running count of inner-loop rounds in MultiPose() */
    double milliseconds;            /* how long the calculation took */
    int write_file;                 /* flag: write the output pdb file; default = 1 = yes */

    /* ---- atom / residue selection ---- */
    int atoms;                      /* flag for atom types to include in the superposition
                                       (CA, CB, backbone, P, etc.) */
    char *selection;                /* user input: residues/alignment columns to include */
    char *atomslxn;                 /* user input: atom types to include */
    int revsel;                     /* reverse the sense of `selection` (i.e. exclude them) */

    /* ---- analysis switches ---- */
    int embedave;                   /* flag: initialize with an embedded average (distance geometry) */
    int landmarks;                  /* # of landmarks (Dryden files) */
    int writestats;                 /* flag: write out stat files */
    int FragDist;
    int random;
    int pca;                        /* flag: Principal Components Analysis on the covariance matrix */
    int fullpca;
    int cormat;                     /* flag: PCA with the correlation matrix instead of the covariance matrix */
    int tenberge;
    int morph;
    int stats;                      /* calculate moment stats */
    double constant;                /* minimum variance allowed */
    int info;                       /* just calculate stats for the given pdb file and quit */
    int princaxes;                  /* flag: align the final superposition with the principal
                                       axes of the mean structure */
    int nullrun;
    int binary;                     /* flag: read and write binary structure files */
    int mbias;                      /* flag: calculate bias-corrected mean */

    /* NOTE(review): the next two names read as "do ..." while their original
       comments say "don't ..." -- confirm polarity against the callers. */
    int domp;                       /* don't initialize Bayes w/MultiPose ML */
    int doave;                      /* don't calculate an average structure */
    int dotrans;
    int dorot;
    int dohierarch;
    int docovars;

    /* ---- superposition model ---- */
    int alignment;                  /* flag: superimpose based on a sequence alignment */
    int covweight;                  /* flag: atomic, row-wise covariance matrix weighting */
    int varweight;                  /* flag: variance weighting (i.e., a diagonal covariance matrix) */
    int leastsquares;               /* flag: classical least squares, all variances equal, no covars */
    int hierarch;                   /* flag: hierarchical variances, e.g. inverse gamma distributed vars */
    int fmodel;                     /* read only the first or all models in a pdb file */
    int noinnerloop;                /* don't iterate the inner loop */
    int fasta;                      /* flag: write FASTA sequence files for each PDB model read in */
    int olve;                       /* Olve Peersen's pet requests */
    int abort;
    int seed;                       /* random number seed, can be specified by user */
    int mixture;
    int threads;                    /* flag: run with pthreads */
    double minc;
    int printlogL;
    int bfact;
    int convlele;                   /* flag: convert Lele's formatted files */

    /* ---- random structure generation ---- */
    double param[2];                /* params for inverse gamma */
    double radii[3];                /* radii of gyration for generating mean forms */

    /* ---- miscellaneous ---- */
    int ssm;
    int bayes;
    int ipmat;
    int missing;
    int scale;                      /* calculate scale factors for each structure */
    int instfile;                   /* print PDB files in each internal round of the MultiPose algorithm */
    int pu;                         /* for testing Pu's QCP rotation method */
    int amber;                      /* switch for special treatment of AMBER8 PDB formatted files */
    int atom_names;
    double scalefactor;             /* value to scale all structures by */
    int morphfile;                  /* flag: read and write morphometric .tps files */
    int scaleanchor;                /* model index for relative scaling -- scale of this model = 1 */
    int randgibbs;                  /* randomly initialize GibbsMet */
    double covnu;
};
/* Statistics gathers the summary numbers computed for a superposition.
   Member order is part of the layout and must not be rearranged. */
struct Statistics
{
    double stddev;              /* combined standard deviation of all atomic positions */
    double var;
    double phi;
    double alpha;

    /* stats for the initial superposition */
    double starting_paRMSD;
    double starting_pawRMSD;
    double starting_mlRMSD;
    double starting_stddev;
    double starting_logL;

    double ave_paRMSD;          /* average pairwise RMSD */
    double ave_pawRMSD;         /* average weighted pairwise RMSD */
    double RMSD_from_mean;      /* average RMSD from the mean structure */
    double mlRMSD;              /* max lik RMSD, actually a sigma */

    /* likelihood stats */
    double logL;
    double mlogL;
    double AIC;
    double BIC;
    double nparams;
    double ndata;
    double chi2;

    double skewness[4];         /* for x, y, z residuals and total */
    double kurtosis[4];
    double SES;                 /* presumably std. error of skewness -- TODO confirm */
    double SEK;                 /* presumably std. error of kurtosis -- TODO confirm */
    int median;                 /* index of the structure closest to the mean */
    double wtnorm;              /* normalization factor for the atomic row-wise weight matrix */

    /* parameters of the hierarchical variance PDF */
    double hierarch_p1;
    double hierarch_p2;
    double hierarch_chi2;       /* chi^2 value for the fit of hierarchical variances */
    double hierarch_chi2_P;     /* P-value */
    double omnibus_chi2;        /* overall chi^2, including hierarchical and overall fit */
    double omnibus_chi2_P;      /* P-value */
    double precision;           /* actual precision to which the algorithm converged */
};
/* StCds holds a static working set of coordinates.
   Member order is part of the layout and must not be rearranged. */
struct StCds
{
    int model;              /* model number, not really used */
    int vlen;               /* number of coordinates */
    int aalen;              /* number of real residues, no gaps, used for CA alignments */

    /* x,y,z atomic coordinates */
    double *x;
    double *y;
    double *z;

    double *o;              /* occupancy */
    double *b;              /* B-factor */

    /* not to be accessed - for space only */
    char *resName_space;
};
/* StCdsArray: a collection of coordinate sets plus per-family arrays.
   NOTE(review): the elements are declared as Cds, not StCds, despite the
   struct's name -- confirm this is intentional. */
struct StCdsArray
{
    int vlen;                   /* number of coordinates */
    int cnum;                   /* number of Cds in the array */
    Cds **cds;                  /* points to an array of pointers to Cds */
    Cds *avecds;                /* average Cds of all in the array */
    double *evals;
    double *samplevar3N;        /* atomic sample variances */
    double **CovMat;            /* the atomic, row-wise covariance matrix */
};
/* CdsParams: per-structure working parameters (residuals, rotation,
   translation, fit statistics). Member order is part of the layout and
   must not be rearranged. */
struct CdsParams
{
    int vlen;                       /* number of coordinates */
    double *prvar;                  /* prior variances */

    double *residual_x;
    double *residual_y;
    double *residual_z;

    /* covariance matrix weighted x,y,z cds */
    double *covx;
    double *covy;
    double *covz;

    double **matrix;                /* 3x3 rotation matrix */
    double **last_matrix;           /* temp 3x3 rotation matrix */
    double **last_outer_matrix;     /* temp 3x3 rotation matrix */
    double radgyr;                  /* radius of gyration */
    double **innerprod;             /* vlen x vlen inner product matrix */
    double **innerprod2;            /* 3 x 3 inner product matrix */
    double center[3];               /* weighted centroid of coordinates */
    double last_center[3];          /* temp centroid of coordinates */
    double translation[3];          /* translation vector, based on weighted center */
    double RMSD_from_mean;          /* rmsd from the mean structure */
    double wRMSD_from_mean;         /* weighted rmsd from the mean structure */
    double ref_wRMSD_from_mean;
    double evals[4];                /* quaternion evals (residual sums) */
    double **evecs;                 /* 4x4 quaternion evecs (rotation vectors) */
};
/* Params: family-wide parameter arrays and matrices for a set of Cds.
   Member order is part of the layout and must not be rearranged. */
struct Params
{
    int vlen;                   /* number of coordinates */
    int cnum;                   /* number of Cds in the array */
    CdsParams **cdsp;           /* array of coords parameters */
    Cds *avecds;                /* average Cds of all in the CdsArray */
    Cds *tcds;                  /* target Cds */
    double *w;                  /* diagonal atomic weights */
    double *var;                /* atomic variance estimates */
    double *evals;
    double *samplevar3N;        /* atomic sample variances */
    int *df;                    /* degrees of freedom for variances,
                                   for incomplete data alignments */
    double *S2;                 /* theoretical NMR order parameters */
    double *residuals;          /* 3 x vlen x cnum vector of normalized residuals */
    double **Var_matrix;        /* the variances of the distances in distmat */
    double **Dij_matrix;        /* average distance matrix for the CdsArray */
    Matrix3D *distmat;
    double **CovMat;            /* the atomic, row-wise covariance matrix */
    double **WtMat;             /* inverse of the CovMat */
    double **FullCovMat;
    double **pcamat;            /* vlen x vlen sized matrix for PC eigenvectors */
    double *pcavals;            /* PCA eigenvalues */
};
/* Priors: prior quantities for a family of Cds.
   Member order is part of the layout and must not be rearranged. */
struct Priors
{
    int vlen;                   /* number of coordinates */
    int cnum;                   /* number of Cds in the array */
    double alpha;
    Cds *meancds;               /* mean Cds of all in the CdsArray */
    double *prvar;
    double *prevals;
    double **PrCovMat;          /* the atomic, row-wise covariance matrix */
    double **PrInvCovMat;       /* inverse of the PrCovMat */
};
/* Cds holds one working set of coordinates together with its per-atom
   annotations and its superposition state.
   Member order is part of the layout and must not be rearranged. */
struct Cds
{
    char filename[FILENAME_MAX];
    int model;                      /* model number, not really used */
    int vlen;                       /* number of coordinates */
    int aalen;                      /* number of real residues, no gaps, used for CA alignments */
    char **resName;                 /* residue name */
    char *chainID;                  /* chain ID */
    int *resSeq;                    /* residue number */

    /* working coordinates */
    double **wc;                    /* 3 x K matrix of working coordinates, aliased to x,y,z below */
    double *x;                      /* x,y,z atomic coordinates */
    double *y;
    double *z;
    double *o;                      /* occupancy */
    double *b;                      /* B-factor */

    /* binary flag vectors for present and missing data, respectively */
    int *nu;
    int *mu;

    /* static coordinates */
    double **sc;                    /* 3 x K matrix of static coordinates, aliased to sx,sy,sz below */
    double *sx;                     /* x,y,z atomic coordinates */
    double *sy;
    double *sz;
    double *so;                     /* occupancy */
    double *sb;                     /* B-factor */

    double **cc;                    /* inv covariance weighted coordinates */
    double *covx;                   /* inv covariance matrix weighted x,y,z cds */
    double *covy;
    double *covz;
    double *prvar;                  /* prior variances */
    double *residual_x;
    double *residual_y;
    double *residual_z;

    double **matrix;                /* 3x3 rotation matrix */
    double **last_matrix;           /* temp 3x3 rotation matrix */
    double **last_outer_matrix;     /* temp 3x3 rotation matrix */
    double radgyr;                  /* radius of gyration */
    double **outerprod;             /* vlen x vlen outer product matrix */
    double **innerprod;             /* 3 x 3 inner product matrix */
    double center[3];               /* weighted centroid of coordinates */
    double last_center[3];          /* temp centroid of coordinates */
    double translation[3];          /* translation vector, based on weighted center */
    double RMSD_from_mean;          /* rmsd from the mean structure */
    double wRMSD_from_mean;         /* weighted rmsd from the mean structure */
    double evals[4];                /* quaternion evals (residual sums) */
    double **evecs;                 /* 4x4 quaternion evecs (rotation vectors) */
    double bfact_c;
    double scale;

    /* not to be accessed - for space only */
    char *resName_space;
};
/* CdsArray is an array of Cds, together with everything needed to carry
   out the ML superposition for that family of Cds.
   Member order is part of the layout and must not be rearranged. */
struct CdsArray
{
    struct PDBCdsArray *pdbA;       /* associated PDBCdsArray */
    struct CdsArray *scratchA;      /* associated scratch array of Cds */
    char outfile_name[FILENAME_MAX];
    int vlen;                       /* number of coordinates */
    int cnum;                       /* number of Cds in the array */
    char *anchorf_name;
    char *mapfile_name;
    char *msafile_name;

    Cds **cds;                      /* points to an array of pointers to Cds */
    Cds *avecds;                    /* average Cds of all in the CdsArray */
    double **ac;                    /* average coords matrix */
    Cds *tcds;                      /* target Cds */
    double **tc;

    double *w;                      /* diagonal atomic weights */
    double *var;                    /* atomic variance estimates */
    double *evals;
    double *samplevar3N;            /* atomic sample variances */
    int *df;                        /* degrees of freedom for variances,
                                       used for incomplete data alignments */
    double *S2;                     /* theoretical NMR order parameters */
    double *residuals;              /* 3 x vlen x cnum vector of normalized residuals */
    double **Var_matrix;            /* the variances of the distances in distmat */
    double **Dij_matrix;            /* average distance matrix for the CdsArray */
    Matrix3D *distmat;
    double **CovMat;                /* the atomic, row-wise covariance matrix */
    double **WtMat;                 /* normalized inverse of the CovMat */
    double **FullCovMat;
    double **pcamat;                /* vlen x vlen sized matrix for principal
                                       component eigenvectors */
    double *pcavals;                /* PCA eigenvalues */

    /* scratch space: must be careful that these aren't accessed by subroutines */
    double **tmpmatKK1;
    double **tmpmatKK2;
    /* 3x3 scratch matrices */
    double **tmpmat3a;
    double **tmpmat3b;
    double **tmpmat3c;
    double **tmpmat3d;
    double *tmpvecK;
    double *tmpvec3a;
};
/* global declarations (necessary for leave(), I think) */
/* These are declarations only; each object needs exactly one definition in
   some .c file. NOTE(review): the PDBCdsArray typedef is not declared in this
   header -- presumably it comes from "PDBCds.h"; confirm. */
extern CdsArray *baseA; /* main array of selected pdb cds, never modified */
extern PDBCdsArray *pdbA; /* array holding all of the pdb file coordinate info,
much of it unused in the actual calculations */
extern Algorithm *algo; /* shared Algorithm instance (options, flags, counters) */
extern Statistics *stats; /* shared Statistics instance */
#endif
#endif
|