File: SweeD.h~

package info (click to toggle)
sweed 3.2.1%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 616 kB
  • sloc: ansic: 11,034; sh: 21; makefile: 8
file content (396 lines) | stat: -rw-r--r-- 8,056 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
/*  
 *  SweeD
 *
 *  Copyright September 2012 by Nikolaos Alachiotis and Pavlos Pavlidis
 *
 *  This program is free software; you may redistribute it and/or modify its
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  For any other enquiries send an email to
 *  Pavlos Pavlidis (pavlidisp@gmail.com) or
 *  Nikolaos Alachiotis (n.alachiotis@gmail.com)
 *  
 */


#ifndef _SweeD_H
#define _SweeD_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <limits.h>


#include "SweeD_BFGS.h"

#ifdef _USE_PTHREADS
#include <pthread.h>
#include <unistd.h>

#define EXIT 127
#define BUSYWAIT 0
#define LIKSFS 1
#define COMPRVLUT 2
#define COMPALPHA 3

#endif

#ifdef _DO_CHECKPOINTS
#include "dmtcpaware.h"
#endif

#ifdef _ANALYTICAL_SFS
#include <mpfr.h>
#include <float.h>
#define ACC 10000
#define APPROX 0
#define TOLTIME 1e-7
#define RESCALE 1
#endif

#define ENTRIES_PER_INT 32
#define DENOMINATOR_OFFSET 0.00001
#define REGISTER_WIDTH 32
#define MAXSIZE 10000
#define INFILENAMESIZE 1000
#define SEQNAMESIZE 1000
#define DECREASE 0.99
#define MINSNPPERWINDOW 5
#define STRINGLENGTH 100
#define VCF_HLENGTH 9 // number of fields in the VCF header line
#define MAX_CHROM_NAME_VCF 100
#define MAX_STATES_VCF 5

#define RESULTS_DEFAULT 0
#define RESULTS_ALL 1

#define RSQUARE 0
#define DOM 1
#define ABSDOM 2
#define JUSTD 3
#define ABSD 4
#define ABSDOM2 5

#define ZERO '0'
#define ONE  '1'
#define GAP '-'
#define AD 'A'
#define CY 'C'
#define GU 'G'
#define TH 'T'
#define UN 'N'

#define BINARY 2
#define BINARY_WITH_GAPS 3
#define DNA 4
#define DNA_WITH_GAPS 5
#define STATESALL 8

#define OTHER_FORMAT -1
#define MS_FORMAT 0
#define FASTA_FORMAT 1
#define MACS_FORMAT 2
#define VCF_FORMAT 3
#define SF_FORMAT 4

#define INTOKS 8

#define N_REF 2
#define N_ALT 3
#define N_INFO 6
#define MAX_STATES 100
#define MAXINT 2147483647


char bits_in_16bits [0x1u << 16];

int linkage_disequilibrium;

int borderTol;

int VCF_header_lines;
char VCF_alignment_name [MAX_CHROM_NAME_VCF];
int VCF_first_SNP;
int nxtVCFalignment;

char runName[INFILENAMESIZE];

typedef float cor_t;



cor_t ABS (cor_t input);

double mainTime0;

int map[INTOKS];

int nofTokens;

int nofSamples;

typedef int al_t;
typedef double t_sfs;

struct pattern
{
	int x;
	int n;
	unsigned char f;
	int c;
	struct pattern * nxt;		
};

typedef struct 
{
 	al_t	*n; // number of valid bases in every site
	al_t	*n_t; // number of valid bases in every site
	al_t *x; // number of derived bases in every site
	al_t *x_t; // number of derived bases in every site
	float 	       * positions;
	int 	       * positionsInd;
	int 	       * p_t;
	unsigned char *folded;
	unsigned char *f_t;
	unsigned char userSetFolded;
	unsigned char userSFS;
	char* outgroupSequence;
  	char* outgroupName;
	int outgroupIndex;
	int sequences;
  	int foldedFormat; // if outgroup exists we set this to zero
	int 		 segsites;	
	int		 length;

	int minn;
	int minx;	
  	int maxn;
	int maxx;

	al_t * stateA;
	al_t * stateC;
	al_t * stateG;
	al_t * stateT;


	t_sfs * SFS;
	t_sfs * tmpSFS;
	int startSFS;
	int endSFS;

	int * ng;

	t_sfs * lb;
	t_sfs * ub;

	int params;

	al_t * n_pat;
	al_t * x_pat;
	unsigned char * f_pat;
	int *  c_pat;
	int patterns;

	t_sfs *** gridProbs;
	t_sfs * gridADs;
	t_sfs interval;
	t_sfs logAD0;	

	t_sfs * baseLikelihood;

	struct pattern * patternList;
	
	float sweepStep;

	int discSites;

#ifdef _USE_PTHREADS
	t_sfs * threadScoresLIKSFS;
#endif

} alignment_struct;

alignment_struct * alignment;

double * rv;
double ** rvLUT;
double ** rvLUT2;


#define GRIDSIZE 300
#define LOCALGRIDSIZE 100

double div6; 


typedef struct
{
	double * val;
	int * sc;

} factorial_LUT;

factorial_LUT * factLUT;


typedef struct
{

  int 		sfRealPos; 
  t_sfs		likelihood; 
  t_sfs		alpha; 
 
} clr_struct;

clr_struct * clr;

#ifdef _ANALYTICAL_SFS
typedef struct
{
	double t;
	double d;
} event;

int eventsTotal;
event * eventList;

mpfr_t ** analyticalSFS_LUT;

void computeAnalyticalSFS(int sequences, FILE * fp);
void computeAnalyticalSFSExpo(int sequences, double R, FILE * fpSFSo);
long double xExponential_Integral_Ei( long double x );
void mpfr_Exponential_Integral_Ei( mpfr_t EImp, mpfr_t xmp );
#endif

void printHeading (FILE * fp);
void computeCorrelationMatrixPairwise(alignment_struct * alignment, clr_struct * omega, int omegaIndex, int firstRowIndex, void * threadData, cor_t ** myCorrelationMatrix, char * lookuptable);
void applyCorrelationMatrixAdditions (clr_struct * omega, int omegaIndex, int firstRowIndex, cor_t ** correlationMatrix);
void overlapCorrelationMatrixAdditions (alignment_struct * alignment, clr_struct * omega, int lvw_i, int cvw_i, int * firstRowToCopy, int * firstRowToCompute, int * firstRowToAdd);
void shiftCorrelationMatrixValues (clr_struct * omega, int lvw_i, int cvw_i, int firstRowToCopy, cor_t ** correlationMatrix);


void commandLineParser(int argc, char** argv, 
		       char * infile,
		       char * sfsfile,
                       char * sfsofile,
		       char * sfofile, 
		       int * grid, 
                       int * length,                    
                       unsigned int * seed,
		       int * fileFormat,	
		       int * results,
		       char * outgroupName,
		       unsigned char *userSetFolded,
			unsigned char * userSFS,
			unsigned char * onlySFS,
			unsigned char * onlySF,
		       int * monomorphic,
			int * threads,
			int * sequences,
		       int * analyticalSFS,
		       double *growthRate);

void removeMonomorphicSites (int monomorphic, FILE * fp);

t_sfs computeBaseLikelihood();
t_sfs getAlpha (int sweepPosition, t_sfs * likelihood);


int isBinary(char input);

void printIntArray(int *array, int n);

void printUCharArray(unsigned char *array, int n);

void printFloatArray(float *array, int n);

void printCharArray(char *array, int n);

int isEndOfLine(char ent);

void goToLineEnd(FILE *fp);

int findFirstAlignment(FILE *fp, int format);

int findNextAlignment(FILE *fp, int fileFormat);

void freeAlignment();

void readAlignment(FILE *fp, int format, FILE * fpInfo, FILE * sfO);

void compressAlignment(alignment_struct *alignment);

void knuthShuffle(int orgArray[], int arraySize, int start, int end);

int parseSample(FILE *fp, int targetTok, char **word, int *wordLength, int curTok);

void unGetChars(FILE *fp, char* chars, int n);

int isValidDNACharacter(char input);

void parseIndividual(char *word, char** genotype, char delimiter, int token, char* states, char phased, char unphased, int *ploidy, int *max_ploidy);

void ignoreAll(FILE * fp, char * ent);

int isSpace(char ent);

double XchooseY_ln(int x, int y);

void checkSNIPPositions (FILE* fp, alignment_struct * alignment, int index);

void createSFS (FILE * fpSFS, FILE * fpSFSo, int alignmentIndex, int * SFSsize);
void createCLR (int grid);
void createPROBS (int probGrid);
void createPatterns(void);
void computeFactLUT(void);

double gettime(void);

t_sfs likelihoodSFS_SNP(t_sfs * tmpSFS, int x, int n, unsigned char f, int c);

void updateSF(FILE * fpSFo);

#ifdef _USE_PTHREADS

typedef struct
{
	int threadID;
	int threadTOTAL;

	int threadBARRIER;
	int threadOPERATION;

}threadData_t;

pthread_t * workerThreadL;
threadData_t * threadDataL;

int gridP;
void likelihoodSFS_thread(int tid, int threads);
void parallelComputationRVLUT_thread(int tid, int threads);
void computeAlpha_parallel(int grid);
void parallelComputationAlpha_thread(int tid, int threads);
void syncThreadsBARRIER();


#endif

#ifdef _DO_CHECKPOINTS
double lastCheckpointTime;
double checkPointInterval;
extern void writeCheckpoint();
#endif

#endif