File: common.h

package info (click to toggle)
cufflinks 1.3.0-2
  • links: PTS, VCS
  • area: non-free
  • in suites: wheezy
  • size: 3,864 kB
  • sloc: cpp: 48,999; ansic: 12,297; sh: 3,381; python: 432; makefile: 209
file content (436 lines) | stat: -rw-r--r-- 11,367 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
#ifndef COMMON_H
#define COMMON_H
/*
 *  common.h
 *  Cufflinks
 *
 *  Created by Cole Trapnell on 11/26/08.
 *  Copyright 2008 Cole Trapnell. All rights reserved.
 *
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdint.h>
#include <sys/types.h>
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <boost/math/distributions/normal.hpp> 
using boost::math::normal;

#include <boost/foreach.hpp>
#define foreach         BOOST_FOREACH
#define reverse_foreach BOOST_REVERSE_FOREACH

#include <boost/thread.hpp>
#include <boost/shared_ptr.hpp>

// Program-wide settings shared between translation units.
// Everything in this section is only *declared* here (extern); the single
// definition of each variable, and therefore its default value, lives in a
// source file that is not visible from this header.

// Non-option globals
extern bool final_est_run;
extern bool allow_junk_filtering;
extern bool user_provided_fld;
extern int def_max_frag_len;
extern int max_frag_len;
extern int min_frag_len;

// Behavior options
extern int num_threads;
extern bool no_update_check;
extern bool cuff_quiet;
extern bool cuff_verbose;
extern bool output_fld;
extern bool output_bias_params;

// General options
extern int max_partner_dist;
extern uint32_t max_gene_length;
extern std::string ref_gtf_filename;
extern std::string mask_gtf_filename;
extern std::string output_dir;
extern std::string fasta_dir;
extern std::string library_type;

// Abundance estimation options
extern bool corr_bias;
extern bool corr_multi;
extern bool use_quartile_norm;
extern bool poisson_dispersion;
extern int def_frag_len_mean;
extern int def_frag_len_std_dev;
extern int max_mle_iterations;
extern int num_importance_samples;
extern float min_isoform_fraction;
extern bool use_em;
extern bool cond_prob_collapse;
extern bool use_compat_mass;
extern bool use_total_mass;

// Ref-guided assembly options
extern int overhang_3;
extern int ref_merge_overhang_tolerance;
extern int tile_len;
extern int tile_off;
extern bool enable_faux_reads;
extern bool enable_5_extend;

// Assembly options
// NOTE(review): from user_label onwards the names look unrelated to assembly
// (labels, seeding, bootstrapping); they may simply have been appended to
// this group - confirm before relying on the grouping.
extern uint32_t min_intron_length;
extern uint32_t max_intron_length;
extern int olap_radius;
extern int bowtie_overhang_tolerance;
extern int min_frags_per_transfrag;
extern int microexon_length;
extern float pre_mrna_fraction;
extern float high_phred_err_prob;
extern double trim_3_dropoff_frac;
extern double trim_3_avgcov_thresh;
extern double small_anchor_fraction;
extern double binomial_junc_filter_alpha;
extern std::string user_label;
extern long random_seed;
extern bool emit_count_tables;
extern bool use_fisher_covariance;
extern bool split_variance;
extern bool bootstrap;
extern int num_bootstrap_samples;
extern double bootstrap_fraction;
extern double bootstrap_delta_gap;
extern int max_frags_per_bundle;

// SECRET OPTIONS:
// These options exist only for instrumentation and benchmarking code.

extern bool no_read_pairs;
extern float read_skip_fraction;
extern int trim_read_length;
extern double mle_accuracy;

// END SECRET OPTIONS

// Compile-time switches.
// ASM_VERBOSE is not referenced anywhere else in this header.
// NOTE(review): presumably it gates the output of asm_verbose() - confirm
// in the implementation file.
#define ASM_VERBOSE 0
// ENABLE_THREADS selects the storage type of bundle_label below.
#define ENABLE_THREADS 1

// Label used when logging. With ENABLE_THREADS set it is declared as a
// boost::thread_specific_ptr, otherwise as a boost::shared_ptr.
// Declared here, defined elsewhere.
#if ENABLE_THREADS
extern boost::thread_specific_ptr<std::string> bundle_label; // for consistent, traceable logging
#else
extern boost::shared_ptr<std::string> bundle_label;
#endif


// Free-function declarations; all definitions live outside this header, so
// the behavioural notes below are hedged where the signature alone cannot
// establish them.

// NOTE(review): the name is spelled "gaurd" (sic). It is part of the public
// interface, so it is left unchanged here.
bool gaurd_assembly();

// Variadic logging helpers taking a C format string.
// NOTE(review): presumably printf-style and gated by ASM_VERBOSE /
// cuff_verbose respectively - confirm in the definitions.
void asm_verbose(const char* fmt,...);
void verbose_msg(const char* fmt,...); 

// NOTE(review): neither parser takes the text to parse as an argument, so
// the input must come from outside the parameter list (presumably getopt's
// optarg) - confirm. `lower`/`upper` look like inclusive bounds, `errmsg`
// the message reported on failure and `print_usage` a callback invoked on
// failure - all to be confirmed against the definitions.
int parseInt(int lower, 
			 const char *errmsg, 
			 void (*print_usage)());

float parseFloat(float lower, 
				 float upper, 
				 const char *errmsg, 
				 void (*print_usage)());

// NOTE(review): seqStr is taken by value (a copy per call); `seq` and
// `c_seq` are caller-provided output buffers whose required size is not
// visible here - confirm before calling.
void encode_seq(const std::string seqStr, char* seq, char* c_seq);
// NOTE(review): presumably creates the directory path `s` with permissions
// `mode`, like `mkdir -p` - confirm. Requires mode_t to be declared.
int mkpath(const char *s, mode_t mode);


// Copies, in order, every element of [begin, end) for which p(element) is
// true into the range starting at destBegin.
// Returns the output iterator positioned one past the last element written
// (equal to destBegin when nothing matched).
//
// NOTE: C++11 added std::copy_if with the same parameter list. When this
// header is compiled as C++11 or later, an unqualified call with standard
// library iterators can find both overloads through ADL; qualify the call
// (::copy_if) in that case.
template<typename InputIterator,
		 typename OutputIterator,
		 typename Predicate>
OutputIterator copy_if(InputIterator begin,
					   InputIterator end,
					   OutputIterator destBegin,
					   Predicate p)
{
	for (; begin != end; ++begin)
	{
		// Skip elements rejected by the predicate.
		if (!p(*begin))
			continue;

		*destBegin = *begin;
		++destBegin;
	}
	return destBegin;
}

// Mode selector for bundling.
// NOTE(review): the meaning of each enumerator is inferred from its name
// only (hit-driven / reference-driven / reference-guided) - confirm against
// the code that switches on bundle_mode.
// The numeric values are spelled out; they match the implicit numbering.
enum BundleMode
{
	HIT_DRIVEN = 0,
	REF_DRIVEN = 1,
	REF_GUIDED = 2
};
// Declared here, defined elsewhere.
extern BundleMode bundle_mode;
extern BundleMode init_bundle_mode;

// Mode selector for bias correction.
// NOTE(review): enumerator meanings are not established by this header
// (VLMM presumably stands for variable-length Markov model) - confirm in
// the bias-learning code.
// The numeric values are spelled out; they match the implicit numbering.
enum BiasMode
{
	SITE     = 0,
	VLMM     = 1,
	POS      = 2,
	POS_VLMM = 3,
	POS_SITE = 4
};
// Declared here, defined elsewhere.
extern BiasMode bias_mode;

// Whether a library was prepared with a strand-preserving protocol.
// UNKNOWN_STRANDEDNESS is the zero value.
// The numeric values are spelled out; they match the implicit numbering.
enum Strandedness
{
	UNKNOWN_STRANDEDNESS = 0,
	STRANDED_PROTOCOL    = 1,
	UNSTRANDED_PROTOCOL  = 2
};

// Expected relative orientation of the two mates of a read pair, or
// UNPAIRED for single-end data. UNKNOWN_MATE_ORIENTATION is the zero value.
//
// The trailing comma after the last enumerator was removed: it is only
// well-formed from C++11 on, and no other enum in this header uses one.
// Enumerator values are unchanged.
enum StandardMateOrientation
{
    UNKNOWN_MATE_ORIENTATION,
    MATES_POINT_TOWARD,
    MATES_POINT_SAME,
    MATES_POINT_AWAY,
    UNPAIRED
};

// Strand mapping of the two mates (F = forward, R = reverse).
// The numeric values are spelled out; they match the implicit numbering.
enum MateStrandMapping
{
	FF = 0,
	FR = 1,
	RF = 2, // This is really FR with first-strandedness
	RR = 3  // This is really FF with first-strandedness
};

// Sequencing platform that produced a read group.
// UNKNOWN_PLATFORM is the zero value.
// The numeric values are spelled out; they match the implicit numbering.
enum Platform
{
    UNKNOWN_PLATFORM = 0,
    ILLUMINA         = 1,
    SOLID            = 2
};

// Empirical distribution over integer lengths, stored as pre-computed
// pdf/cdf tables plus summary statistics (mode, mean, standard deviation
// and the supported range [min, max]).
//
// Both tables are indexed directly by length. The class never checks the
// table sizes against _min/_max, so the caller must keep them consistent:
// every index that can be looked up (up to _max) has to exist in the tables.
//
// The constructor and the table setters take their vectors by const
// reference (they were non-const references before), so const vectors and
// temporaries are accepted; the arguments are only copied, never modified.
class EmpDist
{
	// Vectors only valid between min and max!
	std::vector<double> _pdf;
	std::vector<double> _cdf;
	int _mode;
	double _mean;
	double _std_dev;
	int _min;
	int _max;
	
public:
	// Stores copies of both tables together with the summary statistics.
	EmpDist(const std::vector<double>& pdf, const std::vector<double>& cdf, int mode, double mean, double std_dev, int min, int max)
	: _pdf(pdf), _cdf(cdf), _mode(mode), _mean(mean), _std_dev(std_dev), _min(min), _max(max) {}
	
	// Replaces the pdf table with a copy of `pdf`.
	void pdf(const std::vector<double>& pdf)	{ _pdf = pdf; }
	
	// Probability of length l; 0.0 for any length outside [min, max].
	double pdf(int l) const
	{
		if (!valid_len(l))
			return 0.0;
		return _pdf[l];
	}
	
	// pdf renormalized over the lengths <= r.
	// Returns 0.0 when l is outside [min, max]. When r exceeds max, or r is 0
	// (treated as "no upper limit"), the plain pdf(l) is returned instead.
	// Otherwise the result is pdf(l) / cdf(r); no check is made that cdf(r)
	// is non-zero.
	double npdf(int l, int r) const
	{
		if (!valid_len(l))
			return 0.0;
		
		if (r > _max || r == 0)
			return pdf(l);
		
		return pdf(l)/cdf(r);
	}
	
	// Replaces the cdf table with a copy of `cdf`.
	void cdf(const std::vector<double>& cdf)	{ _cdf = cdf; }
	
	// Cumulative probability up to length l: 1.0 above max, 0.0 for negative
	// lengths, otherwise the table entry at index l (including indices below
	// min, which are read from the table as-is).
	double cdf(int l) const
	{
		if (l > _max)
			return 1.0;
		if (l < 0)
			return 0.0;
		return _cdf[l];
	}
	
	// True when l lies within the supported range [min, max].
	bool valid_len(int l) const { return (l >= _min && l <= _max); }
	// True when l lies below the supported range.
	bool too_short(int l) const { return (l < _min); }
	
	// Plain accessors for the summary statistics. The setters do not touch
	// the tables.
	void mode(int mode)				{ _mode = mode; }
	int mode() const				{ return _mode; }
	
	void max(int max)				{ _max = max;  }
	int max() const					{ return _max; }
	
	void min(int min)				{ _min = min;  }
	int min() const					{ return _min; }
	
	void mean(double mean)			{ _mean = mean;  }
	double mean() const				{ return _mean; }
	
	void std_dev(double std_dev)	{ _std_dev = std_dev;  }
	double std_dev() const			{ return _std_dev; }
};

// Forward declarations: these types are only used through boost::shared_ptr
// in ReadGroupProperties below, so their full definitions are not needed in
// this header.
class BiasLearner;
class MultiReadTable;

class MassDispersionModel;

// Plain record pairing a locus description with a count and the number of
// transcripts at that locus.
struct LocusCount
{
    // `ld` is taken by const reference (it was passed by value before), so
    // the description is copied once, straight into the member.
    LocusCount(const std::string& ld, double c, int nt) : 
        locus_desc(ld), count(c), num_transcripts(nt) {}
    std::string locus_desc;
    double count;
    int num_transcripts;
};

class ReadGroupProperties
{
public:
    
    ReadGroupProperties(); 
    
    Strandedness strandedness() const { return _strandedness; }
    void strandedness(Strandedness s) { _strandedness = s; }
    
    StandardMateOrientation std_mate_orientation() const { return _std_mate_orient; }
    void std_mate_orientation(StandardMateOrientation so)  { _std_mate_orient = so; }
    
	MateStrandMapping mate_strand_mapping() const { return _mate_strand_mapping; }
	void mate_strand_mapping(MateStrandMapping msm) { _mate_strand_mapping = msm; }
	
    Platform platform() const { return _platform; }
    void platform(Platform p)  { _platform = p; }   
    
    long double total_map_mass() const { return _total_map_mass; }
    void total_map_mass(long double p)  { _total_map_mass = p; }  
    
    long double normalized_map_mass() const { return _norm_map_mass; }
    void normalized_map_mass(long double p)  { _norm_map_mass = p; }  
    
    boost::shared_ptr<EmpDist const> frag_len_dist() const { return _frag_len_dist; }
    void frag_len_dist(boost::shared_ptr<EmpDist const> p)  { _frag_len_dist = p; }  
    
	boost::shared_ptr<BiasLearner const> bias_learner() const { return _bias_learner; }
    void bias_learner(boost::shared_ptr<BiasLearner const> bl)  { _bias_learner = bl; } 
	
    void mass_scale_factor(double sf) { _mass_scaling_factor = sf; }
    double mass_scale_factor() const  { return _mass_scaling_factor; }
    
    void complete_fragments(bool c)  { _complete_fragments = c; }
    bool complete_fragments() const { return _complete_fragments; }
    
    double scale_mass(double unscaled_mass) const 
    { 
        if (_mass_scaling_factor == 0)
            return unscaled_mass;
        
        return unscaled_mass * (1.0 / _mass_scaling_factor);
    }
    
    boost::shared_ptr<const MassDispersionModel> mass_dispersion_model() const 
    { 
        return _mass_dispersion_model; 
    };
    
    void mass_dispersion_model(boost::shared_ptr<const MassDispersionModel> nm) 
    { 
        _mass_dispersion_model = nm; 
    }
    
    const std::vector<LocusCount>& common_scale_counts() { return _common_scale_counts; }
    void common_scale_counts(const std::vector<LocusCount>& counts) { _common_scale_counts = counts; }
    
	boost::shared_ptr<MultiReadTable> multi_read_table() const {return _multi_read_table; }	
	void multi_read_table(boost::shared_ptr<MultiReadTable> mrt) { _multi_read_table = mrt;	}
	
private:
    
    Strandedness _strandedness;
    StandardMateOrientation _std_mate_orient;
	MateStrandMapping _mate_strand_mapping;
    Platform _platform;
    long double _total_map_mass;
    long double _norm_map_mass;
    boost::shared_ptr<EmpDist const> _frag_len_dist;
	boost::shared_ptr<BiasLearner const> _bias_learner;
	boost::shared_ptr<MultiReadTable> _multi_read_table;
    
    double _mass_scaling_factor;
    boost::shared_ptr<const MassDispersionModel> _mass_dispersion_model;
    std::vector<LocusCount> _common_scale_counts;
    
    bool _complete_fragments;
};

// Table of ReadGroupProperties keyed by a string.
// NOTE(review): the key is presumably the library type name accepted by the
// `library_type` option - confirm in init_library_table().
// Declared here, defined elsewhere.
extern std::map<std::string, ReadGroupProperties> library_type_table;

// Pointer to read-only properties; only const member functions of
// ReadGroupProperties can be called through it. Declared here, defined
// elsewhere.
extern const ReadGroupProperties* global_read_properties;

// Defined elsewhere.
// NOTE(review): presumably these print and populate library_type_table
// respectively - confirm in the definitions.
void print_library_table();
void init_library_table();


// Joins the elements of `container` into one string, placing `delimiter`
// between consecutive pieces. Empty elements are skipped entirely, so they
// never produce doubled, leading or trailing delimiters. An empty container
// (or one holding only empty elements) yields "".
//
// T must provide const_iterator/begin()/end(); each element must provide
// empty() and be appendable to a std::string with +=.
template<typename T>
std::string cat_strings(const T& container, const char* delimiter=",")
{
	std::string joined;

	for (typename T::const_iterator piece = container.begin();
		 piece != container.end();
		 ++piece)
	{
		if ((*piece).empty())
			continue;

		// Only separate once something has already been emitted.
		if (!joined.empty())
			joined += delimiter;
		joined += *piece;
	}

	return joined;
}

// Integer identifiers for command-line options, numbered consecutively
// from 260 to 299.
// NOTE(review): presumably these are the `val` codes returned by
// getopt_long for long-only options, chosen above the range of single
// character option codes - confirm against the option tables in the
// program sources. New identifiers must keep the values unique.
#define OPT_NUM_IMP_SAMPLES         260
#define OPT_MLE_MAX_ITER            261
#define OPT_FDR                     262
#define OPT_LIBRARY_TYPE            263
#define OPT_OVERHANG_TOLERANCE      264
#define OPT_MAX_BUNDLE_LENGTH       265
#define OPT_MIN_FRAGS_PER_TRANSFRAG 266
#define OPT_BIAS_MODE               267
#define OPT_MIN_INTRON_LENGTH       268
#define OPT_3_PRIME_AVGCOV_THRESH	269
#define OPT_3_PRIME_DROPOFF_FRAC    270
#define OPT_POISSON_DISPERSION      271
#define OPT_NO_UPDATE_CHECK         272
#define OPT_OUTPUT_FLD              273
#define OPT_OUTPUT_BIAS_PARAMS      274
#define OPT_USE_EM                  275
#define OPT_COLLAPSE_COND_PROB      276
#define OPT_RANDOM_SEED             277
#define OPT_NO_FAUX_READS           278
#define OPT_3_OVERHANG_TOLERANCE    279
#define OPT_INTRON_OVERHANG_TOLERANCE 280
#define OPT_EMIT_COUNT_TABLES       281
#define OPT_USE_COMPAT_MASS         282
#define OPT_USE_TOTAL_MASS          283
#define OPT_USE_FISHER_COVARIANCE   284
#define OPT_USE_EMPIRICAL_COVARIANCE   285
#define OPT_SPLIT_MASS              286
#define OPT_SPLIT_VARIANCE          287
#define OPT_BOOTSTRAP               288
#define OPT_NUM_BOOTSTRAP_SAMPLES   289
#define OPT_BOOTSTRAP_FRACTION      290
#define OPT_TILE_LEN                291
#define OPT_TILE_SEP                292
#define OPT_NO_5_EXTEND             293
#define OPT_MAX_FRAGS_PER_BUNDLE    294
#define OPT_READ_SKIP_FRACTION      295
#define OPT_NO_READ_PAIRS           296
#define OPT_TRIM_READ_LENGTH        297
#define OPT_MAX_DELTA_GAP           298
#define OPT_MLE_MIN_ACC             299
#endif