File: tools.h

package info (click to toggle)
hilive 2.0a-5
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 956 kB
  • sloc: cpp: 4,723; python: 241; xml: 188; sh: 35; makefile: 14
file content (254 lines) | stat: -rw-r--r-- 9,337 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/**
 * This class provides functions that are dependent of the alignmentSettings!
 * For functions that are not dependent of any other HiLive class, please use the tools_static class!
 * Please do NOT add further includes to this file since this will lead to unwanted dependencies!
 */

#ifndef TOOLS_H
#define TOOLS_H

/* DONT ADD ANY INCLUDES */
#include "tools_static.h"
#include "alignmentSettings.h"
#include "global_variables.h"
/* DONT ADD ANY INCLUDES */


//////////////////////////////////////
////////// Build file names //////////
//////////////////////////////////////

/**
 * Get the name of a bcl file.
 * @param ln The lane number.
 * @param tl The tile number.
 * @param cl The sequencing cycle.
 * @return Path to the bcl file.
 */
inline std::string get_bcl_fname(uint16_t ln, uint16_t tl, uint16_t cl) {
  std::ostringstream path_stream;
  path_stream << globalAlignmentSettings.get_root() << "/L" << to_N_digits(ln,3) << "/C" << cl << ".1/s_"<< ln <<"_" << tl << ".bcl";
  return path_stream.str();
}

/**
 * Get the name of an alignment file.
 * @param ln The lane number.
 * @param tl The tile number.
 * @param cl The cycle for the respective mate.
 * @param mt The mate number.
 * @return Path to the alignment file.
 */
inline std::string get_align_fname(uint16_t ln, uint16_t tl, uint16_t cl, uint16_t mt){
  std::ostringstream path_stream;
  std::string base = globalAlignmentSettings.get_temp_dir() != "" ? globalAlignmentSettings.get_temp_dir() : globalAlignmentSettings.get_root();
  path_stream << base << "/L" << to_N_digits(ln,3) << "/s_"<< ln << "_" << tl << "." << mt << "."<< cl << ".align";
  return path_stream.str();
}
/**
 * Get the name of a filter file.
 * @param ln The lane number.
 * @param tl The tile number.
 * @return Path to the filter file.
 */
inline std::string get_filter_fname(uint16_t ln, uint16_t tl) {
  std::ostringstream path_stream;
  path_stream << globalAlignmentSettings.get_root() << "/L" << to_N_digits(ln,3) << "/s_"<< ln << "_" << tl << ".filter";
  return path_stream.str();
}
/**
 * Get the name of a clocs file.
 * @param ln The lane number.
 * @param tl The tile number.
 * @return Path to the clocs file.
 */
inline std::string get_clocs_fname(uint16_t ln, uint16_t tl) {
  std::ostringstream path_stream;
  path_stream << globalAlignmentSettings.get_root() << "../L" << to_N_digits(ln,3) << "/s_"<< ln << "_" << tl << ".clocs";
  return path_stream.str();
}
/**
 * Get the name of the settings file.
 * @return Path to the settings file.
 */
inline std::string get_config_fname() {
	std::ostringstream path_stream;
	path_stream << globalAlignmentSettings.get_temp_dir() << "/hilive_config.ini";
	return path_stream.str();
}
/** Get the current sequencing cycle using the current alignment cycle and read number.
 * @param cycle The read cycle.
 * @param seq_id The sequence id (:= id of the respective element in globalAlignmentSettings::seqs)
 * @return The sequencing cycle.
 * @author Tobias Loka
 */
inline uint16_t getSeqCycle(uint16_t cycle, uint16_t seq_id) {
	uint16_t seq_cycle = cycle;
	for ( int i = 0; i < seq_id; i++ )
		seq_cycle += globalAlignmentSettings.get_seq_by_id(i).length;
	return seq_cycle;
}
/**
 * Get the cycle of a mate for a given sequencing cycle.
 * When the mate is completely finished in the given cycle, return its total sequence length.
 * @param mate_number Mate of interest.
 * @param seq_cycle The sequencing cycle.
 * @return Cycle of the mate in the given sequencing cycle.
 * @author Tobias Loka
 */
inline uint16_t getMateCycle( uint16_t mate_number, uint16_t seq_cycle ) {

	// Invalid mate
	if ( mate_number == 0 || mate_number > globalAlignmentSettings.get_mates() )
		return 0;

	// Iterate through all sequence elements (including barcodes)
	for ( CountType id = 0; id < globalAlignmentSettings.get_seqs().size(); id++ ) {

		// Current sequence element
		SequenceElement seq = globalAlignmentSettings.get_seq_by_id(id);

		// Seq is mate of interest
		if ( seq.mate == mate_number )
			return ( seq.length > seq_cycle ? seq_cycle : seq.length );

		// Not enough cycles left to reach mate of interest
		else if ( seq.length >= seq_cycle )
			return 0;

		// Reduce number of cycles by the Seq length
		else
			seq_cycle -= seq.length;

	}

	// Should not be reached
	return 0;
}
////////////////////////////////////
////////// SAM/BAM output //////////
////////////////////////////////////

/**
 * Get the header for a SAM/BAM output file.
 * @return The BAM header.
 * @author Tobias Loka
 */
seqan2::BamHeader getBamHeader();

/**
 * Name of a temporary SAM/BAM file (for the time it is written).
 * @param barcode Barcode of the output file (or "undetermined" for undetermined reads)
 * @param cycle The output cycle.
 * @return Name of the temporary output file for writing.
 * @author Tobias Loka
 */
inline std::string getBamTempFileName(std::string barcode, CountType cycle) {
	std::ostringstream fname;
	std::string file_suffix = globalAlignmentSettings.get_output_format() == OutputFormat::BAM ? ".bam" : ".sam";
	fname << globalAlignmentSettings.get_out_dir() << "/hilive_out_" << "cycle" << std::to_string(cycle) << "_" << barcode << ".temp" << file_suffix;
	return fname.str();
}

/**
 * Final name of a SAM/BAM file.
 * @param barcode Barcode of the output file (or "undetermined" for undetermined reads)
 * @param cycle The output cycle.
 * @return Name of the final output file.
 * @author Tobias Loka
 */
inline std::string getBamFileName(std::string barcode, CountType cycle) {
	std::ostringstream fname;
	std::string file_suffix = globalAlignmentSettings.get_output_format() == OutputFormat::BAM ? ".bam" : ".sam";
	fname << globalAlignmentSettings.get_out_dir() << "/hilive_out_" << "cycle" << std::to_string(cycle) << "_" << barcode << file_suffix;
	return fname.str();
}
/** Reverse a MD:Z tag for reverse alignments. */
std::string reverse_mdz(std::string mdz);


/////////////////////////////
////////// Scoring //////////
/////////////////////////////

inline uint16_t getMinSingleErrorPenalty() {

	// Mismatch: +1 mismatch, -1 match
	uint16_t mismatch_penalty = globalAlignmentSettings.get_mismatch_penalty() + globalAlignmentSettings.get_match_score();

	// Deletion: +1 deletion, maximum number of matches can still be reached
	uint16_t deletion_penalty = globalAlignmentSettings.get_deletion_opening_penalty() + globalAlignmentSettings.get_deletion_extension_penalty();

	// Insertion: +1 insertion, -1 match
	uint16_t insertion_penalty = globalAlignmentSettings.get_insertion_opening_penalty() + globalAlignmentSettings.get_insertion_extension_penalty() + globalAlignmentSettings.get_match_score();

	return std::min(mismatch_penalty, std::min(insertion_penalty, deletion_penalty));
}

inline uint16_t getMaxSingleErrorPenalty() {

	// Mismatch: +1 mismatch, -1 match
	uint16_t mismatch_penalty = globalAlignmentSettings.get_mismatch_penalty() + globalAlignmentSettings.get_match_score();

	// Deletion: +1 deletion, maximum number of matches can still be reached
	uint16_t deletion_penalty = globalAlignmentSettings.get_deletion_opening_penalty() + globalAlignmentSettings.get_deletion_extension_penalty();

	// Insertion: +1 insertion, -1 match
	uint16_t insertion_penalty = globalAlignmentSettings.get_insertion_opening_penalty() + globalAlignmentSettings.get_insertion_extension_penalty() + globalAlignmentSettings.get_match_score();

	return std::max(mismatch_penalty, std::max(insertion_penalty, deletion_penalty));
}

inline ScoreType getMaxPossibleScore( CountType cycles ) {
	return cycles * globalAlignmentSettings.get_match_score();
}

inline CountType getMinSoftclipPenalty( CountType softclip_length ) {
	return ceil( float(softclip_length) / globalAlignmentSettings.get_anchor_length() ) * getMinSingleErrorPenalty();
}

inline ScoreType getMinCycleScore( CountType cycle, CountType read_length ) {

	if ( cycle < globalAlignmentSettings.get_anchor_length() )
		return globalAlignmentSettings.get_min_as();

	ScoreType maxScore = getMaxPossibleScore(read_length);
	ScoreType minCycleScore = maxScore - ( ceil((cycle - globalAlignmentSettings.get_anchor_length()) / float(globalAlignmentSettings.get_error_rate())) * getMinSingleErrorPenalty() );
	return std::max(minCycleScore, globalAlignmentSettings.get_min_as());
}

/////////////////////////////////
////////// Other stuff //////////
/////////////////////////////////

/**
 * Copy a file while locking them in the global fileLocks.
 */
int atomic_rename( const char *oldname, const char *newname );

/**
 * Check if a cycle is a seeding cycle.
 */
inline bool isSeedingCycle(CountType cycle) {

	// Don't seed cycles smaller than the anchor length
	if ( cycle < globalAlignmentSettings.get_anchor_length() )
		return false;

	// Don't seed cycles if the maximal softclip length would be exceeded
	if ( ( cycle - globalAlignmentSettings.get_anchor_length() ) > globalAlignmentSettings.get_max_softclip_length())
		return false;

	// Create seeds when reaching the anchor length for the first time
	if ( cycle == globalAlignmentSettings.get_anchor_length() )
		return true;

	// Create seeds every seeding_interval cycle after the first anchor
	if ( ( cycle - globalAlignmentSettings.get_anchor_length() ) % globalAlignmentSettings.get_seeding_interval() == 0 )
		return true;

	return false;

}

#endif /* TOOLS_H */