File: Cigar.cpp

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (147 lines) | stat: -rw-r--r-- 3,269 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#include "Cigar.hpp"
#include <string>
#include <sstream>
#include "sequenceUtil.hpp"
#include <stdlib.h>

namespace Cigar {

  vector<t_cigar> parse_cigar(string& cigar_txt) {
	
	vector<t_cigar> cigar_eles;
	
	string len_text;
	
	for (unsigned int i = 0; i < cigar_txt.length(); i++) {
	  char c = cigar_txt[i];
	  if (c >= '0' && c <= '9') {
		len_text = len_text + c;
	  }
	  else {
		t_cigar ele;
		ele.len = atol(len_text.c_str());
		ele.code = c;
		cigar_eles.push_back(ele);
		len_text = "";
	  }
	}
    
	return(cigar_eles);
	
  }



  string construct_cigar(vector<alignment_segment>& segments, unsigned int read_length, 
						 const string& genome_seq, char strand) {

	stringstream cigar_text;

	bool check_splice = (genome_seq != "" && strand);
	
	for (unsigned int i = 0; i < segments.size(); i++) {
	  
	  alignment_segment seg = segments[i];
	  
	  unsigned long curr_genome_lend = seg.genome_lend;
	  unsigned long curr_genome_rend = seg.genome_rend;
	  unsigned long curr_query_lend = seg.query_lend;
	  unsigned long curr_query_rend = seg.query_rend;
	  
	  if (i == 0) {

		if (curr_query_lend > 1) {
		  
		  cigar_text << curr_query_lend - 1 << "S";
		}
	  }
	  else {

		alignment_segment prev_seg = segments[i-1];
		
		unsigned long prev_genome_rend = prev_seg.genome_rend;
		unsigned long prev_query_rend = prev_seg.query_rend;

		unsigned long delta_genome = curr_genome_lend - prev_genome_rend;
		if (delta_genome > 1) {
		  char deletion_intron_char = (check_splice) 
			? check_intron_consensus(prev_genome_rend, curr_genome_lend, genome_seq, strand)
			: 'D';

		  cigar_text << delta_genome - 1 << deletion_intron_char;
		}

		unsigned long delta_query = curr_query_lend - prev_query_rend;
		if (delta_query > 1) {
		  cigar_text << delta_query - 1 << 'I';
		}
	  }
		
	  unsigned long len = curr_genome_rend - curr_genome_lend + 1;
		
	  cigar_text << len << 'M';
	  
	  if (i == segments.size() - 1) {
		
		if (curr_query_rend < read_length) {
		  cigar_text << read_length - curr_query_rend << 'S';
		}
	  }
	  
	}
	
	return(cigar_text.str());
	
  }


  char check_intron_consensus(unsigned long prev_exon_rend, unsigned long next_exon_lend, 
							  const string& genome_seq, char strand) {

	
	unsigned int genome_seq_length = genome_seq.length();
	if (prev_exon_rend > genome_seq_length || next_exon_lend > genome_seq_length) {
	  stringstream errmsg;
	  errmsg << "Error, coordinates: " << prev_exon_rend << " and " << next_exon_lend 
			 << " are not entirely within genome sequence length: " << genome_seq_length;

	  throw(errmsg.str());
	}
	
	
	string left_dinuc;
	left_dinuc += genome_seq[prev_exon_rend];
	left_dinuc += genome_seq[prev_exon_rend + 1];
	
	string right_dinuc;
	right_dinuc += genome_seq[next_exon_lend -3];
	right_dinuc += genome_seq[next_exon_lend-2];
	
	if (strand == '-') {
	  string left_dinuc_copy = left_dinuc;
	  string right_dinuc_copy = right_dinuc;
	  
	  left_dinuc = revcomp(right_dinuc_copy);
	  right_dinuc = revcomp(left_dinuc_copy);
	}

	if (
		( (left_dinuc == "GT" || left_dinuc == "GC") && right_dinuc == "AG")
		||
		(left_dinuc == "CT" && right_dinuc == "AC")
		) {
	  
	  return('N'); // has proper splice boundaries.
	}
	else {
	  return('D');
	}

  }
  
  


}  // end of namespace