File: filterCirc.awk

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (27 lines) | stat: -rw-r--r-- 866 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# cigarGenomicDist(cig)
#   Adds up the lengths of the operations in the CIGAR string `cig`,
#   leaving out operations whose code is "S" or "I", and returns the sum.
#   An empty string yields 0.
#
#   cig  - CIGAR string; assumed to be <length><UPPERCASE letter> pairs
#          starting with a length (e.g. "5S20M3I7M") -- the split logic
#          below depends on that layout.
#
#   The names after `cig` are function-local scratch variables (the usual
#   awk idiom for locals); callers pass only `cig`.
function cigarGenomicDist(cig,    nOps, opLen, opCode, dist, k)
{
        # Splitting on the operation letters leaves the lengths plus one
        # trailing empty piece after the last letter, hence the -1.
        nOps=split(cig,opLen,/[A-Z]/)-1;
        # Splitting on digit runs leaves the operation codes. Because the
        # string starts with a length, opCode[1] is empty and the code of
        # operation k sits at opCode[k+1]. "+" (not "*") keeps the separator
        # from matching the empty string, which awk leaves unspecified.
        split(cig,opCode,/[0-9]+/);
        dist=0;
        for (k=1;k<=nOps;k++) { # scan through CIGAR operations
                if (opCode[k+1]!="S" && opCode[k+1]!="I") {
                        dist+=opLen[k];
                }
        }
        return dist;
}
# One-time setup, executed before the first input record is read.
BEGIN {
        OFS = "\t";     # print joins its arguments with tabs
        endTol = 5;     # slack used by both alignment-extent checks in the main rule
}
# Per-record filter.
# Gate: field 7 is non-negative, fields 1 and 4 are equal, fields 3 and 6 are
# equal, and fields 2 and 5 are ordered according to the strand in field 3
# ("-": $5>$2, "+": $2>$5) and less than 1000000 apart.
# NOTE(review): the column layout looks like STAR's Chimeric.out.junction
# (chr/breakpoint/strand of both sides, junction type, repeats, read name,
# segment starts and CIGARs) -- confirm against the STAR manual.
$7>=0 && $1==$4 && $3==$6 &&
(($3=="-" && $5>$2 && $5-$2<1000000) ||
 ($3=="+" && $2>$5 && $2-$5<1000000)) {
        #print $1,$2,$5,$3,$7,$8,$9;
        #print $11,$11+cigarGenomicDist($12),$13,$13+cigarGenomicDist($14);

        # The gate guarantees field 3 is either "+" or "-", so a plain else
        # covers the "-" case. Each branch checks, within endTol, that the
        # start in one segment and the CIGAR-derived end of the other stay
        # inside the interval delimited by fields 2 and 5.
        if ($3=="+") {
                if ($11+endTol>$5 && $13+cigarGenomicDist($14)-endTol<=$2) {
                        # smaller coordinate first, strand reported flipped
                        print $1,$5,$2,"-",($7==0?0:3-$7),$8,$9,(NF>=15 ? $15:1);
                }
        } else {
                if ($13+endTol>$2 && $11+cigarGenomicDist($12)-endTol<=$5) {
                        print $1,$2,$5,"+",($7==0?0:3-$7),$8,$9,(NF>=15 ? $15:1);
                }
        }
}