File: transcriptTypes.awk

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (36 lines) | stat: -rw-r--r-- 666 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# requires "trTypes.txt" - a file with transcript types
# e.g. for Gencode GTF
# awk '$3=="transcript" {a=$0; gsub(/.*transcript_id "/,"",a);gsub(/".*/,"",a);;b=$0; gsub(/.*gene_type "/,"",b);gsub(/".*/,"",b); print a,b}' Gencode.gtf > trTypes.txt

# Load the transcript-ID -> type lookup table (tT) from "trTypes.txt" and set
# up output formatting before any input record is processed.
#   tT[id] : type string taken from column 2 of trTypes.txt, keyed by column 1
#   rt     : per-read set of types, filled by the main rule
BEGIN {
  OFS = "\t";

  # Touch and clear rt so it is known to be an array before length(rt) is
  # ever evaluated (also on the early-exit path below, since END still runs).
  rt[1] = 0;
  delete rt;

  trTypesFile = "trTypes.txt";

  # getline returns 1 on success, 0 at end of file and -1 on error. The
  # explicit "> 0" is essential: a bare `while (getline < file)` treats -1 as
  # true and loops forever when the file is missing or unreadable.
  while ((status = (getline < trTypesFile)) > 0) {
    tT[$1] = $2;
  }

  if (status < 0) {
    print "transcriptTypes.awk: cannot read \"" trTypesFile "\"" > "/dev/stderr";
    exit 1;
  }

  close(trTypesFile);
}

# Rule 1: the value in column 1 differs from the current read name (r), so the
# previous group of records is complete. Records belonging to one read must be
# adjacent in the input; a group is tallied only when a later record with a
# different column 1 arrives.
$1 != r {
  # Count the finished read only if it hit exactly one transcript type.
  if (length(rt) == 1)
    for (trType in rt) {
      # A transcript listed without a type yields an empty key; report the read.
      if (trType == "")
        print r;
      nT[trType]++;
    }

  # Start a fresh group for the new read name.
  delete rt;
  r = $1;
}

# Rule 2: column 3 is a known transcript ID -> remember its type for this read.
# rt is used as a set, so repeated types collapse into a single key.
$3 in tT {
  rt[tT[$3]] = 1;
}
 
# Finish the tally and print one "type<OFS>count" line per transcript type.
END {
  # The main rule tallies a read only when the NEXT read name shows up, so the
  # last read of the input is still pending in rt/r here. Tally it with the
  # same "exactly one type" criterion; otherwise it would be silently dropped.
  if (length(rt) == 1) {
    for (tt in rt) {
      if (tt == "") print r;
      nT[tt]++;
    }
  }

  # Output order follows awk's (unspecified) array traversal order.
  for (tt in nT) {
    print tt, nT[tt];
  }
}