File: filterGenesIn_mRNAname.pl

package info (click to toggle)
augustus 3.2.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 289,676 kB
  • sloc: cpp: 48,711; perl: 13,339; ansic: 1,251; makefile: 859; sh: 58
file content (100 lines) | stat: -rwxr-xr-x 2,188 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/perl

#############################################################
# filterGenes
# filter genes from a genbank flat file database
# usage: filterGenesIn_mRNAname.pl namefile dbfile
#
#
# Mario Stanke, Simone Lange, Katharina Hoff; 21.12.2015
#############################################################

use strict;
use warnings;

# Exactly two arguments are required: the file listing the names to keep and
# the GenBank flat file to filter.
#
# The filtered records are written to STDOUT, so the usage text goes to STDERR
# and the script exits with a non-zero status. Otherwise a wrong invocation
# with redirected output would look successful and leave the usage text in the
# result file.
#
# NOTE(review): the usage text says only the first of identical loci is kept;
# no de-duplication is visible in the filter loop of this file -- confirm.
if (@ARGV != 2) {
    print STDERR "usage: filterGenesIn_mRNAname.pl namefile dbfile\n";
    print STDERR "names of the loci to be kept come from\n";
    print STDERR "the first parameter. Only the first of identical loci is kept\n";
    exit 1;
}
my $origfilename = $ARGV[1];    # second argument: GenBank flat file (db)
my $goodfilename = $ARGV[0];    # first argument: file with the names to keep

# Collect the names to keep. Every line of the name file that contains a
#   transcript_id "NAME"
# attribute (e.g. the attribute column of a GTF file) contributes NAME as a
# key of %goodids; lines without such an attribute are ignored.
my %goodids;
open(my $GOODFILE, "<", $goodfilename)
    or die "Couldn't open goodfile $goodfilename: $!\n";
while (my $line = <$GOODFILE>) {
    # [^"]* stops at the closing quote of the transcript_id value. A greedy
    # .* would run on to the last quote of the line and swallow any
    # attributes that follow (e.g. transcript_id "t1"; gene_id "g1";).
    if ($line =~ m/transcript_id "([^"]*)"/) {
        $goodids{$1} = 1;
    }
}
close($GOODFILE) or die "Couldn't close goodfile $goodfilename!\n";

# Filter the GenBank flat file and print the wanted records to STDOUT.
#
# The file is scanned line by line (default input record separator) with a
# small state machine:
#   * From a LOCUS line on, the header lines of the record (LOCUS, FEATURES,
#     source, mRNA, CDS and the continuation lines of mRNA and CDS) are
#     buffered in $head, because the name of the gene is only known once the
#     /gene="NAME" qualifier following the CDS has been read.
#   * If NAME is a key of %goodids, the buffer is printed and $printFlag is
#     set, so this line and every following line are printed directly until
#     the next LOCUS line resets the state.
#   * If NAME is not wanted, nothing is printed; the buffer is kept in case a
#     later CDS of the same locus carries a wanted name.
#
# NOTE(review): in that last case the /gene line of the unwanted CDS is not
# part of the buffer, so it is missing from the output if the locus is printed
# later because of another CDS -- confirm whether multi-gene loci occur.
open(my $ORIGFILE, "<", $origfilename)
    or die "Couldn't open dbfile $origfilename: $!\n";

# Feature keys must be the first token on their line. Unanchored patterns
# would also fire on sequence or gene names and qualifier values that merely
# contain "mRNA", "CDS" or "source" (e.g. /gene="...-mRNA-1"), which corrupts
# the state and duplicates lines in the output.
my $source_key = qr/^\s*source\s/;
my $mrna_key   = qr/^\s*mRNA\s/;
my $cds_key    = qr/^\s*CDS\s/;

my $head           = "";   # buffered header lines of the current record
my $mRNAflag       = 0;    # 1 while inside an mRNA feature (key + continuation lines)
my $cdsFlag        = 0;    # 1 after a CDS key until its /gene qualifier is seen
my $genename       = "";   # name taken from the most recent /gene qualifier
my $printFlag      = 0;    # 1 while the current record is being printed
my $firstPrintFlag = 0;    # 1 right after a gene name was read and must be checked

while (my $line = <$ORIGFILE>) {
    if ($line =~ m/^LOCUS/) {
        # A new record starts: forget everything about the previous one so
        # that no state leaks across records (e.g. an mRNA without a CDS).
        $head      = $line;
        $genename  = "";
        $printFlag = 0;
        $mRNAflag  = 0;
        $cdsFlag   = 0;
    }
    if ($line =~ m/^FEATURES/) {
        $head .= $line;
    }
    if ($line =~ $source_key) {
        $head .= $line;
    }
    # Continuation lines of an mRNA feature. The mRNA key line itself is
    # added by the next test, because $mRNAflag is still 0 when it is read.
    if ($mRNAflag == 1 and $line !~ $cds_key) {
        $head .= $line;
    }
    if ($line =~ $mrna_key) {
        $mRNAflag = 1;
        $head .= $line;
    }
    if ($cdsFlag == 1) {
        if ($line =~ m/gene="/) {
            # The name is the text between the first pair of double quotes.
            my @tmp = split(/"/, $line);
            $genename       = $tmp[1];
            $cdsFlag        = 0;
            $firstPrintFlag = 1;
        }
        else {
            # Continuation line of the CDS location.
            $head .= $line;
        }
    }
    if ($line =~ $cds_key) {
        $mRNAflag = 0;
        $head .= $line;
        $cdsFlag = 1;
    }
    if ($firstPrintFlag == 1 and length($head) >= 2) {
        if ($goodids{$genename}) {
            # While the record is already being printed, every buffered line
            # has been printed when it was read, so printing the buffer again
            # would duplicate those lines.
            print $head unless $printFlag;
            $head      = "";
            $printFlag = 1;
        }
        $firstPrintFlag = 0;
    }
    if ($printFlag == 1) {
        print $line;
    }
}
close($ORIGFILE);