File: mproject.pl

package info (click to toggle)
lagan 2.0-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,360 kB
  • sloc: ansic: 8,542; perl: 7,732; cpp: 3,260; makefile: 85
file content (90 lines) | stat: -rwxr-xr-x 1,319 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/perl

# mproject.pl -- project a multi-FASTA alignment onto a subset of its rows.
#
# usage:
#  mproject.pl filename seqname1 [seqname2 ... ]
#
# Reads the multi-FASTA file `filename`, selects the sequences whose names
# are given on the command line, drops every column in which all selected
# sequences have a gap ("-"), and prints the selected sequences to standard
# output in FASTA format, in the order requested.
#
# A sequence's name is the first run of word characters (\w+) that follows
# the ">" of its header line.  If a name occurs more than once in the file,
# the last occurrence is the one selected.
#
# Exit status: 0 on success, 1 on a usage error or when the file does not
# start with a FASTA header; the script dies if the file cannot be opened or
# a requested name is not present in the file.

use strict;
use warnings;

use constant LINE_WIDTH => 60;    # residues per output line

# read_multi_fasta($filename)
#
# Parses $filename as multi-FASTA.  Returns three references
# (\@names, \@sequences, \%index_of_name), where $sequences[$i] is the
# concatenation of all lines that follow header $i.  Returns the empty list
# if the file is empty or its first line is not a ">" header.  Dies if the
# file cannot be opened.
sub read_multi_fasta {
    my ($filename) = @_;

    # Three-argument open: the name is taken literally, so an argument such
    # as "cmd |" or ">file" cannot run a command or truncate a file.
    open(my $fh, '<', $filename) or die "Could not open $filename: $!\n";

    my (@names, @seqs, %index_of);
    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;    # chomp, also tolerating CRLF line endings

        if (substr($line, 0, 1) eq '>') {
            # Capture explicitly instead of relying on $&: after a failed
            # match $& still holds the previous header's name, which would
            # silently re-map that name to this sequence.
            my ($name) = substr($line, 1) =~ /(\w+)/;
            $name = '' unless defined $name;

            push @names, $name;
            push @seqs,  '';
            $index_of{$name} = $#names;
        }
        elsif (@names) {
            $seqs[-1] .= $line;
        }
        else {
            # Data before the first header: not a multi-FASTA file.
            close($fh);
            return;
        }
    }
    close($fh);

    return unless @names;
    return (\@names, \@seqs, \%index_of);
}

# kept_columns($width, @rows)
#
# Returns the indices (ascending) of the columns 0 .. $width-1 in which at
# least one of the equal-length strings in @rows is not a gap character.
sub kept_columns {
    my ($width, @rows) = @_;

    my @keep;
    COLUMN:
    for my $col (0 .. $width - 1) {
        for my $row (@rows) {
            if (substr($row, $col, 1) ne '-') {
                push @keep, $col;
                next COLUMN;    # one non-gap residue is enough
            }
        }
    }
    return @keep;
}

# main(@argv)
#
# Runs the tool on the given argument list (file name followed by one or
# more sequence names), printing to STDOUT.  Returns the process exit status.
sub main {
    my @args = @_;

    if (@args < 2) {
        print "usage:\n mproject.pl filename seqname1 [seqname2 ... ]\n";
        return 1;
    }
    my ($filename, @targets) = @args;

    my ($names, $seqs, $index_of) = read_multi_fasta($filename);
    if (!$names) {
        print "$filename is NOT a Multi-FASTA file...\n";
        return 1;
    }

    # Resolve every requested name before printing anything.  An unknown
    # name must be an error: treating the failed lookup as index 0 would
    # print the first sequence of the file in its place.
    my @selected;
    for my $target (@targets) {
        die "Sequence $target not found in $filename.\n"
            unless exists $index_of->{$target};
        push @selected, $index_of->{$target};
    }

    # The column count comes from the selected sequences themselves.  In a
    # well-formed alignment all rows have the same length; if they do not,
    # the missing tail of a shorter row is treated as gaps.
    my $width  = 0;
    my $ragged = 0;
    for my $idx (@selected) {
        my $len = length $seqs->[$idx];
        $ragged = 1 if $width && $len != $width;
        $width  = $len if $len > $width;
    }
    warn "Warning: selected sequences differ in length; "
        . "shorter ones are padded with gaps.\n"
        if $ragged;

    my @rows = map { $_ . ('-' x ($width - length $_)) }
               map { $seqs->[$_] } @selected;

    my @keep     = kept_columns($width, @rows);
    my $per_line = LINE_WIDTH;

    for my $k (0 .. $#selected) {
        my $projected = join '', map { substr($rows[$k], $_, 1) } @keep;

        # A newline follows every full line of residues and one more ends
        # the record, so a projected length that is an exact multiple of
        # LINE_WIDTH is followed by an empty line (historical output format,
        # kept byte-compatible).
        $projected =~ s/(.{$per_line})/$1\n/gs;

        print '>', $names->[ $selected[$k] ], "\n", $projected, "\n";
    }

    return 0;
}

# Run only when executed as a script, so the subs above can also be loaded
# (require/do) without side effects.
exit main(@ARGV) unless caller;