1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
#!/usr/bin/perl
# defaults
# constants
# usage notes
if (@ARGV < 1) {
print ("usage:\n mf2bin.pl inputfile [-out outputfile] \n");
exit(1);
}
# parse parameters
$tofile = 0;
for ($i=1; $i<@ARGV; $i++) {
if ($ARGV[$i] eq "-out") {
$tofile = 1;
$outfilename = $ARGV[++$i];
}
}
if ($tofile) {
open(OUTFILE, ">$outfilename");
}
# read in Multi-FASTA file
$infilename = $ARGV[0];
open(FASTAFILE, "$infilename") || die "Could not open $infilename.\n\n";
$line = <FASTAFILE>;
chomp $line;
$i=0;
%list=();
@seqs=(());
if (substr($line, 0, 1) eq ">") {
$_ = substr($line, 1);
/\w+/g;
@keys[$i] = $&;
$list{@keys[$i]}=$i;
} else {
print ("$filename is NOT a Multi-FASTA file...\n");
exit(1);
}
while ($line = <FASTAFILE>) {
chomp $line;
if (substr($line, 0, 1) eq ">") {
$i++;
$_ = substr($line, 1);
/\w+/g;
@keys[$i] = $&;
$list{@keys[$i]}=$i;
push @seqs, ();
} else {
push @{$seqs[$i]}, "$line";
}
}
$i=0;
for $row (@seqs) {
@strs[$i++] = join "", @$row;
}
if (@keys != 2) {
print ("mpack needs two FASTA sequences\n");
exit(1);
}
# pack bin
# format from Alex Poliakov's glass2bin.pl script
%base_code = ('-' => 0, 'A' => 1, 'C' => 2, 'T' => 3, 'G' => 4, 'N' => 5,
'a' => 1, 'c' => 2, 't' => 3, 'g' => 4, 'n' => 5);
$l = length @strs[0]; # $l--;
$s1 = reverse(@strs[0]);
$s2 = reverse(@strs[1]);
for ($i=0; $i<$l; $i++) {
if ($tofile) {
print OUTFILE pack("H2",
$base_code{chop($s1)} . $base_code{chop($s2)});
} else {
print pack("H2",
$base_code{chop($s1)} . $base_code{chop($s2)});
}
}
|