1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
#!/usr/bin/perl
# Creates a fasta file with unique peptides from two peptide fasta input files
# Deletes redundant entries. Multiplicity information is taken from the first
# of two fasta input files.
use strict;
use warnings;

my $usage = "joinPeptides.pl in1.fa in2.fa > out.fa\n\nRead script head comments for futher documentation!\n\n";
if (@ARGV != 2) { print STDERR $usage; exit -1; }

my ($in1, $in2) = @ARGV;
my $maxLen = 100;    # longest sequence line accepted before aborting
my %peptides;        # peptide sequence => header of its first occurrence

# readPeptides($file, \%peptides, $limit)
#
# Reads the fasta file $file line by line and records every peptide that is
# not yet a key of %peptides, together with the most recent header line
# (a line starting with '>') seen in that file.
#
#   $file      path of the fasta file to read
#   $peptides  hash reference, filled in place (sequence => header)
#   $limit     maximum allowed length of a raw sequence line
#
# Dies if the file cannot be opened or closed. Prints a message to STDERR
# and exits with status -1 if a sequence line is longer than $limit.
sub readPeptides {
    my ($file, $peptides, $limit) = @_;

    # Header tracking starts fresh for every file; a sequence line that
    # precedes any header is stored with an empty header.
    my $header = '';

    open(my $fh, "<", $file) or die("Could not open file $file!\n");
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ m/^>/) {
            $header = $line;
            next;
        }
        # The length limit applies to the line as read, before dashes
        # are stripped.
        if (length($line) > $limit) {
            print STDERR "The peptide $line is longer $limit. Aborting.\n";
            exit -1;
        }
        # Delete weird dashes in sequences. tr///d edits the string in
        # place; assigning the result of s/// would store the match count
        # instead of the sequence.
        $line =~ tr/-//d;
        # Skip blank lines (and lines that consisted of dashes only) so
        # they do not turn into an empty peptide entry.
        next if $line eq '';
        # Keep the header of the first occurrence, so that information
        # from the first input file takes precedence over the second.
        $peptides->{$line} = $header unless exists $peptides->{$line};
    }
    close($fh) or die("Could not close file $file!\n");
    return;
}

# The first file is read first, so its headers win for shared peptides.
readPeptides($in1, \%peptides, $maxLen);
readPeptides($in2, \%peptides, $maxLen);

# print unique fasta file (sorted by peptide so the output is reproducible)
for my $seq (sort keys %peptides) {
    print "$peptides{$seq}\n";
    print "$seq\n";
}
|