File: joinPeptides.pl

package info (click to toggle)
augustus 3.4.0%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 758,480 kB
  • sloc: cpp: 65,451; perl: 21,436; python: 3,927; ansic: 1,240; makefile: 1,032; sh: 189; javascript: 32
file content (68 lines) | stat: -rwxr-xr-x 1,427 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/perl

# Creates a fasta file with unique peptides from two peptide fasta input files
# Deletes redundant entries. Multiplicity information is taken from the first
# of two fasta input files.

use strict;
use warnings;

my $usage = "joinPeptides.pl in1.fa in2.fa > out.fa\n\nRead script head comments for futher documentation!\n\n";

if (@ARGV != 2) {
	print STDERR $usage;
	exit -1;
}

my ($in1, $in2) = @ARGV;

# Longest sequence line accepted; a longer line aborts the whole run.
my $maxLen = 100;

# Maps peptide sequence => header line recorded at its first occurrence.
my %peptides = ();

# Peptide sequences in the order they were first seen, so that the output
# order is reproducible instead of following Perl's hash ordering.
my @order = ();

# The first file is read first, so for a peptide present in both inputs the
# header (and whatever information it carries) of the first file is kept.
for my $file ($in1, $in2) {
	readPeptides($file, $maxLen, \%peptides, \@order);
}

# print unique fasta file
for my $peptide (@order) {
	print "$peptides{$peptide}\n";
	print "$peptide\n";
}

# readPeptides($file, $maxLen, \%peptides, \@order)
#
# Reads the fasta file $file line by line. Lines starting with '>' are
# remembered as the current header; every other line is treated as one
# peptide sequence. Dashes are removed from the sequence, and if it has not
# been seen before it is stored in %peptides (sequence => current header)
# and appended to @order. Sequences that are already known keep the header
# they were first stored with.
#
# Sequence lines that appear before the first header of a file get an empty
# header. Lines that are empty (or consist of dashes only) are skipped.
#
# Dies if the file cannot be opened or closed. Prints a message to STDERR
# and exits with -1 if a sequence line is longer than $maxLen characters
# (measured before dashes are removed).
sub readPeptides {
	my ($file, $maxLen, $peptides, $order) = @_;

	my $header = '';

	open(my $fh, "<", $file) or die("Could not open file $file!\n");
	while (my $line = <$fh>) {
		chomp $line;

		if ($line =~ m/^>/) {
			$header = $line;
			next;
		}

		if (length($line) > $maxLen) {
			print STDERR "The peptide $line is longer $maxLen. Aborting.\n";
			exit -1;
		}

		# Delete weird dashes in sequences. tr///d edits the string in
		# place; assigning the result of s/// instead would store the
		# substitution count rather than the cleaned sequence.
		$line =~ tr/-//d;

		# A blank line is not a peptide; do not emit an empty record.
		next if $line eq '';

		# First occurrence wins: never overwrite an existing header.
		next if exists $peptides->{$line};

		$peptides->{$line} = $header;
		push @{$order}, $line;
	}
	close $fh or die("Could not close file $file!\n");

	return;
}