File: simpleFastaHeaders.pl

package info (click to toggle)
augustus 3.4.0%2Bdfsg2-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 758,480 kB
  • sloc: cpp: 65,451; perl: 21,436; python: 3,927; ansic: 1,240; makefile: 1,032; sh: 189; javascript: 32
file content (37 lines) | stat: -rwxr-xr-x 1,215 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/perl

# Katharina J. Hoff, May 5th 2011
#
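# PASA sometimes has problems with unusual FASTA headers.
# This script replaces the headers of a FASTA file with "simple headers"
# (a prefix followed by a running number) without any special characters or spaces,
# and writes a table that maps each new header to the original one.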

use strict;
use warnings;

# Usage text printed to STDERR when the argument count is wrong.
my $USAGE="simpleFastaHeaders.pl in.fa prefix out.fa mapping.txt\n\n in.fa - the file to be reformatted\n prefix - the prefix of every new header, e.g. contig\n out.fa - the reformatted fasta file\n mapping.txt - a tab-separated mapping table (newName \t oldName)\n\n";

# Exactly four positional arguments are required; anything else is a usage error.
if (@ARGV != 4) {
    print STDERR $USAGE;
    exit -1;
}

# Positional arguments:
#   $inFile  - FASTA file to read
#   $prefix  - string placed in front of the running number in every new header
#   $outFa   - FASTA file to write (headers replaced, all other lines copied verbatim)
#   $mapping - tab-separated table to write: ">newName<TAB>original header line"
my ($inFile, $prefix, $outFa, $mapping) = @ARGV;
my $preC = 1; # counter for prefix; the first header becomes "<prefix>1"

# Lexical filehandles are used instead of bareword globals, and $! is included
# in every message so the user sees *why* an open/close failed.
open(my $in_fh,  "<", $inFile)  or die("Could not open file $inFile: $!\n");
open(my $out_fh, ">", $outFa)   or die("Could not open file $outFa: $!\n");
open(my $map_fh, ">", $mapping) or die("Could not open file $mapping: $!\n");

while (my $line = <$in_fh>) {
    if ($line =~ m/^>/) {
        # Header line: strip the line terminator. A plain chomp would leave a
        # trailing "\r" from CRLF input inside the mapping table, so remove
        # carriage returns as well.
        $line =~ s/[\r\n]+\z//;

        my $newHeader = ">" . $prefix . $preC;

        # The old header is written including its leading ">".
        print {$map_fh} "$newHeader\t$line\n";
        print {$out_fh} "$newHeader\n";
        $preC++;
    }
    else {
        # Every non-header line is copied through unchanged.
        print {$out_fh} $line;
    }
}

# Buffered write errors only surface at close, so the close calls are checked too.
close($in_fh)  or die("Could not close file $inFile: $!\n");
close($out_fh) or die("Could not close file $outFa: $!\n");
close($map_fh) or die("Could not close file $mapping: $!\n");