File: extractSequences.pl

package info (click to toggle)
snpeff 5.4.b%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 757,496 kB
  • sloc: java: 62,572; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (37 lines) | stat: -rwxr-xr-x 815 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/perl

#-------------------------------------------------------------------------------
# Extract fasta sequences matching names
#
# Reads a FASTA stream from STDIN and prints to STDOUT every record (the
# header line plus all lines up to the next header) whose sequence ID was
# given on the command line. The sequence ID is the text between the leading
# '>' of a header line and the first whitespace character after it.
#
# Usage: cat file.fa | ./extractSequences.pl id_1 .... id_N
#
#																Pablo Cingolani
#-------------------------------------------------------------------------------

use strict;
use warnings;

# Set to 1 to trace ID parsing and matching. Traces go to STDERR so they never
# end up mixed into the FASTA written to STDOUT.
my $debug = 0;

# Check command line arguments
die "Usage: cat file.fa | ./extractSequences.pl id_1 .... id_N\n" if( @ARGV < 1 );

# Read sequence names from command line (arguments)
my %wanted;
foreach my $id ( @ARGV ) {
	$wanted{$id} = 1;
	print STDERR "names{$id}\n" if $debug;
}

# Read and parse FASTA file from STDIN
my $match = 0;	# True while the current record is one of the requested sequences
while( my $line = <STDIN> ) {
	# Only a line that STARTS with '>' is a header. Anchoring matters: a '>'
	# appearing later in a line must not be mistaken for a new record.
	# \S* stops at the first whitespace, so the description (and any trailing
	# CR/LF) is excluded from the ID.
	if( $line =~ /^>(\S*)/ ) {
		my $id = $1;

		# 'exists' tests membership without comparing undef as a string
		$match = exists( $wanted{$id} ) ? 1 : 0;
		print STDERR "$line\tSequence name: '$id'\tmatch: $match\n" if $debug;
	}

	# Lines before the first header are dropped because $match starts at 0
	print $line if $match;
}