1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
#! /opt/local/bin/perl5 -w
#####################################################################
#
# Filename: get_sequences.pl - get specific sequences from a database
#
# Input:
# File containing names of sequences
# Name of a sequence
# Action:
# Retrieves the sequences from the sequence database
# Output:
# File called $seqName.seqs
#
# By: Bob Chan, bobchan@howard.fhcrc.org
# Last modified: 12/13/00
#
#####################################################################
## Constants --------------------------------------------------------
my %seq_names_hash;
## Main
# Handle command-line arguments
unless (scalar(@ARGV) == 2) {
die "Usage: $0 seq_names_file seq_db_file\n";
}
my $seq_names_filename = shift;
my $seq_db_filename = shift;
open(SEQS, "$seq_names_filename") ||
die "Error: Could not open file $seq_names_filename for reading!\n";
while (<SEQS>) {
chomp;
my $line = $_;
if ($line =~ m/(\S+)/) {
$seq_names_hash{$1} = 1;
}
}
close(SEQS);
# Read sequence database file
my $line = "empty";
my @seqs;
open(SEQDB, "$seq_db_filename") ||
die "Error: Could not open file $seq_db_filename\n";
# Extract the sequences
while (defined($line)) {
if ($line =~ m/^>\s*([\w_]+)(\|\S+)?\s/) {
if (exists($seq_names_hash{$1})) {
push(@seqs, $line);
while (defined($line = <SEQDB>) && $line !~ m/^>/) {
push(@seqs, $line);
}
} else {
$line = <SEQDB>;
}
} else {
$line = <SEQDB>;
}
}
close(SEQDB);
my $seqOutFile = $seq_names_filename . ".seqs";
open(OUTFILE, ">$seqOutFile") ||
die "Error: Could not open file $seqOutFile for writing\n";
print OUTFILE @seqs;
close(OUTFILE);
exit(0);
|