File: get_sequences.pl

package info (click to toggle)
sift 6.2.1-2
  • links: PTS, VCS
  • area: non-free
  • in suites: sid
  • size: 4,784 kB
  • sloc: ansic: 18,272; perl: 219; csh: 164; makefile: 152
file content (78 lines) | stat: -rw-r--r-- 1,689 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#! /opt/local/bin/perl5 -w

#####################################################################
#
# Filename: get_sequences.pl - get specific sequences from a database
#
# Input:
#	File containing names of sequences
#	Name of a sequence
# Action:
#	Retrieves the sequences from the sequence database
# Output:
#	File called $seqName.seqs
#
# By: Bob Chan, bobchan@howard.fhcrc.org
# Last modified: 12/13/00
#
#####################################################################

## Constants --------------------------------------------------------

my %seq_names_hash;

## Main

# Handle command-line arguments
unless (scalar(@ARGV) == 2) {
  die "Usage: $0 seq_names_file seq_db_file\n";
}

my $seq_names_filename = shift;
my $seq_db_filename = shift;

open(SEQS, "$seq_names_filename") ||
  die "Error: Could not open file $seq_names_filename for reading!\n";

while (<SEQS>) {
  chomp;
  my $line = $_;
  if ($line =~ m/(\S+)/) {
    $seq_names_hash{$1} = 1;
  }
}

close(SEQS);

# Read sequence database file
my $line = "empty";
my @seqs;

open(SEQDB, "$seq_db_filename") ||
  die "Error: Could not open file $seq_db_filename\n";

# Extract the sequences
while (defined($line)) {
  if ($line =~ m/^>\s*([\w_]+)(\|\S+)?\s/) {
    if (exists($seq_names_hash{$1})) {
      push(@seqs, $line);
      while (defined($line = <SEQDB>) && $line !~ m/^>/) {
        push(@seqs, $line);
      }
    } else {
      $line = <SEQDB>;
    }
  } else {
    $line = <SEQDB>;
  }
}

close(SEQDB);

  my $seqOutFile = $seq_names_filename . ".seqs";
  open(OUTFILE, ">$seqOutFile") ||
    die "Error: Could not open file $seqOutFile for writing\n";
  print OUTFILE @seqs;
  close(OUTFILE);

exit(0);