# Lightweight FASTA reader capabilities:
# Fasta_reader iterates over the records of a FASTA file (or filehandle) and
# hands each record back as a Sequence object.
package Fasta_reader;
use strict;
sub new {
    # Constructor for a FASTA reader.
    #
    # Arguments:
    #   $packagename - class to bless the reader into.
    #   $fastaFile   - either the name of a FASTA file to read, or an already
    #                  opened filehandle: an IO::Handle object (or subclass
    #                  such as IO::File), a glob reference such as a lexical
    #                  filehandle, or a bare typeglob.
    #
    # Returns the new reader object.  Dies if no source is supplied or if the
    # named file cannot be opened for reading.
    my ($packagename, $fastaFile) = @_;

    my $self = bless {
        fastaFile  => undef,    # only set when a file name was supplied
        fileHandle => undef,
    }, $packagename;

    # Scalar::Util ships with Perl; it is loaded here so this sub does not
    # depend on the file's top-level imports.
    require Scalar::Util;

    # An exact ref() eq 'IO::Handle' comparison misses subclasses and plain
    # glob-based handles, so test for every common filehandle flavour.
    my $is_handle =
           ( Scalar::Util::blessed($fastaFile) && $fastaFile->isa('IO::Handle') )
        || ( ref($fastaFile) && Scalar::Util::reftype($fastaFile) eq 'GLOB' )
        || ref(\$fastaFile) eq 'GLOB';

    my $filehandle;
    if ($is_handle) {
        $filehandle = $fastaFile;
    }
    else {
        # Three-argument open with an undefined name would silently create an
        # anonymous temporary file, so reject a missing name explicitly.
        unless (defined $fastaFile && length $fastaFile) {
            die "Error: no FASTA file name or filehandle supplied\n";
        }

        # Three-argument open: the name is always taken literally as a file to
        # read; mode or pipe characters inside it are never interpreted.
        open($filehandle, '<', $fastaFile)
            or die "Error: Couldn't open $fastaFile: $!\n";
        $self->{fastaFile} = $fastaFile;
    }

    $self->{fileHandle} = $filehandle;
    return $self;
}
#### next() fetches next Sequence object.
sub next {
    # Reads the next FASTA record from the reader's filehandle.
    #
    # Returns a Sequence object built from the record's header line and its
    # concatenated sequence lines, or undef once the input is exhausted.
    my $self = shift;

    my $filehandle = $self->{fileHandle};

    # Read one whole record by using "\n>" as the input record separator.
    # 'local' guarantees the caller's separator is restored on every exit
    # path, including when something below dies.
    my $next_text_input = do {
        local $/ = "\n>";
        <$filehandle>;
    };

    my $seqobj = undef;

    # Test definedness rather than truth, so a final record whose entire text
    # is "0" is not mistaken for end of input.
    if (defined $next_text_input) {
        # Strip the leading '>' of this record and the trailing '>' that
        # belongs to the following record.
        $next_text_input =~ s/^>|>$//g;

        # Delete control characters and characters \177-\377, keeping only
        # tab and newline (this also removes carriage returns).
        $next_text_input =~ tr/\t\n\000-\037\177-\377/\t\n/d;

        # First line is the header; everything after it is sequence.
        my ($header, @seqlines) = split(/\n/, $next_text_input);
        my $sequence = join("", @seqlines);
        $sequence =~ s/\s//g;

        $seqobj = Sequence->new($header, $sequence);
    }

    return $seqobj;    # undef when no record was read
}
#### finish() closes the open filehandle to the query database.
sub finish {
    # Closes the reader's filehandle and clears it from the object.
    # Safe to call more than once: when no handle is stored there is nothing
    # to close, and passing undef to close() is fatal under 'use strict'.
    my $self = shift;

    my $filehandle = $self->{fileHandle};
    if (defined $filehandle) {
        close $filehandle;
    }
    $self->{fileHandle} = undef;

    return;
}
##############################################
package Sequence;
use strict;
sub new {
    # Constructor for a single sequence record.
    #
    # Arguments:
    #   $packagename - class to bless the object into.
    #   $header      - header text of the record (without the leading '>').
    #   $sequence    - sequence string.
    #
    # Returns the new object with keys accession, header, sequence and
    # filename (filename is initialised to undef).
    my ($packagename, $header, $sequence) = @_;

    # The accession is the first whitespace-delimited token of the header.
    # split ' ' skips leading whitespace, so a header such as " id desc"
    # yields 'id' instead of an empty accession.
    my ($acc) = defined $header ? split(' ', $header, 2) : ();

    my $self = {
        accession => $acc,
        header    => $header,
        sequence  => $sequence,
        filename  => undef,
    };
    bless($self, $packagename);

    return $self;
}
####
sub get_accession {
    # Accessor: returns the value stored under the object's 'accession' key.
    my ($seq_obj) = @_;
    my $accession = $seq_obj->{accession};
    return $accession;
}
####
sub get_header {
    # Accessor: returns the value stored under the object's 'header' key.
    my ($seq_obj) = @_;
    my $header_text = $seq_obj->{header};
    return $header_text;
}
####
sub get_sequence {
    # Accessor: returns the value stored under the object's 'sequence' key.
    my ($seq_obj) = @_;
    my $residues = $seq_obj->{sequence};
    return $residues;
}
####
sub get_FASTA_format {
    # Returns this record as FASTA text: a ">header" line followed by the
    # sequence wrapped at 60 characters per line, ending in exactly one
    # newline.
    my $self = shift;

    my $header   = $self->get_header();
    my $sequence = $self->get_sequence();

    # Break after every 60 non-space characters, but only when more sequence
    # follows; otherwise a length that is a multiple of 60 would end the
    # entry with an extra blank line.
    $sequence =~ s/(\S{60})(?=\S)/$1\n/g;

    my $fasta_entry = ">$header\n$sequence\n";
    return $fasta_entry;
}
####
sub write_fasta_file {
    # Writes this record to a file in FASTA format (sequence wrapped at 60
    # characters per line, terminated by a single newline).
    #
    # Arguments:
    #   $filename - optional output path.  When omitted, the file is named
    #               "<accession>.fasta" with every non-word character of the
    #               accession replaced by '_'.
    #
    # Returns the path that was written.  Dies if the file cannot be opened,
    # written, or closed.
    my $self     = shift;
    my $filename = shift;

    my ($accession, $header, $sequence) = @{$self}{qw(accession header sequence)};

    # Break lines only where more sequence follows, so the output never
    # contains a blank line.
    $sequence =~ s/(\S{60})(?=\S)/$1\n/g;

    my $tempfile;
    if (defined $filename && length $filename) {
        $tempfile = $filename;
    }
    else {
        (my $acc = $accession) =~ s/\W/_/g;
        $tempfile = "$acc.fasta";
    }

    # Lexical handle and three-argument open: the path is taken literally and
    # no package-global filehandle is clobbered.
    open(my $out_fh, '>', $tempfile)
        or die "ERROR! Couldn't write FASTA file $tempfile: $!\n";
    print {$out_fh} ">$header\n$sequence\n"
        or die "ERROR! Couldn't write FASTA file $tempfile: $!\n";
    # Buffered write errors only surface at close time, so check it.
    close($out_fh)
        or die "ERROR! Couldn't close FASTA file $tempfile: $!\n";

    return $tempfile;
}
1; #EOM
# End of module: packages Fasta_reader and Sequence.