File: Fasta_reader.pm

package info (click to toggle)
microbiomeutil 20101212%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 49,268 kB
  • ctags: 353
  • sloc: perl: 4,878; ansic: 419; makefile: 94; sh: 27
file content (143 lines) | stat: -rwxr-xr-x 2,980 bytes parent folder | download | duplicates (15)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# lightweight fasta reader capabilities:
package Fasta_reader;

use strict;

sub new {
    my ($packagename, $fastaFile) = @_;

	## note: fastaFile can be a filename or an IO::Handle
	

    my $self = { fastaFile => undef,,
				 fileHandle => undef };

    bless ($self, $packagename);
    
    ## create filehandle
    my $filehandle = undef;
    
	if (ref $fastaFile eq 'IO::Handle') {
		$filehandle = $fastaFile;
	}
	else {
		
		open ($filehandle, $fastaFile) or die "Error: Couldn't open $fastaFile\n";
		$self->{fastaFile} = $fastaFile;
	}
	
	$self->{fileHandle} = $filehandle;

    return ($self);
}



#### next() fetches next Sequence object.
sub next {
    my $self = shift;
    my $orig_record_sep = $/;
    $/="\n>";
    my $filehandle = $self->{fileHandle};
    my $next_text_input = <$filehandle>;
    my $seqobj = undef;
    if ($next_text_input) {
	$next_text_input =~ s/^>|>$//g; #remove trailing > char.
	$next_text_input =~ tr/\t\n\000-\037\177-\377/\t\n/d; #remove cntrl chars
	my ($header, @seqlines) = split (/\n/, $next_text_input);
	my $sequence = join ("", @seqlines);
	$sequence =~ s/\s//g;
		
	$seqobj = Sequence->new($header, $sequence);
    }
    
    $/ = $orig_record_sep; #reset the record separator to original setting.
    
    return ($seqobj); #returns null if not instantiated.
}


#### finish() closes the open filehandle to the query database.
sub finish {
    my $self = shift;
    my $filehandle = $self->{fileHandle};
    close $filehandle;
    $self->{fileHandle} = undef;
}




##############################################
package Sequence;
use strict;

sub new {
    my ($packagename, $header, $sequence) = @_;
    
    ## extract an accession from the header:
    my ($acc, $rest) = split (/\s+/, $header, 2);
        
    my $self = { accession => $acc,
		 header => $header,
		 sequence => $sequence,
		 filename => undef };
    bless ($self, $packagename);
    return ($self);
}

####
sub get_accession {
    my $self = shift;
    return ($self->{accession});
}

####
sub get_header {
    my $self = shift;
    return ($self->{header});
}

####
sub get_sequence {
    my $self = shift;
    return ($self->{sequence});
}

#### 
sub get_FASTA_format {
    my $self = shift;
    my $header = $self->get_header();
    my $sequence = $self->get_sequence();
    $sequence =~ s/(\S{60})/$1\n/g;
    my $fasta_entry = ">$header\n$sequence\n";
    return ($fasta_entry);
}


####
sub write_fasta_file {
    my $self = shift;
    my $filename = shift;

    my ($accession, $header, $sequence) = ($self->{accession}, $self->{header}, $self->{sequence});
    $sequence =~ s/(\S{60})/$1\n/g;
   
    my $tempfile;
    if ($filename) {
	$tempfile = $filename;
    } else {
	my $acc = $accession;
	$acc =~ s/\W/_/g;
	$tempfile = "$acc.fasta";
    }
    
    open (TMP, ">$tempfile") or die "ERROR! Couldn't write a temporary file in current directory.\n";
    print TMP ">$header\n$sequence";
    close TMP;
    return ($tempfile);
}

1; #EOM