File: fasta.rb

package info (click to toggle)
genometools 1.6.1%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (112 lines) | stat: -rw-r--r-- 2,375 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
module Fasta

require 'zlib'
# require 'codon2aa.rb'

# One record of a (multi-)FASTA file: a header, optional comment lines and
# the sequence itself, kept as a list of whitespace-free chunks.
class SequenceEntry
  # Creates an entry from a raw header line.
  #
  # headline:: the header line, starting with ">". A trailing newline is
  #            optional and is not stored as part of the header.
  #
  # If headline contains no line starting with ">", a message is printed to
  # STDERR and the process exits with status 1.
  def initialize(headline)
    # "$" matches both before a newline and at the end of the string, so a
    # header on the last, unterminated line of a file is accepted as well.
    h = headline.match(/^>(.*)$/)
    if h
      @header = h[1]
    else
      STDERR.puts "#{$0}: illegal header #{headline}"
      exit 1
    end
    @comment = []
    @seqoflines = []
  end

  # Appends a comment line; it is stored verbatim (including any newline)
  # and emitted unchanged by #write.
  def add_comment_line(line)
    @comment.push(line)
  end

  # Appends a chunk of sequence. All whitespace (including the line
  # terminator) is removed before the chunk is stored.
  def add_sequence_line(line)
    @seqoflines.push(line.gsub(/\s/, ""))
  end

  # Replaces the header (the text after ">").
  def set_header(s)
    @header = s
  end

  # Writes the entry in FASTA format to io.
  #
  # rc::             if true, the reverse complement of the sequence is
  #                  written. Only A, C, G, T (upper and lower case) are
  #                  complemented; every other character is kept as is.
  # io::             object responding to +puts+ and +print+.
  # seq_line_width:: maximum number of sequence characters per output line.
  #
  # Raises ArgumentError if seq_line_width is smaller than 1.
  def write(rc, io, seq_line_width = 70)
    # A width below 1 would never advance through the sequence.
    unless seq_line_width >= 1
      raise ArgumentError, "seq_line_width must be at least 1"
    end
    # The stored header has no line terminator; puts adds one (and does not
    # add a second one if set_header supplied a string ending in a newline).
    io.puts ">#{@header}"
    @comment.each do |c|
      io.print c
    end
    s = get_sequence
    if rc
      s = s.reverse.tr("ACGTacgt", "TGCAtgca")
    end
    (0...s.size).step(seq_line_width) do |b|
      io.puts(s[b, seq_line_width])
    end
  end

  # Returns the total number of sequence characters stored in this entry.
  def get_seqlength()
    @seqoflines.inject(0) { |lensum, line| lensum + line.length }
  end

  # Returns the complete sequence as a single string.
  def get_sequence()
    @seqoflines.join("")
  end

  # Returns the header (the text after ">", without line terminator unless
  # one was supplied through set_header).
  def get_header()
    @header
  end
end

=begin
  def write_open_reading_frame( offset, io, reverse=false )
    s = @seqoflines.join("")
    if reverse
      s = s.reverse
    end
    aaSequence = ""
    position = offset
    while position+3 < s.length    # enough bases for a codon
      begin
        codon = s[ position, 3 ]
        aa = codon2aa_11( codon )
        aaSequence += aa
      rescue => text
        io.puts text
        return
      end
      position += 3
    end
    puts aaSequence
  end
=end

# Reads the (multi-)FASTA file fname and yields one SequenceEntry per
# record, in file order. Files whose name ends in ".gz" are read through
# Zlib::GzipReader, all others as plain text.
#
# Lines starting with ">" begin a new entry, lines starting with ";" are
# added to the current entry as comment lines, all other lines are added as
# sequence lines.
#
# Without a block, an enumerator over the entries is returned.
#
# If the file cannot be opened, or if non-blank content appears before the
# first header line, a message is printed to STDERR and the process exits
# with status 1. If the file contains no header line at all, a message is
# printed to STDERR and nothing is yielded.
def Fasta.read_multi_file(fname)  # function for module Fasta
  return enum_for(:read_multi_file, fname) unless block_given?

  begin
    if fname.match(/\.gz$/)
      infp = Zlib::GzipReader.open(fname)
    else
      infp = File.open(fname,"r")
    end
  rescue => err
    STDERR.print "Could not open file \"#{fname}\": #{err}\n"
    exit 1
  end
  begin
    curr_entry = nil
    infp.each_line do |line|
      if line.match(/^>/)
        if not curr_entry.nil?
          yield curr_entry  # deliver current entry to iterator
        end
        curr_entry = SequenceEntry.new(line)  # create new sequence entry
      elsif curr_entry.nil?
        # No header seen yet, so there is no entry this line could belong to.
        next if line.strip.empty?  # tolerate leading blank lines
        STDERR.puts "#{$0}: file \"#{fname}\": content found before first header line"
        exit 1
      elsif line.match(/^;/)  # comment line
        curr_entry.add_comment_line(line)
      else
        curr_entry.add_sequence_line(line)
      end
    end
    if curr_entry.nil?
      STDERR.puts "#{$0}: assertion in f. #{__FILE__}, l. #{__LINE__} failed"
    else
      yield curr_entry  # deliver the last entry
    end
  ensure
    # Runs also when the caller's block raises or breaks, and on exit.
    infp.close
  end
end

end # module Fasta