File: format_fasta.rb

package info (click to toggle)
ruby-bio 1.5.0-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 7,480 kB
  • ctags: 9,428
  • sloc: ruby: 74,117; xml: 3,383; makefile: 17; perl: 13; sh: 1
file content (93 lines) | stat: -rw-r--r-- 2,879 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#
# = bio/db/fasta/format_fasta.rb - Fasta format generator
#
# Copyright::   Copyright (C) 2006-2008
#               Toshiaki Katayama <k@bioruby.org>,
#               Naohisa Goto <ng@bioruby.org>,
#               Jan Aerts <jan.aerts@bbsrc.ac.uk>
# License::    The Ruby License
#

module Bio::Sequence::Format::Formatter

  # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
  # Simple Fasta format output class for Bio::Sequence.
  class Fasta < Bio::Sequence::Format::FormatterBase

    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
    #
    # Creates a new Fasta format generator object from the sequence.
    #
    # ---
    # *Arguments*:
    # * _sequence_: Bio::Sequence object
    # * (optional) :header => _header_: String (default nil)
    # * (optional) :width => _width_: Fixnum (default 70);
    #   passing :width => nil disables line wrapping
    def initialize; end if false # dummy for RDoc

    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
    #
    # Output the FASTA format string of the sequence.
    #
    # The header line is the :header option when it is given. Otherwise
    # it is "<id> <definition>", where <id> is the entry_id of the
    # sequence or, when entry_id is nil or false, the primary accession
    # followed by ".<sequence_version>". The ".<sequence_version>" suffix
    # is only appended when the sequence has a version, so a sequence
    # without any identifier yields an empty id instead of a lone ".".
    #
    # The sequence is wrapped at :width characters per line (default 70).
    # When :width is explicitly given as nil, the sequence is written on
    # a single line.
    #
    # Currently, this method is used in Bio::Sequence#output like so,
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.output(:fasta)                   #=> "> \natgc\n"
    # ---
    # *Returns*:: String object
    def output
      header = @options[:header]
      unless header
        identifier = @sequence.entry_id
        unless identifier
          accession = @sequence.primary_accession.to_s
          version = @sequence.sequence_version.to_s
          # Emit the "." separator only when there is a version to append;
          # otherwise the id would end with (or consist of) a stray dot.
          identifier = version.empty? ? accession : "#{accession}.#{version}"
        end
        header = "#{identifier} #{@sequence.definition}"
      end

      # fetch keeps an explicit :width => nil (meaning "do not wrap")
      # distinct from a missing :width key (meaning "use the default").
      width = @options.fetch(:width, 70)
      residues = @sequence.seq.to_s
      body = if width
               # Cut into chunks of at most +width+ characters, each
               # terminated by a newline.
               residues.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
             else
               residues + "\n"
             end

      ">#{header}\n" + body
    end
  end #class Fasta

  # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
  # NCBI-Style Fasta format output class for Bio::Sequence.
  # (like "ncbi" format in EMBOSS)
  #
  # Note that this class is under construction.
  class Fasta_ncbi < Bio::Sequence::Format::FormatterBase

    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
    #
    # Output the NCBI-style FASTA format string of the sequence.
    #
    # The header line is "lcl|<id> <definition>". <id> is the primary
    # accession followed by ".<sequence_version>" (the version suffix is
    # only appended when the sequence has a version), or the entry_id
    # when the sequence has no primary accession.
    #
    # The sequence is wrapped at :width characters per line (default 70).
    # When :width is explicitly given as nil, the sequence is written on
    # a single line.
    #
    # Currently, this method is used in Bio::Sequence#output like so,
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.output(:fasta_ncbi)              #=> ">lcl| \natgc\n"
    #
    # NOTE(review): the format symbol is presumably :fasta_ncbi, matching
    # this class name -- confirm against Bio::Sequence#output.
    # ---
    # *Arguments* (given through the formatter options):
    # * (optional) :width => _width_: Fixnum (default 70)
    # *Returns*:: String object
    def output
      # fetch keeps an explicit :width => nil (meaning "do not wrap")
      # distinct from a missing :width key (meaning "use the default").
      width = @options.fetch(:width, 70)
      #gi = @sequence.gi_number
      dbname = 'lcl'

      accession = @sequence.primary_accession.to_s
      if accession.empty?
        idstr = @sequence.entry_id
      else
        version = @sequence.sequence_version.to_s
        # Emit the "." separator only when there is a version to append;
        # otherwise the id would end with a stray dot.
        idstr = version.empty? ? accession : "#{accession}.#{version}"
      end

      header = "#{dbname}|#{idstr} #{@sequence.definition}"

      residues = @sequence.seq.to_s
      body = if width
               # Cut into chunks of at most +width+ characters, each
               # terminated by a newline.
               residues.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
             else
               residues + "\n"
             end

      ">#{header}\n" + body
    end
  end #class Fasta_ncbi

end #module Bio::Sequence::Format::Formatter