File: qual.rb

package info (click to toggle)
ruby-bio 1.5.0-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 7,480 kB
  • ctags: 9,428
  • sloc: ruby: 74,117; xml: 3,383; makefile: 17; perl: 13; sh: 1
file content (126 lines) | stat: -rw-r--r-- 3,455 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#
# = bio/db/fasta/qual.rb - Qual format, FASTA formatted numeric entry
#
# Copyright::  Copyright (C) 2001, 2002, 2009
#              Naohisa Goto <ng@bioruby.org>,
#              Toshiaki Katayama <k@bioruby.org>
# License::    The Ruby License
#
# $Id:$
# 
# == Description
# 
# QUAL format, FASTA formatted numeric entry.
#
# == Examples
#
# See documents of Bio::FastaNumericFormat class.
#
# == References
#
# * FASTA format (WikiPedia)
#   http://en.wikipedia.org/wiki/FASTA_format
#
# * Phred quality score (WikiPedia)
#   http://en.wikipedia.org/wiki/Phred_quality_score
#   
# * Fasta format description (NCBI)
#   http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
#

require 'bio/db/fasta'

module Bio

  # Treats a FASTA formatted numerical entry, such as:
  # 
  #   >id and/or some comments                    <== comment line
  #   24 15 23 29 20 13 20 21 21 23 22 25 13      <== numerical data
  #   22 17 15 25 27 32 26 32 29 29 25
  # 
  # The leading '>' can be omitted, and a trailing '>' is removed
  # automatically.
  #
  # --- Bio::FastaNumericFormat.new(entry)
  # 
  # Stores the comment and the list of the numerical data.
  # 
  # --- Bio::FastaNumericFormat#definition
  #
  # The comment line of the FASTA formatted data.
  #
  # * FASTA format (Wikipedia)
  #   http://en.wikipedia.org/wiki/FASTA_format
  #
  # * Phred quality score (WikiPedia)
  #   http://en.wikipedia.org/wiki/Phred_quality_score
  #   
  class FastaNumericFormat < FastaFormat

    # Returns the list of the numerical data (typically the quality scores
    # of the corresponding sequence) as an Array.
    #
    # On the first call, the text held in @data is stripped, split on runs
    # of whitespace, and every token is converted with String#to_i. The
    # resulting Array is cached in @list and returned as-is afterwards.
    # NOTE(review): @data is not assigned in this class; presumably it is
    # set by the FastaFormat parent class -- confirm.
    # ---
    # *Returns*:: (Array containing Integer) numbers
    def data
      @list ||= @data.strip.split(/\s+/).map { |x| x.to_i }
    end

    # Returns the number of elements in the numerical data,
    # which will be the same as the length of its corresponding sequence.
    # ---
    # *Returns*:: (Integer) the number of elements
    def length
      data.length
    end

    # Yields each element of the numerical data in order.
    #
    # When no block is given, an Enumerator over the elements is returned
    # instead of raising LocalJumpError.
    # ---
    # *Yields*:: (Integer) a numerical data element
    # *Returns*:: (undefined) when a block is given;
    #             (Enumerator) when no block is given
    def each
      return to_enum(:each) unless block_given?
      data.each { |x| yield x }
    end

    # Returns the n-th element. If out of range, returns nil.
    # The argument is passed directly to Array#[] on the data list.
    # ---
    # *Arguments*:
    # * (required) _n_: (Integer) position
    # *Returns*:: (Integer or nil) the value
    def [](n)
      data[n]
    end

    # Returns the data as a Bio::Sequence object.
    # In the returned sequence object, the length of the sequence is zero,
    # and the numeric data is stored to the Bio::Sequence#quality_scores
    # attribute.
    #
    # Because the meaning of the numeric data is unclear,
    # Bio::Sequence#quality_score_type is not set by default.
    #
    # Note: If you modify the returned Bio::Sequence object,
    # the sequence or definition in this FastaNumericFormat object
    # might also be changed (but not always be changed)
    # because of efficiency.
    # 
    # ---
    # *Arguments*:: (none)
    # *Returns*:: (Bio::Sequence) sequence object
    def to_biosequence
      s = Bio::Sequence.adapter(self,
                                Bio::Sequence::Adapter::FastaNumericFormat)
      # The entry carries no residues, so the sequence is set to an
      # empty generic sequence.
      s.seq = Bio::Sequence::Generic.new('')
      s
    end
    alias to_seq to_biosequence

    # Remove the methods named below from this class; a numeric entry
    # has no residues for them to operate on.
    undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen

  end #class FastaNumericFormat

end #module Bio