File: quality_score.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (207 lines) | stat: -rw-r--r-- 5,977 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#
# = bio/sequence/quality_score.rb - Sequence quality score manipulation modules
#
# Copyright::  Copyright (C) 2009
#              Naohisa Goto <ng@bioruby.org>
# License::    The Ruby License
#
# == Description
# 
# Sequence quality score manipulation modules, mainly used by Bio::Fastq
# and related classes.
#
# == References
#
# * FASTQ format specification
#   http://maq.sourceforge.net/fastq.shtml
#

module Bio

require 'bio/sequence' unless const_defined?(:Sequence)

class Sequence

  # Bio::Sequence::QualityScore is a name space for quality score modules. 
  # BioRuby internal use only (mainly from Bio::Fastq).
  module QualityScore

    # Converter methods between PHRED and Solexa quality scores.
    module Converter

      # Converts PHRED scores to Solexa scores.
      #
      # The values may be truncated or incorrect if overflows/underflows
      # occurred during the calculation.
      # ---
      # *Arguments*:
      # * (required) _scores_: (Array containing Integer) quality scores
      # *Returns*:: (Array containing Integer) quality scores
      def convert_scores_from_phred_to_solexa(scores)
        # Per score: Solexa = 10 * log10(10 ** (PHRED / 10) - 1).
        scores.map do |phred|
          odds = 10 ** (phred / 10.0) - 1
          # Keep the operand of log10 strictly positive: anything below the
          # smallest positive normalized Float is replaced by that value.
          odds = Float::MIN if odds < Float::MIN
          solexa = 10 * Math.log10(odds)
          # Non-finite results (Infinity, NaN) cannot be rounded to an
          # Integer, so they are handed back unchanged.
          next solexa unless solexa.finite?
          solexa.round
        end
      end

      # Converts Solexa scores to PHRED scores.
      #
      # The values may be truncated if overflows/underflows occurred
      # during the calculation.
      # ---
      # *Arguments*:
      # * (required) _scores_: (Array containing Integer) quality scores
      # *Returns*:: (Array containing Integer) quality scores
      def convert_scores_from_solexa_to_phred(scores)
        # Per score: PHRED = 10 * log10(10 ** (Solexa / 10) + 1).
        scores.map do |solexa|
          phred = 10 * Math.log10(10 ** (solexa / 10.0) + 1)
          # Only finite results are rounded to Integer; Infinity/NaN are
          # returned as they are.
          if phred.finite?
            phred.round
          else
            phred
          end
        end
      end

      # Does nothing and simply returns the given argument.
      # 
      # ---
      # *Arguments*:
      # * (required) _scores_: (Array containing Integer) quality scores
      # *Returns*:: (Array containing Integer) quality scores
      def convert_nothing(scores)
        # Identity conversion: the very same object is handed back, no copy
        # is made.
        scores
      end

    end #module Converter

    # Bio::Sequence::QualityScore::Phred is a module having quality calculation
    # methods for the PHRED quality score.
    #
    # BioRuby internal use only (mainly from Bio::Fastq).
    module Phred

      include Converter

      # Type of quality scores.
      # ---
      # *Returns*:: (Symbol) the type of quality score.
      def quality_score_type
        # Constant tag identifying this module's score system.
        :phred
      end

      # PHRED score to probability conversion.
      # ---
      # *Arguments*:
      # * (required) _scores_: (Array containing Integer) scores
      # *Returns*:: (Array containing Float) probabilities (0<=p<=1)
      def phred_q2p(scores)
        # Per score: p = 10 ** (-Q / 10), capped at 1.0 (negative scores
        # would otherwise give values above 1). No lower bound is applied.
        scores.map do |score|
          prob = 10 ** (- score / 10.0)
          prob > 1.0 ? 1.0 : prob
        end
      end
      # Expose phred_q2p under the shorter name q2p. module_function makes
      # it callable on the module itself (Phred.q2p) but also turns the
      # instance-level method private, so it is made public again.
      alias q2p phred_q2p
      module_function :q2p
      public :q2p

      # Probability to PHRED score conversion.
      #
      # The values may be truncated or incorrect if overflows/underflows
      # occurred during the calculation.
      # ---
      # *Arguments*:
      # * (required) _probabilities_: (Array containing Float) probabilities
      # *Returns*:: (Array containing Integer) scores (non-finite results are returned as Float without rounding)
      def phred_p2q(probabilities)
        # Per probability: Q = -10 * log10(p).
        probabilities.map do |prob|
          # Probabilities below the smallest positive normalized Float
          # (including 0 and negatives) are replaced by it before log10.
          floored = prob < Float::MIN ? Float::MIN : prob
          score = -10 * Math.log10(floored)
          # Finite scores are rounded to Integer; Infinity/NaN pass through.
          next score unless score.finite?
          score.round
        end
      end
      # Expose phred_p2q under the shorter name p2q, both as a module
      # function (Phred.p2q) and as a public instance method
      # (module_function alone would leave the instance method private).
      alias p2q phred_p2q
      module_function :p2q
      public :p2q

      # Generic converter names as seen from the PHRED side: converting
      # from/to PHRED is a no-op, while the Solexa directions map to the
      # corresponding Converter methods. Only convert_scores_to_solexa is
      # additionally exposed as a module function; "public" restores the
      # instance-method visibility that module_function made private.
      alias convert_scores_from_phred   convert_nothing
      alias convert_scores_to_phred     convert_nothing
      alias convert_scores_from_solexa  convert_scores_from_solexa_to_phred 
      alias convert_scores_to_solexa    convert_scores_from_phred_to_solexa
      module_function :convert_scores_to_solexa
      public :convert_scores_to_solexa

    end #module Phred

    # Bio::Sequence::QualityScore::Solexa is a module having quality
    # calculation methods for the Solexa quality score.
    #
    # BioRuby internal use only (mainly from Bio::Fastq).
    module Solexa

      include Converter

      # Type of quality scores.
      # ---
      # *Returns*:: (Symbol) the type of quality score.
      def quality_score_type
        # Constant tag identifying this module's score system.
        :solexa
      end

      # Solexa score to probability conversion.
      # ---
      # *Arguments*:
      # * (required) _scores_: (Array containing Integer) scores
      # *Returns*:: (Array containing Float) probabilities
      def solexa_q2p(scores)
        # Per score: p = t / (1 + t) with t = 10 ** (-Q / 10).
        scores.collect do |q|
          t = 10 ** (- q / 10.0)
          if t.infinite? then
            # For extremely low scores t overflows to Infinity, and the
            # naive Infinity / (1.0 + Infinity) would evaluate to NaN.
            # The mathematical limit of t / (1 + t) is 1.0.
            1.0
          else
            # For finite t >= 0 the ratio is already within 0.0..1.0,
            # so no additional clamping is required.
            t / (1.0 + t)
          end
        end
      end
      # Expose solexa_q2p under the shorter name q2p. module_function makes
      # it callable on the module itself (Solexa.q2p) but also turns the
      # instance-level method private, so it is made public again.
      alias q2p solexa_q2p
      module_function :q2p
      public :q2p

      # Probability to Solexa score conversion.
      # ---
      # *Arguments*:
      # * (required) _probabilities_: (Array containing Float) probabilities
      # *Returns*:: (Array containing Integer) scores (non-finite results are returned as Float without rounding)
      def solexa_p2q(probabilities)
        # Per probability: Q = -10 * log10(p / (1 - p)).
        probabilities.map do |prob|
          odds = prob / (1.0 - prob)
          # Odds below the smallest positive normalized Float (including 0
          # and negatives) are replaced by it before taking log10.
          odds = Float::MIN if odds < Float::MIN
          score = -10 * Math.log10(odds)
          # Finite scores are rounded to Integer; Infinity/NaN pass through.
          if score.finite?
            score.round
          else
            score
          end
        end
      end
      # Expose solexa_p2q under the shorter name p2q, both as a module
      # function (Solexa.p2q) and as a public instance method
      # (module_function alone would leave the instance method private).
      alias p2q solexa_p2q
      module_function :p2q
      public :p2q

      # Generic converter names as seen from the Solexa side: converting
      # from/to Solexa is a no-op, while the PHRED directions map to the
      # corresponding Converter methods. Only convert_scores_to_phred is
      # additionally exposed as a module function; "public" restores the
      # instance-method visibility that module_function made private.
      alias convert_scores_from_solexa  convert_nothing
      alias convert_scores_to_solexa    convert_nothing
      alias convert_scores_from_phred   convert_scores_from_phred_to_solexa
      alias convert_scores_to_phred     convert_scores_from_solexa_to_phred
      module_function :convert_scores_to_phred
      public :convert_scores_to_phred

    end #module Solexa

  end #module QualityScore

end #class Sequence

end #module Bio