1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
#
# = bio/db/fasta/format_qual.rb - Qual format and FastaNumericFormat generater
#
# Copyright:: Copyright (C) 2009
# Naohisa Goto <ng@bioruby.org>
# License:: The Ruby License
#
module Bio::Sequence::Format::Formatter
# INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
# Simple FastaNumeric format output class for Bio::Sequence.
class Fasta_numeric < Bio::Sequence::Format::FormatterBase
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Creates a new FastaNumericFormat generater object from the sequence.
#
# It does not care whether the content of the quality score is
# consistent with the sequence or not, e.g. it does not check
# length of the quality score.
#
# ---
# *Arguments*:
# * _sequence_: Bio::Sequence object
# * (optional) :header => _header_: (String) (default nil)
# * (optional) :width => _width_: (Fixnum) (default 70)
def initialize; end if false # dummy for RDoc
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Output the FASTA format string of the sequence.
#
# Currently, this method is used in Bio::Sequence#output like so,
#
# s = Bio::Sequence.new('atgc')
# s.quality_scores = [ 70, 80, 90, 100 ]
# puts s.output(:fasta_numeric)
# ---
# *Returns*:: String object
def output
header = @options[:header]
width = @options.has_key?(:width) ? @options[:width] : 70
seq = @sequence.seq.to_s
entry_id = @sequence.entry_id ||
"#{@sequence.primary_accession}.#{@sequence.sequence_version}"
definition = @sequence.definition
header ||= "#{entry_id} #{definition}"
sc = fastanumeric_quality_scores(seq)
if width then
if width <= 0 then
main = sc.join("\n")
else
len = 0
main = sc.collect do |x|
str = (len == 0) ? "#{x}" : " #{x}"
len += str.size
if len > width then
len = "#{x}".size
str = "\n#{x}"
end
str
end.join('')
end
else
main = sc.join(' ')
end
">#{header}\n#{main}\n"
end
private
def fastanumeric_quality_scores(seq)
@sequence.quality_scores || []
end
end #class Fasta_numeric
# INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
# Simple Qual format (sequence quality) output class for Bio::Sequence.
class Qual < Fasta_numeric
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Creates a new Qual format generater object from the sequence.
#
# The only difference from Fastanumeric is that Qual outputs
# Phred score by default, and data conversion will be performed
# if needed. Output score type can be changed by the
# ":quality_score_type" option.
#
# If the sequence have no quality score type information
# and no error probabilities, but the score exists,
# the score is regarded as :phred (Phred score).
#
# ---
# *Arguments*:
# * _sequence_: Bio::Sequence object
# * (optional) :header => _header_: (String) (default nil)
# * (optional) :width => _width_: (Fixnum) (default 70)
# * (optional) :quality_score_type => _type_: (Symbol) (default nil)
# * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities (default 0)
def initialize; end if false # dummy for RDoc
private
def fastanumeric_quality_scores(seq)
qsc = qual_quality_scores(seq)
if qsc.size > seq.length then
qsc = qsc[0, seq.length]
elsif qsc.size < seq.length then
padding = @options[:default_score] || 0
psize = seq.length - qsc.size
qsc += Array.new(psize, padding)
end
qsc
end
def qual_quality_scores(seq)
return [] if seq.length <= 0
# get output quality score type
fmt = @options[:quality_score_type]
qsc = @sequence.quality_scores
qsc_type = @sequence.quality_score_type
# checks if no need to convert
if qsc and qsc_type == fmt and
qsc.size >= seq.length then
return qsc
end
# default output quality score type is :phred
fmt ||= :phred
# If quality score type of the sequence is nil, implicitly
# regarded as :phred.
qsc_type ||= :phred
# checks error_probabilities
ep = @sequence.error_probabilities
if ep and ep.size >= seq.length then
case fmt
when :phred
return Bio::Sequence::QualityScore::Phred.p2q(ep[0, seq.length])
when :solexa
return Bio::Sequence::QualityScore::Solexa.p2q(ep[0, seq.length])
end
end
# Checks if scores can be converted.
if qsc and qsc.size >= seq.length then
case [ qsc_type, fmt ]
when [ :phred, :solexa ]
return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc[0, seq.length])
when [ :solexa, :phred ]
return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc[0, seq.length])
end
end
# checks quality scores type
case qsc_type
when :phred, :solexa
#does nothing
else
qsc_type = nil
qsc = nil
end
# collects piece of information
qsc_cov = qsc ? qsc.size.quo(seq.length) : 0
ep_cov = ep ? ep.size.quo(seq.length) : 0
if qsc_cov > ep_cov then
case [ qsc_type, fmt ]
when [ :phred, :phred ], [ :solexa, :solexa ]
return qsc
when [ :phred, :solexa ]
return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc)
when [ :solexa, :phred ]
return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc)
end
elsif ep_cov > qsc_cov then
case fmt
when :phred
return Bio::Sequence::QualityScore::Phred.p2q(ep)
when :solexa
return Bio::Sequence::QualityScore::Solexa.p2q(ep)
end
end
# if no information, returns empty array
return []
end
end #class Qual
end #module Bio::Sequence::Format::Formatter
|