1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
#
# = bio/db/fasta/format_fastq.rb - FASTQ format generater
#
# Copyright:: Copyright (C) 2009
# Naohisa Goto <ng@bioruby.org>
# License:: The Ruby License
#
require 'bio/db/fastq'
module Bio::Sequence::Format::Formatter
# INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
#
# FASTQ format output class for Bio::Sequence.
#
# The default FASTQ format is fastq-sanger.
class Fastq < Bio::Sequence::Format::FormatterBase
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Creates a new Fasta format generater object from the sequence.
#
# ---
# *Arguments*:
# * _sequence_: Bio::Sequence object
# * (optional) :repeat_title => (true or false) if true, repeating title in the "+" line; if not true, "+" only (default false)
# * (optional) :width => _width_: (Fixnum) width to wrap sequence and quality lines; nil to prevent wrapping (default nil)
# * (optional) :title => _title_: (String) completely replaces title line with the _title_ (default nil)
# * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities; false or nil means the lowest score, true means the highest score (default nil)
def initialize; end if false # dummy for RDoc
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
#
# Output the FASTQ format string of the sequence.
#
# Currently, this method is used in Bio::Sequence#output like so,
#
# s = Bio::Sequence.new('atgc')
# puts s.output(:fastq_sanger)
# ---
# *Returns*:: String object
def output
title = @options[:title]
width = @options.has_key?(:width) ? @options[:width] : nil
seq = @sequence.seq.to_s
entry_id = @sequence.entry_id ||
"#{@sequence.primary_accession}.#{@sequence.sequence_version}"
definition = @sequence.definition
unless title then
title = definition.to_s
unless title[0, entry_id.length] == entry_id and
/\s/ =~ title[entry_id.length, 1].to_s then
title = "#{entry_id} #{title}"
end
end
title2 = @options[:repeat_title] ? title : ''
qstr = fastq_quality_string(seq, @options[:default_score])
"@#{title}\n" +
if width then
seq.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
else
seq + "\n"
end +
"+#{title2}\n" +
if width then
qstr.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
else
qstr + "\n"
end
end
private
def fastq_format_data
Bio::Fastq::FormatData::FASTQ_SANGER.instance
end
def fastq_quality_string(seq, default_score)
sc = fastq_quality_scores(seq)
if sc.size < seq.length then
if default_score == true then
# when true, the highest score
default_score = fastq_format_data.score_range.end
else
# when false or nil, the lowest score
default_score ||= fastq_format_data.score_range.begin
end
sc = sc + ([ default_score ] * (seq.length - sc.size))
end
fastq_format_data.scores2str(sc)
end
def fastq_quality_scores(seq)
return [] if seq.length <= 0
fmt = fastq_format_data
# checks quality_scores
qsc = @sequence.quality_scores
qsc_type = @sequence.quality_score_type
if qsc and qsc_type and
qsc_type == fmt.quality_score_type and
qsc.size >= seq.length then
return qsc
end
# checks error_probabilities
ep = @sequence.error_probabilities
if ep and ep.size >= seq.length then
return fmt.p2q(ep[0, seq.length])
end
# If quality score type of the sequence is nil, regarded as :phred.
qsc_type ||= :phred
# checks if scores can be converted
if qsc and qsc.size >= seq.length then
case [ qsc_type, fmt.quality_score_type ]
when [ :phred, :solexa ]
return fmt.convert_scores_from_phred_to_solexa(qsc[0, seq.length])
when [ :solexa, :phred ]
return fmt.convert_scores_from_solexa_to_phred(qsc[0, seq.length])
end
end
# checks quality scores type
case qsc_type
when :phred, :solexa
#does nothing
else
qsc_type = nil
qsc = nil
end
# collects piece of information
qsc_cov = qsc ? qsc.size.quo(seq.length) : 0
ep_cov = ep ? ep.size.quo(seq.length) : 0
if qsc_cov > ep_cov then
case [ qsc_type, fmt.quality_score_type ]
when [ :phred, :phred ], [ :solexa, :solexa ]
return qsc
when [ :phred, :solexa ]
return fmt.convert_scores_from_phred_to_solexa(qsc)
when [ :solexa, :phred ]
return fmt.convert_scores_from_solexa_to_phred(qsc)
end
elsif ep_cov > qsc_cov then
return fmt.p2q(ep)
end
# if no information, returns empty array
return []
end
end #class Fastq
# class Fastq_sanger is the same as the Fastq class.
Fastq_sanger = Fastq
class Fastq_solexa < Fastq
private
def fastq_format_data
Bio::Fastq::FormatData::FASTQ_SOLEXA.instance
end
end #class Fastq_solexa
class Fastq_illumina < Fastq
private
def fastq_format_data
Bio::Fastq::FormatData::FASTQ_ILLUMINA.instance
end
end #class Fastq_illumina
end #module Bio::Sequence::Format::Formatter
|