1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
|
# A segment line of a RGFA file
class RGFA::Line::Segment < RGFA::Line

  # Record type symbol of segment lines
  RECORD_TYPE = :S
  # Names of the required fields, in order
  REQFIELDS = [:name, :sequence]
  # Names of the optional fields which have a predefined datatype
  PREDEFINED_OPTFIELDS = [:LN, :RC, :FC, :KC, :SH, :UR]
  # Datatype symbol of each required and predefined optional field
  DATATYPE = {
    :name => :lbl,
    :sequence => :seq,
    :LN => :i,
    :RC => :i,
    :FC => :i,
    :KC => :i,
    :SH => :H,
    :UR => :Z
  }

  define_field_methods!

  attr_writer :links, :containments, :paths

  # References to the links in which the segment is involved.
  #
  # @!macro references_table
  #   The references are in four arrays which are
  #   accessed from a nested hash table. The first key is
  #   the direction (from or to), the second is the orientation
  #   (+ or -).
  #
  # @example
  #   segment.links[:from][:+]
  #
  # @return [Hash{RGFA::Line::DIRECTION => Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Link>}}]
  def links
    # the table is created (empty) on first access
    @links ||= {:from => {:+ => [], :- => []},
                :to   => {:+ => [], :- => []}}
  end

  # References to the containments in which the segment is involved.
  # @!macro references_table
  #
  # @example
  #   segment.containments[:from][:+]
  #
  # @return [Hash{RGFA::Line::DIRECTION => Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Containment>}}]
  def containments
    # the table is created (empty) on first access
    @containments ||= {:from => {:+ => [], :- => []},
                       :to   => {:+ => [], :- => []}}
  end

  # References to the paths in which the segment is involved.
  #
  # The references are in two arrays which are
  # accessed from a hash table. The key is the orientation
  # (+ or -).
  #
  # @example
  #   segment.paths[:+]
  #
  # @return [Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Path>}]
  def paths
    # the table is created (empty) on first access
    @paths ||= {:+ => [], :- => []}
  end

  # All containments where a segment is involved.
  # @!macro this_is_a_copy
  #   @note the list shall be considered read-only, as this
  #     is a copy of the original arrays of references, concatenated
  #     to each other.
  # @return [Array] concatenation of the four arrays of {#containments}
  def all_containments
    c = containments
    c[:from][:+] + c[:from][:-] + c[:to][:+] + c[:to][:-]
  end

  # All links where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] concatenation of the four arrays of {#links}
  def all_links
    l = links
    l[:from][:+] + l[:from][:-] + l[:to][:+] + l[:to][:-]
  end

  # All links and containments where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] links first, then containments
  def all_connections
    all_links + all_containments
  end

  # All paths where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] concatenation of the two arrays of {#paths}
  def all_paths
    pt = paths
    pt[:+] + pt[:-]
  end

  # All paths, links and containments where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] links and containments first, then paths
  def all_references
    all_connections + all_paths
  end

  # Checks that the LN tag, if present, agrees with the sequence length.
  # Nothing is checked if the sequence is "*" or if there is no LN tag.
  #
  # @raise [RGFA::Line::Segment::InconsistentLengthError]
  #    if sequence length and LN tag are not consistent.
  # @return [nil]
  def validate_length!
    return if sequence == "*"
    return unless optional_fieldnames.include?(:LN)
    return if self.LN == sequence.length
    raise RGFA::Line::Segment::InconsistentLengthError,
      "Length in LN tag (#{self.LN}) "+
      "is different from length of sequence field (#{sequence.length})"
  end

  # @!macro [new] length
  #   @return [Integer] value of LN tag, if segment has LN tag
  #   @return [Integer] sequence length if no LN and sequence not "*"
  #   @return [nil] if no LN and sequence is "*"
  # @see #length!
  def length
    ln = self.LN
    if ln
      ln
    elsif sequence != "*"
      sequence.length
    end
  end

  # @!macro length
  # @!macro [new] length_needed
  #   @raise [RGFA::Line::Segment::UndefinedLengthError] if not an LN tag and
  #     the sequence is "*"
  # @see #length
  def length!
    l = length
    if l.nil?
      raise RGFA::Line::Segment::UndefinedLengthError,
        "No length information available"
    end
    l
  end

  # @!macro [new] coverage
  #   The coverage computed from a count_tag.
  #   If unit_length is provided then: count/(length-unit_length+1),
  #   otherwise: count/length.
  #   The latter is a good approximation if length >>> unit_length.
  #   The division is done in floating point; no check is made that
  #   length-unit_length+1 is positive.
  #   @param [Symbol] count_tag <i>(defaults to +:RC+)</i>
  #     integer tag storing the count, usually :KC, :RC or :FC
  #   @param [Integer] unit_length the (average) length of a read (for
  #     :RC), fragment (for :FC) or k-mer (for :KC)
  #   @return [Float] coverage, if count_tag and length are defined
  #   @return [nil] otherwise
  # @see #coverage!
  def coverage(count_tag: :RC, unit_length: 1)
    return nil unless optional_fieldnames.include?(count_tag)
    len = length
    return nil unless len
    get(count_tag).to_f / (len - unit_length + 1)
  end

  # @see #coverage
  # @!macro coverage
  # @raise [RGFA::Line::TagMissingError] if segment does not have count_tag
  # @!macro length_needed
  def coverage!(count_tag: :RC, unit_length: 1)
    c = coverage(count_tag: count_tag, unit_length: unit_length)
    return c unless c.nil?
    # coverage is nil either because the length is unknown (length! raises
    # UndefinedLengthError) or, if the length is known, because the tag
    # is missing
    length!
    raise RGFA::Line::TagMissingError,
      "Tag #{count_tag} undefined for segment #{name}"
  end

  # @return string representation of the segment
  # @param [Boolean] without_sequence if +true+, output "*" instead of sequence
  # @note if +without_sequence+ is +true+, the sequence field is temporarily
  #   set to "*" while the string is computed; the original value is
  #   restored afterwards, also if the computation raises an exception.
  def to_s(without_sequence: false)
    # explicit empty parentheses: the keyword argument must not be
    # forwarded to the superclass implementation
    return super() unless without_sequence
    saved = self.sequence
    self.sequence = "*"
    begin
      super()
    ensure
      # always put the real sequence back, so that a failure in super
      # cannot leave the segment with a "*" sequence
      self.sequence = saved
    end
  end

  # @return [Symbol] name of the segment as symbol
  def to_sym
    name.to_sym
  end

  private

  # Validation step specific to segment lines: checks the consistency
  # of the LN tag and the sequence length.
  # @raise [RGFA::Line::Segment::InconsistentLengthError]
  def validate_record_type_specific_info!
    validate_length!
  end

end
# Exception signalling that the length of a segment cannot be determined
# (no LN tag value and a "*" sequence)
class RGFA::Line::Segment::UndefinedLengthError < RGFA::Error
end
# Exception signalling that the value of the LN tag of a segment differs
# from the length of its sequence
class RGFA::Line::Segment::InconsistentLengthError < RGFA::Error
end
|