File: segment.rb

package info (click to toggle)
ruby-rgfa 1.3.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 820 kB
  • sloc: ruby: 5,649; makefile: 9
file content (209 lines) | stat: -rw-r--r-- 5,858 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# A segment line (record type +S+) of a RGFA file.
#
# Besides its fields, a segment stores references to the links,
# containments and paths in which it is involved (see {#links},
# {#containments} and {#paths}).
class RGFA::Line::Segment < RGFA::Line

  RECORD_TYPE = :S
  REQFIELDS = [:name, :sequence]
  PREDEFINED_OPTFIELDS = [:LN, :RC, :FC, :KC, :SH, :UR]
  DATATYPE = {
    :name => :lbl,
    :sequence => :seq,
    :LN => :i,
    :RC => :i,
    :FC => :i,
    :KC => :i,
    :SH => :H,
    :UR => :Z
  }

  define_field_methods!

  attr_writer :links, :containments, :paths

  # References to the links in which the segment is involved.
  #
  # @!macro references_table
  #   The references are in four arrays which are
  #   accessed from a nested hash table. The first key is
  #   the direction (from or to), the second is the orientation
  #   (+ or -).
  #
  # @example
  #   segment.links[:from][:+]
  #
  # @return [Hash{RGFA::Line::DIRECTION => Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Link>}}]
  def links
    # the table is created lazily, on first access
    @links ||= {:from => {:+ => [], :- => []},
                :to   => {:+ => [], :- => []}}
  end

  # References to the containments in which the segment is involved.
  # @!macro references_table
  #
  # @example
  #   segment.containments[:from][:+]
  #
  # @return [Hash{RGFA::Line::DIRECTION => Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Containment>}}]
  def containments
    # the table is created lazily, on first access
    @containments ||= {:from => {:+ => [], :- => []},
                       :to   => {:+ => [], :- => []}}
  end

  # References to the paths in which the segment is involved.
  #
  # The references are in two arrays which are
  # accessed from a hash table. The key is the orientation
  # (+ or -).
  #
  # @example
  #   segment.paths[:+]
  #
  # @return [Hash{RGFA::Line::ORIENTATION => Array<RGFA::Line::Path>}]
  def paths
    # the table is created lazily, on first access
    @paths ||= {:+ => [], :- => []}
  end

  # All containments where a segment is involved.
  # @!macro this_is_a_copy
  #   @note the list shall be considered read-only, as this
  #     is a copy of the original arrays of references, concatenated
  #     to each other.
  # @return [Array] concatenation of the four arrays of {#containments},
  #   in the order from/+, from/-, to/+, to/-
  def all_containments
    refs = containments
    refs[:from][:+] + refs[:from][:-] + refs[:to][:+] + refs[:to][:-]
  end

  # All links where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] concatenation of the four arrays of {#links},
  #   in the order from/+, from/-, to/+, to/-
  def all_links
    refs = links
    refs[:from][:+] + refs[:from][:-] + refs[:to][:+] + refs[:to][:-]
  end

  # All links and containments where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] the links, followed by the containments
  def all_connections
    all_links + all_containments
  end

  # All paths where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] concatenation of the two arrays of {#paths},
  #   in the order +, -
  def all_paths
    refs = paths
    refs[:+] + refs[:-]
  end

  # All paths, links and containments where the segment is involved.
  # @!macro this_is_a_copy
  # @return [Array] the links, followed by the containments and the paths
  def all_references
    all_connections + all_paths
  end

  # Checks that the LN tag, if present, agrees with the sequence length.
  # Nothing is checked if the sequence is "*" or there is no LN tag.
  #
  # @raise [RGFA::Line::Segment::InconsistentLengthError]
  #    if sequence length and LN tag are not consistent.
  # @return [void]
  def validate_length!
    if sequence != "*" && optional_fieldnames.include?(:LN) &&
        self.LN != sequence.length
      raise RGFA::Line::Segment::InconsistentLengthError,
        "Length in LN tag (#{self.LN}) "+
        "is different from length of sequence field (#{sequence.length})"
    end
  end

  # @!macro [new] length
  #   @return [Integer] value of LN tag, if segment has LN tag
  #   @return [Integer] sequence length if no LN and sequence not "*"
  # @return [nil] if there is no LN tag and the sequence is "*"
  # @see #length!
  def length
    ln = self.LN
    return ln if ln
    # without a LN tag, the length is only known if the sequence is given
    sequence.length if sequence != "*"
  end

  # @!macro length
  # @!macro [new] length_needed
  #   @raise [RGFA::Line::Segment::UndefinedLengthError] if not an LN tag and
  #     the sequence is "*"
  # @see #length
  def length!
    len = self.length
    raise RGFA::Line::Segment::UndefinedLengthError,
      "No length information available" if len.nil?
    len
  end

  # @!macro [new] coverage
  #   The coverage computed from a count_tag.
  #   If unit_length is provided then: count/(length-unit_length+1),
  #   otherwise: count/length.
  #   The latter is a good approximation if length >>> unit_length.
  #   @note no check is made that length-unit_length+1 is positive.
  #   @param [Symbol] count_tag <i>(defaults to +:RC+)</i>
  #     integer tag storing the count, usually :KC, :RC or :FC
  #   @param [Integer] unit_length the (average) length of a read (for
  #     :RC), fragment (for :FC) or k-mer (for :KC)
  #   @return [Float] coverage, if count_tag and length are defined
  # @return [nil] otherwise
  # @see #coverage!
  def coverage(count_tag: :RC, unit_length: 1)
    return nil unless optional_fieldnames.include?(count_tag)
    len = self.length
    return nil unless len
    # the count is converted to float, so that the division is not truncated
    self.get(count_tag).to_f / (len - unit_length + 1)
  end

  # @see #coverage
  # @!macro coverage
  # @raise [RGFA::Line::TagMissingError] if segment does not have count_tag
  # @!macro length_needed
  def coverage!(count_tag: :RC, unit_length: 1)
    cov = coverage(count_tag: count_tag, unit_length: unit_length)
    return cov unless cov.nil?
    # coverage is nil either because the length is undefined (in which case
    # length! raises the more specific error) or because the tag is missing
    self.length!
    raise RGFA::Line::TagMissingError,
      "Tag #{count_tag} undefined for segment #{name}"
  end

  # @return string representation of the segment
  # @param [Boolean] without_sequence if +true+, output "*" instead of sequence
  def to_s(without_sequence: false)
    return super() unless without_sequence
    # the sequence is temporarily replaced by the placeholder "*"
    saved = self.sequence
    self.sequence = "*"
    begin
      super()
    ensure
      # restore the original sequence also if the serialization raises,
      # so that the segment is never left with its sequence replaced
      self.sequence = saved
    end
  end

  # @return [Symbol] name of the segment as symbol
  def to_sym
    name.to_sym
  end

  private

  # Record type specific validation: for segments, the consistency
  # of the LN tag with the sequence length (see {#validate_length!}).
  def validate_record_type_specific_info!
    validate_length!
  end

end

# Error raised when the length of a segment cannot be determined
# (see RGFA::Line::Segment#length!, which raises it when no length
# information is available).
class RGFA::Line::Segment::UndefinedLengthError < RGFA::Error
end

# Error raised when the value of the LN tag of a segment differs from
# the length of its sequence field
# (see RGFA::Line::Segment#validate_length!).
class RGFA::Line::Segment::InconsistentLengthError < RGFA::Error
end