File: analysis.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (252 lines) | stat: -rw-r--r-- 9,873 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#
# bio/util/restriction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
#
# Author::    Trevor Wennblom  <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#

module Bio

require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)

class RestrictionEnzyme

class Analysis

  #--
  # require "analysis_basic.rb" here to avoid cyclic require
  #++
  require 'bio/util/restriction_enzyme/analysis_basic'

  # See cut instance method
  def self.cut( sequence, *args )
    self.new.cut( sequence, *args )
  end

  # See main documentation for Bio::RestrictionEnzyme
  #
  #
  # +cut+ takes into account
  # permutations of cut variations based on competitiveness of enzymes for an
  # enzyme cutsite or enzyme bindsite on a sequence.
  #
  # Example:
  #
  # FIXME add output
  #
  #   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'EcoRI')
  #
  # _same as:_
  #
  #   Bio::RestrictionEnzyme::Analysis.cut('gaattc', 'g^aattc')
  # ---
  # *Arguments*
  # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
  # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
  # *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects.   (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments) or a +Symbol+ containing an error code
  def cut( sequence, *args )
    view_ranges = false
    
    args.select { |i| i.class == Hash }.each do |hsh|
      hsh.each do |key, value|
        if key == :view_ranges
          unless ( value.kind_of?(TrueClass) or value.kind_of?(FalseClass) )
            raise ArgumentError, "view_ranges must be set to true or false, currently #{value.inspect}."
          end
          view_ranges = value
        end
      end
    end
    
    res = cut_and_return_by_permutations( sequence, *args )
    return res if res.class == Symbol
    # Format the fragments for the user
    fragments_for_display( res, view_ranges )
  end

  #########
  protected
  #########
  
  # See cut instance method
  #
  # ---
  # *Arguments*
  # * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
  # * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
  # May also supply a +Hash+ with the key ":max_permutations" to specificy how many permutations are allowed - a value of 0 indicates no permutations are allowed.
  # *Returns*:: +Hash+ Keys are a permutation ID, values are SequenceRange objects that have cuts applied.
  # _also_ may return the +Symbol+ ':sequence_empty', ':no_cuts_found', or ':too_many_permutations'
  def cut_and_return_by_permutations( sequence, *args )
    my_hash = {}
    maximum_permutations = nil

    hashes_in_args = args.select { |i| i.class == Hash }
    args.delete_if { |i| i.class == Hash }
    hashes_in_args.each do |hsh|
      hsh.each do |key, value|
        case key
        when :max_permutations, 'max_permutations', :maximum_permutations, 'maximum_permutations'
          maximum_permutations = value.to_i unless value == nil
        when :view_ranges
        else
          raise ArgumentError, "Received key #{key.inspect} in argument - I only know the key ':max_permutations' and ':view_ranges' currently.  Hash passed: #{hsh.inspect}"
        end
      end
    end
    
    if !sequence.kind_of?(String) or sequence.empty?
      logger.warn "The supplied sequence is empty." if defined?(logger)
      return :sequence_empty
    end
    sequence = Bio::Sequence::NA.new( sequence )
    
    enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )

    if enzyme_actions.empty? and initial_cuts.empty?
      logger.warn "This enzyme does not make any cuts on this sequence." if defined?(logger)
      return :no_cuts_found
    end

    # * When enzyme_actions.size is equal to '1' that means there are no permutations.
    # * If enzyme_actions.size is equal to '2' there is one
    #   permutation ("[0, 1]")
    # * If enzyme_actions.size is equal to '3' there are two
    #   permutations ("[0, 1, 2]")
    # * and so on..
    if maximum_permutations and enzyme_actions.size > 1
      if (enzyme_actions.size - 1) > maximum_permutations.to_i
        logger.warn "More permutations than maximum, skipping.  Found: #{enzyme_actions.size-1}  Max: #{maximum_permutations.to_i}" if defined?(logger)
        return :too_many_permutations
      end
    end
    
    if enzyme_actions.size > 1
      permutations = permute(enzyme_actions.size)
      
      permutations.each do |permutation|
        previous_cut_ranges = []
        # Primary and complement strands are both measured from '0' to 'sequence.size-1' here
        sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )

        # Add the cuts to the sequence_range from each enzyme_action contained
        # in initial_cuts.  These are the cuts that have no competition so are
        # not subject to permutations.
        initial_cuts.each do |enzyme_action|
          enzyme_action.cut_ranges.each do |cut_range|
            sequence_range.add_cut_range(cut_range)
          end
        end

        permutation.each do |id|
          enzyme_action = enzyme_actions[id]

          # conflict is false if the current enzyme action may cut in it's range.
          # conflict is true if it cannot due to a previous enzyme action making
          # a cut where this enzyme action needs a whole recognition site.
          conflict = false

          # If current size of enzyme_action overlaps with previous cut_range, don't cut
          # note that the enzyme action may fall in the middle of a previous enzyme action
          # so all cut locations must be checked that would fall underneath.
          previous_cut_ranges.each do |cut_range|
            next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange  # we aren't concerned with horizontal cuts
            previous_cut_left = cut_range.range.first 
            previous_cut_right = cut_range.range.last

            # Keep in mind: 
            # * The cut location is to the immediate right of the base located at the index.
            #   ex: at^gc -- the cut location is at index 1
            # * The enzyme action location is located at the base of the index.
            #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
            # method create_enzyme_actions has similar commentary if interested
            if (enzyme_action.right <= previous_cut_left) or
               (enzyme_action.left > previous_cut_right) or
               (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
              # no conflict
            else
              conflict = true
            end
          end

          next if conflict == true
          enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
          previous_cut_ranges += enzyme_action.cut_ranges        
        end # permutation.each

        # Fill in the source sequence for sequence_range so it knows what bases
        # to use
        sequence_range.fragments.primary = sequence
        sequence_range.fragments.complement = sequence.forward_complement
        my_hash[permutation] = sequence_range
      end # permutations.each
      
    else # if enzyme_actions.size == 1
      # no permutations, just do it
      sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
      initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
      sequence_range.fragments.primary = sequence
      sequence_range.fragments.complement = sequence.forward_complement
      my_hash[0] = sequence_range
    end

    my_hash
  end


  # Returns permutation orders for a given number of elements.
  #
  # Examples:
  #   permute(0) # => [[0]]
  #   permute(1) # => [[0]]
  #   permute(2) # => [[1, 0], [0, 1]]
  #   permute(3) # => [[2, 1, 0], [2, 0, 1], [1, 2, 0], [0, 2, 1], [1, 0, 2], [0, 1, 2]]
  #   permute(4) # => [[3, 2, 1, 0],
  #                    [3, 2, 0, 1],
  #                    [3, 1, 2, 0],
  #                    [3, 0, 2, 1],
  #                    [3, 1, 0, 2],
  #                    [3, 0, 1, 2],
  #                    [2, 3, 1, 0],
  #                    [2, 3, 0, 1],
  #                    [1, 3, 2, 0],
  #                    [0, 3, 2, 1],
  #                    [1, 3, 0, 2],
  #                    [0, 3, 1, 2],
  #                    [2, 1, 3, 0],
  #                    [2, 0, 3, 1],
  #                    [1, 2, 3, 0],
  #                    [0, 2, 3, 1],
  #                    [1, 0, 3, 2],
  #                    [0, 1, 3, 2],
  #                    [2, 1, 0, 3],
  #                    [2, 0, 1, 3],
  #                    [1, 2, 0, 3],
  #                    [0, 2, 1, 3],
  #                    [1, 0, 2, 3],
  #                    [0, 1, 2, 3]]
  #   
  # ---
  # *Arguments*
  # * +count+: +Number+ of different elements to be permuted
  # * +permutations+: ignore - for the recursive algorithm
  # *Returns*:: +Array+ of +Array+ objects with different possible permutation orders.  See examples.
  def permute(count, permutations = [[0]])
    return permutations if count <= 1
    new_arrays = []
    new_array = []

    (permutations[0].size + 1).times do |n|
      new_array.clear
      permutations.each { |a| new_array << a.dup }
      new_array.each { |e| e.insert(n, permutations[0].size) }
      new_arrays += new_array
    end

    permute(count-1, new_arrays)
  end

end # Analysis
end # RestrictionEnzyme
end # Bio