1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
|
#
# bio/util/restriction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
#
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License:: The Ruby License
#
require 'set' # for method create_enzyme_actions
require 'bio/sequence'
module Bio
require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)
class RestrictionEnzyme
class Analysis
# See cut_without_permutations instance method
def self.cut_without_permutations( sequence, *args )
self.new.cut_without_permutations( sequence, *args )
end
# See main documentation for Bio::RestrictionEnzyme
#
# Bio::RestrictionEnzyme.cut is preferred over this!
#
# USE AT YOUR OWN RISK
#
# This is a simpler version of method +cut+. +cut+ takes into account
# permutations of cut variations based on competitiveness of enzymes for an
# enzyme cutsite or enzyme bindsite on a sequence. This does not take into
# account those possibilities and is therefore faster, but less likely to be
# accurate.
#
# This code is mainly included as an academic example
# without having to wade through the extra layer of complexity added by the
# permutations.
#
# Example:
#
# FIXME add output
#
# Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'EcoRI')
#
# _same as:_
#
# Bio::RestrictionEnzyme::Analysis.cut_without_permutations('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence.
# * +args+: Series of enzyme names, enzymes sequences with cut marks, or RestrictionEnzyme objects.
# *Returns*:: Bio::RestrictionEnzyme::Fragments object populated with Bio::RestrictionEnzyme::Fragment objects. (Note: unrelated to Bio::RestrictionEnzyme::Range::SequenceRange::Fragments)
def cut_without_permutations( sequence, *args )
return fragments_for_display( {} ) if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
# create_enzyme_actions returns two seperate array elements, they're not
# needed separated here so we put them into one array
enzyme_actions = create_enzyme_actions( sequence, *args ).flatten
return fragments_for_display( {} ) if enzyme_actions.empty?
# Primary and complement strands are both measured from '0' to 'sequence.size-1' here
sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
# Add the cuts to the sequence_range from each enzyme_action
enzyme_actions.each do |enzyme_action|
enzyme_action.cut_ranges.each do |cut_range|
sequence_range.add_cut_range(cut_range)
end
end
# Fill in the source sequence for sequence_range so it knows what bases
# to use
sequence_range.fragments.primary = sequence
sequence_range.fragments.complement = sequence.forward_complement
# Format the fragments for the user
fragments_for_display( {0 => sequence_range} )
end
#########
protected
#########
# Take the fragments from SequenceRange objects generated from add_cut_range
# and return unique results as a Bio::RestrictionEnzyme::Analysis::Fragment object.
#
# ---
# *Arguments*
# * +hsh+: +Hash+ Keys are a permutation ID, if any. Values are SequenceRange objects that have cuts applied.
# *Returns*:: Bio::RestrictionEnzyme::Analysis::Fragments object populated with Bio::RestrictionEnzyme::Analysis::Fragment objects.
def fragments_for_display( hsh, view_ranges=false )
ary = Fragments.new
return ary unless hsh
hsh.each do |permutation_id, sequence_range|
sequence_range.fragments.for_display.each do |fragment|
if view_ranges
ary << Bio::RestrictionEnzyme::Fragment.new(fragment.primary, fragment.complement, fragment.p_left, fragment.p_right, fragment.c_left, fragment.c_right)
else
ary << Bio::RestrictionEnzyme::Fragment.new(fragment.primary, fragment.complement)
end
end
end
ary.uniq! unless view_ranges
ary
end
# Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
#
# ---
# *Arguments*
# * +sequence+: The string of DNA to match the enzyme recognition sites against
# * +args+:: The enzymes to use.
# *Returns*:: +Array+ with the first element being an array of EnzymeAction objects that +sometimes_cut+, and are subject to competition. The second is an array of EnzymeAction objects that +always_cut+ and are not subject to competition.
def create_enzyme_actions( sequence, *args )
all_enzyme_actions = []
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
# make sure pattern is the proper size
# for more info see the internal documentation of
# Bio::RestrictionEnzyme::DoubleStranded.create_action_at
pattern = Bio::Sequence::NA.new(
Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands.align(
enzyme.primary, enzyme.complement
).primary
).to_re
find_match_locations( sequence, pattern ).each do |offset|
all_enzyme_actions << enzyme.create_action_at( offset )
end
end
# FIXME VerticalCutRange should really be called VerticalAndHorizontalCutRange
# * all_enzyme_actions is now full of EnzymeActions at specific locations across
# the sequence.
# * all_enzyme_actions will now be examined to see if any EnzymeActions may
# conflict with one another, and if they do they'll be made note of in
# indicies_of_sometimes_cut. They will then be remove FIXME
# * a conflict occurs if another enzyme's bind site is compromised do due
# to another enzyme's cut. Enzyme's bind sites may overlap and not be
# competitive, however neither bind site may be part of the other
# enzyme's cut or else they do become competitive.
#
# Take current EnzymeAction's entire bind site and compare it to all other
# EzymeAction's cut ranges. Only look for vertical cuts as boundaries
# since trailing horizontal cuts would have no influence on the bind site.
#
# If example Enzyme A makes this cut pattern (cut range 2..5):
#
# 0 1 2|3 4 5 6 7
# +-----+
# 0 1 2 3 4 5|6 7
#
# Then the bind site (and EnzymeAction range) for Enzyme B would need it's
# right side to be at index 2 or less, or it's left side to be 6 or greater.
competition_indexes = Set.new
all_enzyme_actions[0..-2].each_with_index do |current_enzyme_action, i|
next if competition_indexes.include? i
next if current_enzyme_action.cut_ranges.empty? # no cuts, some enzymes are like this (ex. CjuI)
all_enzyme_actions[i+1..-1].each_with_index do |comparison_enzyme_action, j|
j += (i + 1)
next if competition_indexes.include? j
next if comparison_enzyme_action.cut_ranges.empty? # no cuts
if (current_enzyme_action.right <= comparison_enzyme_action.cut_ranges.min_vertical) or
(current_enzyme_action.left > comparison_enzyme_action.cut_ranges.max_vertical)
# no conflict
else
competition_indexes += [i, j] # merge both indexes into the flat set
end
end
end
sometimes_cut = all_enzyme_actions.values_at( *competition_indexes )
always_cut = all_enzyme_actions
always_cut.delete_if {|x| sometimes_cut.include? x }
[sometimes_cut, always_cut]
end
# Returns an +Array+ of the match indicies of a +RegExp+ to a string.
#
# Example:
#
# find_match_locations('abccdefeg', /[ce]/) # => [2,3,5,7]
#
# ---
# *Arguments*
# * +string+: The string to scan
# * +re+: A RegExp to use
# *Returns*:: +Array+ with indicies of match locations
def find_match_locations( string, re )
md = string.match( re )
locations = []
counter = 0
while md
# save the match index relative to the original string
locations << (counter += md.begin(0))
# find the next match
md = string[ (counter += 1)..-1 ].match( re )
end
locations
end
end # Analysis
end # RestrictionEnzyme
end # Bio
|