1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
|
#
# bio/util/restriction_enzyme/double_stranded.rb - DoubleStranded restriction enzyme sequence
#
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License:: The Ruby License
#
module Bio
require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)
class RestrictionEnzyme
# A pair of SingleStrand and SingleStrandComplement objects with methods to
# add utility to their relation.
#
# = Notes
# * This is created by Bio::RestrictionEnzyme.new for convenience.
# * The two strands accessible are +primary+ and +complement+.
# * SingleStrand methods may be used on DoubleStranded and they will be passed to +primary+.
#
#
# FIXME needs better docs
class DoubleStranded
autoload :AlignedStrands, 'bio/util/restriction_enzyme/double_stranded/aligned_strands'
autoload :CutLocations, 'bio/util/restriction_enzyme/double_stranded/cut_locations'
autoload :CutLocationPair, 'bio/util/restriction_enzyme/double_stranded/cut_location_pair'
autoload :CutLocationsInEnzymeNotation, 'bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation'
autoload :CutLocationPairInEnzymeNotation, 'bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation'
include CutSymbol
extend CutSymbol
include StringFormatting
extend StringFormatting
# The primary strand
attr_reader :primary
# The complement strand
attr_reader :complement
# Cut locations in 0-based index format, DoubleStranded::CutLocations object
attr_reader :cut_locations
# Cut locations in enzyme index notation, DoubleStranded::CutLocationsInEnzymeNotation object
attr_reader :cut_locations_in_enzyme_notation
# [+erp+] One of three possible parameters: The name of an enzyme, a REBASE::EnzymeEntry object, or a nucleotide pattern with a cut mark.
# [+raw_cut_pairs+] The cut locations in enzyme index notation.
#
# Enzyme index notation:: 1.._n_, value before 1 is -1
#
# Examples of the allowable cut locations for +raw_cut_pairs+ follows. 'p' and
# 'c' refer to a cut location on the 'p'rimary and 'c'omplement strands.
#
# 1, [3,2], [20,22], 57
# p, [p,c], [p, c], p
#
# Which is the same as:
#
# 1, (3..2), (20..22), 57
# p, (p..c), (p..c), p
#
# Examples of partial cuts:
# 1, [nil,2], [20,nil], 57
# p, [p, c], [p, c], p
#
def initialize(erp, *raw_cut_pairs)
# 'erp' : 'E'nzyme / 'R'ebase / 'P'attern
k = erp.class
if k == Bio::REBASE::EnzymeEntry
# Passed a Bio::REBASE::EnzymeEntry object
unless raw_cut_pairs.empty?
err = "A Bio::REBASE::EnzymeEntry object was passed, however the cut locations contained values. Ambiguous or redundant.\n"
err += "inspect = #{raw_cut_pairs.inspect}"
raise ArgumentError, err
end
initialize_with_rebase( erp )
elsif erp.kind_of? String
# Passed something that could be an enzyme pattern or an anzyme name
# Decide if this String is an enzyme name or a pattern
if Bio::RestrictionEnzyme.enzyme_name?( erp )
# FIXME we added this to rebase...
# Check if it's a known name
known_enzyme = false
known_enzyme = true if Bio::RestrictionEnzyme.rebase[ erp ]
# Try harder to find the enzyme
unless known_enzyme
re = %r"^#{erp}$"i
Bio::RestrictionEnzyme.rebase.each { |name, v| (known_enzyme = true; erp = name; break) if name =~ re }
end
if known_enzyme
initialize_with_rebase( Bio::RestrictionEnzyme.rebase[erp] )
else
raise IndexError, "No entry found for enzyme named '#{erp}'"
end
else
# Not an enzyme name, so a pattern is assumed
if erp =~ re_cut_symbol
initialize_with_pattern_and_cut_symbols( erp )
else
initialize_with_pattern_and_cut_locations( erp, raw_cut_pairs )
end
end
elsif k == NilClass
err = "Passed a nil value. Perhaps you tried to pass a Bio::REBASE::EnzymeEntry that does not exist?\n"
err += "inspect = #{erp.inspect}"
raise ArgumentError, err
else
err = "I don't know what to do with class #{k} for erp.\n"
err += "inspect = #{erp.inspect}"
raise ArgumentError, err
end
end
# See AlignedStrands.align
def aligned_strands
AlignedStrands.align(@primary.pattern, @complement.pattern)
end
# See AlignedStrands.align_with_cuts
def aligned_strands_with_cuts
AlignedStrands.align_with_cuts(@primary.pattern, @complement.pattern, @primary.cut_locations, @complement.cut_locations)
end
# Returns +true+ if the cut pattern creates blunt fragments.
# (opposite of sticky)
def blunt?
as = aligned_strands_with_cuts
ary = [as.primary, as.complement]
ary.collect! { |seq| seq.split( cut_symbol ) }
# convert the cut sections to their lengths
ary.each { |i| i.collect! { |c| c.length } }
ary[0] == ary[1]
end
# Returns +true+ if the cut pattern creates sticky fragments.
# (opposite of blunt)
def sticky?
!blunt?
end
# Takes a RestrictionEnzyme object and a numerical offset to the sequence and
# returns an EnzymeAction
#
# +restriction_enzyme+:: RestrictionEnzyme
# +offset+:: Numerical offset of where the enzyme action occurs on the seqeunce
def create_action_at( offset )
# x is the size of the fully aligned sequence with maximum padding needed
# to make a match on the primary and complement strand.
#
# For example -
# Note how EcoRII needs extra padding on the beginning and ending of the
# sequence 'ccagg' to make the match since the cut must occur between
# two nucleotides and can not occur on the very end of the sequence.
#
# EcoRII:
# :blunt: "0"
# :c2: "5"
# :c4: "0"
# :c1: "-1"
# :pattern: CCWGG
# :len: "5"
# :name: EcoRII
# :c3: "0"
# :ncuts: "2"
#
# -1 1 2 3 4 5
# 5' - n^c c w g g n - 3'
# 3' - n g g w c c^n - 5'
#
# (w == [at])
x = aligned_strands.primary.size
enzyme_action = EnzymeAction.new( offset,
offset + x-1,
offset,
offset + x-1)
@cut_locations.each do |cut_location_pair|
# cut_pair is a DoubleStranded::CutLocationPair
p, c = cut_location_pair.primary, cut_location_pair.complement
if c >= p
enzyme_action.add_cut_range(offset+p, nil, nil, offset+c)
else
enzyme_action.add_cut_range(nil, offset+p, offset+c, nil)
end
end
enzyme_action
end
# An EnzymeAction is a way of representing a potential effect that a
# RestrictionEnzyme may have on a nucleotide sequence, an 'action'.
#
# Multiple cuts in multiple locations on a sequence may occur in one
# 'action' if it is done by a single enzyme.
#
# An EnzymeAction is a series of locations that represents where the restriction
# enzyme will bind on the sequence, as well as what ranges are cut on the
# sequence itself. The complexity is due to the fact that our virtual
# restriction enzyme may create multiple segments from its cutting action,
# on which another restriction enzyme may operate upon.
#
# For example, the DNA sequence:
#
# 5' - G A A T A A A C G A - 3'
# 3' - C T T A T T T G C T - 5'
#
# When mixed with the restriction enzyme with the following cut pattern:
#
# 5' - A|A T A A A C|G - 3'
# +-+ +
# 3' - T T|A T T T G|C - 5'
#
# And also mixed with the restriction enzyme of the following cut pattern:
#
# 5' - A A|A C - 3'
# +-+
# 3' - T|T T G - 5'
#
# Would result in a DNA sequence with these cuts:
#
# 5' - G A|A T A A|A C|G A - 3'
# +-+ +-+ +
# 3' - C T T|A T|T T G|C T - 5'
#
# Or these separate "free-floating" sequences:
#
# 5' - G A - 3'
# 3' - C T T - 5'
#
# 5' - A T A A - 3'
# 3' - A T - 5'
#
# 5' - A C - 3'
# 3' - T T G - 5'
#
# 5' - G A - 3'
# 3' - C T - 5'
#
# This would be represented by two EnzymeActions - one for each
# RestrictionEnzyme.
#
# This is, however, subject to competition. If the second enzyme reaches
# the target first, the the first enzyme will not be able to find the
# appropriate bind site.
#
# FIXME complete these docs
#
# To initialize an EnzymeAction you must first instantiate it with the
# beginning and ending locations of where it will operate on a nucleotide
# sequence.
#
# Next the ranges of cu
#
# An EnzymeAction is
# Defines a single enzyme action, in this case being a range that correlates
# to the DNA sequence that may contain it's own internal cuts.
class EnzymeAction < Bio::RestrictionEnzyme::Range::SequenceRange
end
#########
protected
#########
def initialize_with_pattern_and_cut_symbols( s )
p_cl = SingleStrand::CutLocationsInEnzymeNotation.new( strip_padding(s) )
s = Bio::Sequence::NA.new( strip_cuts_and_padding(s) )
# * Reflect cuts that are in enzyme notation
# * 0 is not a valid enzyme index, decrement 0 and all negative
c_cl = p_cl.collect {|n| (n >= s.length or n < 1) ? ((s.length - n) - 1) : (s.length - n)}
create_cut_locations( p_cl.zip(c_cl) )
create_primary_and_complement( s, p_cl, c_cl )
end
def initialize_with_pattern_and_cut_locations( s, raw_cl )
create_cut_locations(raw_cl)
create_primary_and_complement( Bio::Sequence::NA.new(s), @cut_locations_in_enzyme_notation.primary, @cut_locations_in_enzyme_notation.complement )
end
def create_primary_and_complement(primary_seq, p_cuts, c_cuts)
@primary = SingleStrand.new( primary_seq, p_cuts )
@complement = SingleStrandComplement.new( primary_seq.forward_complement, c_cuts )
end
def create_cut_locations(raw_cl)
@cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( *raw_cl.collect {|cl| CutLocationPairInEnzymeNotation.new(cl)} )
@cut_locations = @cut_locations_in_enzyme_notation.to_array_index
end
def initialize_with_rebase( e )
p_cl = [e.primary_strand_cut1, e.primary_strand_cut2]
c_cl = [e.complementary_strand_cut1, e.complementary_strand_cut2]
# If there's no cut in REBASE it's represented as a 0.
# 0 is an invalid index, it just means no cut.
p_cl.delete(0)
c_cl.delete(0)
raise IndexError unless p_cl.size == c_cl.size
initialize_with_pattern_and_cut_locations( e.pattern, p_cl.zip(c_cl) )
end
end # DoubleStranded
end # RestrictionEnzyme
end # Bio
|