1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
|
#
# bio/util/restriction_enzyme/double_stranded/aligned_strands.rb - Align two SingleStrand objects
#
# Author:: Trevor Wennblom <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License:: The Ruby License
#
module Bio
require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)
class RestrictionEnzyme
class DoubleStranded
# Align two SingleStrand objects and return a Result
# object with +primary+ and +complement+ accessors.
#
class AlignedStrands
extend CutSymbol
extend StringFormatting
# Builds a new instance; construction is handed straight to the superclass
# initializer and nothing else is set up here.
# ---
# *Returns*:: Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands object
def initialize
  super
end
# The object returned for alignments: a Struct with two members,
# +primary+ and +complement+, each readable and writable through the
# accessors Struct generates.
Result = Struct.new(:primary, :complement)
# Pad and align two String objects that carry no cut symbols.
#
# The left and right 'n' padding is stripped from each strand, and the
# remaining sub-sequences are then re-padded identically: on each side both
# strings receive the maximum padding either of them had on that side.
#
# The sub-sequences stripped of left and right 'n' padding must be of equal
# length; otherwise ArgumentError is raised by the input validation.
#
# Example:
#   AlignedStrands.align('nngattacannnnn', 'nnnnnctaatgtnn') # =>
#    <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
#      primary="nnnnngattacannnnn",
#      complement="nnnnnctaatgtnnnnn">
#
# ---
# *Arguments*
# * +a+: Primary strand (converted with +to_s+)
# * +b+: Complementary strand (converted with +to_s+)
# *Returns*:: +Result+ object with equal padding on both strings
def self.align(a, b)
  primary    = a.to_s
  complement = b.to_s

  # Strip each strand once and reuse the cores for validation and assembly.
  primary_core    = strip_padding(primary)
  complement_core = strip_padding(complement)
  validate_input(primary_core, complement_core)

  # The greater of the two padding strings on each side is shared by both
  # strands (for runs of 'n' the greater string is the longer one).
  shared_left  = [left_padding(primary), left_padding(complement)].max
  shared_right = [right_padding(primary), right_padding(complement)].max

  Result.new(
    shared_left + primary_core + shared_right,
    shared_left + complement_core + shared_right
  )
end
# Pad and align two String objects, then mark the cut locations on each.
#
# Example:
#   AlignedStrands.align_with_cuts('nngattacannnnn', 'nnnnnctaatgtnn', [0, 10, 12], [0, 2, 12]) # =>
#    <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
#      primary="n n n n^n g a t t a c a n n^n n^n",
#      complement="n^n n^n n c t a a t g t n^n n n n">
#
# Notes:
# * To make room for the cut symbols each nucleotide is spaced out.
# * This is meant to be able to handle multiple cuts and completely
#   unrelated cutsites on the two strands, therefore no biological
#   algorithm assumptions (shortcuts) are made.
# * Cut locations index into the strand as it was passed in; they are
#   shifted here by however much left padding the strand gains, and the
#   cut symbol is placed directly after the base at that index.
#
# The sequences stripped of left and right 'n' padding must be of equal
# length; otherwise ArgumentError is raised by the input validation.
#
# ---
# *Arguments*
# * +a+: Primary sequence (converted with +to_s+)
# * +b+: Complementary sequence (converted with +to_s+)
# * +a_cuts+: Primary strand cut locations in 0-based index notation
# * +b_cuts+: Complementary strand cut locations in 0-based index notation
# *Returns*:: +Result+ object with equal padding on both strings and spacing between bases
def self.align_with_cuts(a,b,a_cuts,b_cuts)
  primary    = a.to_s
  complement = b.to_s

  primary_core    = strip_padding(primary)
  complement_core = strip_padding(complement)
  validate_input(primary_core, complement_core)

  primary_left,    primary_right    = left_padding(primary),    right_padding(primary)
  complement_left, complement_right = left_padding(complement), right_padding(complement)

  # Both strands are padded out to the wider padding found on each side.
  left_width  = [primary_left.length,  complement_left.length].max
  right_width = [primary_right.length, complement_right.length].max

  # Number of 'n' characters each strand gains on its left; every cut index
  # on that strand has to move right by the same amount.
  primary_shift    = left_width - primary_left.length
  complement_shift = left_width - complement_left.length

  aligned_primary =
    primary_left.ljust(left_width, 'n') + primary_core + primary_right.ljust(right_width, 'n')
  aligned_complement =
    complement_left.ljust(left_width, 'n') + complement_core + complement_right.ljust(right_width, 'n')

  # Insert from the highest index downwards so that earlier insertions do
  # not displace the positions still waiting to be marked.
  a_cuts.sort.reverse_each do |cut|
    aligned_primary.insert(cut + 1 + primary_shift, cut_symbol)
  end
  b_cuts.sort.reverse_each do |cut|
    aligned_complement.insert(cut + 1 + complement_shift, cut_symbol)
  end

  Result.new(add_spacing(aligned_primary), add_spacing(aligned_complement))
end
#########
protected
#########
# Checks that two already-stripped sequences have the same size.
#
# NOTE: this is defined with +def self.+, so the +protected+ keyword that
# precedes it does not change its visibility.
#
# ---
# *Arguments*
# * +a+: Primary sequence with padding removed
# * +b+: Complementary sequence with padding removed
# *Returns*:: +nil+ when the sizes match
# *Raises*:: ArgumentError listing the size and contents of both sequences when they differ
def self.validate_input(a,b)
  return if a.size == b.size

  report = [
    'Result sequences are not the same size. Does not align sequences with differing lengths after strip_padding.',
    "#{a.size}, #{a.inspect}",
    "#{b.size}, #{b.inspect}"
  ].join("\n")
  raise ArgumentError, report
end
end # AlignedStrands
end # DoubleStranded
end # RestrictionEnzyme
end # Bio
|