File: aligned_strands.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (134 lines) | stat: -rw-r--r-- 4,300 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#
# bio/util/restriction_enzyme/double_stranded/aligned_strands.rb - Align two SingleStrand objects
#
# Author::    Trevor Wennblom  <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#

module Bio

require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)

class RestrictionEnzyme
class DoubleStranded

# Align two SingleStrand objects and return a Result
# object with +primary+ and +complement+ accessors.
#
class AlignedStrands
  extend CutSymbol
  extend StringFormatting

  # Creates a new object. Nothing is set up here; construction is simply
  # forwarded to the superclass.
  # ---
  # *Returns*:: Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands object
  def initialize
    super
  end

  # The object returned for alignments.
  #
  # A Struct with two members:
  # * +primary+: the aligned primary strand
  # * +complement+: the aligned complementary strand
  Result = Struct.new(:primary, :complement)

  # Pad and align two String objects without cut symbols.
  #
  # The leading and trailing 'n' padding is measured on both inputs and then
  # removed. Both strings are rebuilt with the same padding on each side: the
  # larger of the two original paddings for that side.
  #
  # After the left and right 'n' padding has been removed the two
  # sub-sequences must be of equal length; validate_input is called to
  # enforce this.
  #
  # Example:
  #   AlignedStrands.align('nngattacannnnn', 'nnnnnctaatgtnn') # =>
  #    <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
  #      primary="nnnnngattacannnnn",
  #      complement="nnnnnctaatgtnnnnn">
  #
  # ---
  # *Arguments*
  # * +a+: Primary strand
  # * +b+: Complementary strand
  # *Returns*:: +Result+ object with equal padding on both strings
  def self.align(a, b)
    primary_seq = a.to_s
    complement_seq = b.to_s

    primary_core = strip_padding(primary_seq)
    complement_core = strip_padding(complement_seq)
    validate_input(primary_core, complement_core)

    # The greater padding string in sort order wins; for runs of 'n' that is
    # the longer run.
    left_pad = [left_padding(primary_seq), left_padding(complement_seq)].max
    right_pad = [right_padding(primary_seq), right_padding(complement_seq)].max

    Result.new(
      left_pad + primary_core + right_pad,
      left_pad + complement_core + right_pad
    )
  end

  # Pad and align two String objects with cut symbols.
  #
  # Example:
  #   AlignedStrands.align_with_cuts('nngattacannnnn', 'nnnnnctaatgtnn', [0, 10, 12], [0, 2, 12]) # =>
  #     <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
  #       primary="n n n n^n g a t t a c a n n^n n^n",
  #       complement="n^n n^n n c t a a t g t n^n n n n">
  #
  # Notes:
  # * To make room for the cut symbols each nucleotide is spaced out.
  # * This is meant to be able to handle multiple cuts and completely
  #   unrelated cutsites on the two strands, therefore no biological
  #   algorithm assumptions (shortcuts) are made.
  # * Cut locations refer to positions in the sequence as it is passed in
  #   (including its own 'n' padding); they are shifted automatically when
  #   extra left padding is added to that strand.
  # * A cut at index +c+ places the cut symbol directly after the character
  #   at index +c+.
  #
  # The sequences stripped of left and right 'n' padding must be of equal
  # length; validate_input is called to enforce this.
  #
  # ---
  # *Arguments*
  # * +a+: Primary sequence
  # * +b+: Complementary sequence
  # * +a_cuts+: Primary strand cut locations in 0-based index notation
  # * +b_cuts+: Complementary strand cut locations in 0-based index notation
  # *Returns*:: +Result+ object with equal padding on both strings and spacing between bases
  def self.align_with_cuts(a, b, a_cuts, b_cuts)
    a = a.to_s
    b = b.to_s
    a_core = strip_padding(a)
    b_core = strip_padding(b)
    validate_input(a_core, b_core)

    a_left, a_right = left_padding(a), right_padding(a)
    b_left, b_right = left_padding(b), right_padding(b)

    # Each side of both strands is widened to the longer of the two paddings.
    left_width = [a_left.length, b_left.length].max
    right_width = [a_right.length, b_right.length].max

    # 'n's added on the left of a strand push its cut positions to the right
    # by the same amount; the strand that already had the longer left padding
    # gets an adjustment of zero.
    a_adjust = left_width - a_left.length
    b_adjust = left_width - b_left.length

    a_extra_right = 'n' * (right_width - a_right.length)
    b_extra_right = 'n' * (right_width - b_right.length)

    a = a_left + ('n' * a_adjust) + a_core + a_right + a_extra_right
    b = b_left + ('n' * b_adjust) + b_core + b_right + b_extra_right

    # Insert from the highest index downwards so that an insertion never
    # shifts a position that still has to be processed.
    a_cuts.sort.reverse_each { |c| a.insert(c + 1 + a_adjust, cut_symbol) }
    b_cuts.sort.reverse_each { |c| b.insert(c + 1 + b_adjust, cut_symbol) }

    Result.new(add_spacing(a), add_spacing(b))
  end

  #########
  protected
  #########

  # Ensure the two (already padding-stripped) sequences have the same size.
  #
  # NOTE(review): the bare +protected+ above this method does not apply to
  # methods defined with +def self.+, so this method is callable publicly --
  # confirm whether +private_class_method+ was intended.
  #
  # ---
  # *Arguments*
  # * +a+: First sequence
  # * +b+: Second sequence
  # *Returns*:: +nil+ when the sizes match
  # *Raises*:: ArgumentError listing both sizes and sequences when they differ
  def self.validate_input(a,b)
    return if a.size == b.size

    report = [
      "Result sequences are not the same size.  Does not align sequences with differing lengths after strip_padding.",
      "#{a.size}, #{a.inspect}",
      "#{b.size}, #{b.inspect}"
    ]
    raise ArgumentError, report.join("\n")
  end
end # AlignedStrands
end # DoubleStranded
end # RestrictionEnzyme
end # Bio