File: string_formatting.rb

package info (click to toggle)
ruby-bio 2.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,108 kB
  • sloc: ruby: 68,331; perl: 13; makefile: 11; sh: 1
file content (110 lines) | stat: -rw-r--r-- 2,871 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#
# bio/util/restriction_enzyme/string_formatting.rb - Useful functions for string manipulation
#
# Author::    Trevor Wennblom  <mailto:trevor@corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License::   The Ruby License
#

module Bio

require 'bio/util/restriction_enzyme' unless const_defined?(:RestrictionEnzyme)

class RestrictionEnzyme

module StringFormatting
  include CutSymbol
  extend CutSymbol

  # Return the sequence with a single space inserted between adjacent bases
  # so that strands can be aligned in columns.  Does not add whitespace
  # around cut symbols.
  #
  # Example:
  #   pattern = 'n^ng^arraxt^n'
  #   add_spacing( pattern )      # => "n^n g^a r r a x t^n"
  #
  # The sequence is walked character by character (not byte by byte), so a
  # multibyte character is kept intact and counted as one base.
  #
  # ---
  # *Arguments*
  # * +seq+: sequence with cut symbols
  # * +cs+: (_optional_) Cut symbol along the string; each character of +seq+
  #   is compared against it, so it is expected to be a single character.
  #   Defaults to +cut_symbol+.  The reason this is definable outside of
  #   CutSymbol is that this is a utility function used to form vertical and
  #   horizontal cuts such as:
  #
  #     a|t g c
  #      +---+
  #     t a c|g
  # *Returns*:: +String+ sequence with single character distance between bases
  def add_spacing( seq, cs = cut_symbol )
    spaced = ''.dup       # mutable buffer, appended to in place
    after_base = false    # true when the previous character was a base
    seq.each_char do |char|
      if char == cs
        # Cut symbols are emitted bare and suppress the space that would
        # otherwise precede the next base.
        spaced << char
        after_base = false
      else
        spaced << ' ' if after_base
        spaced << char
        after_base = true
      end
    end
    spaced
  end

  # Remove extraneous nucleic acid wildcards ('n' padding) from the
  # left and right sides.  Any 'n' found between other bases is kept.
  # A sequence that is empty, or that consists solely of 'n', yields an
  # empty string.
  #
  # Example:
  #   strip_padding( 'nnatngcn' )   # => "atngc"
  #
  # ---
  # *Arguments*
  # * +s+: sequence with extraneous 'n' padding
  # *Returns*:: +String+ sequence without 'n' padding on the sides
  def strip_padding( s )
    # \A and \z anchor on the string boundaries, so only the leading and
    # trailing runs of 'n' are removed.
    s.sub( /\An+/, '' ).sub( /n+\z/, '' )
  end

  # Delete every cut symbol from the sequence, then trim the extraneous
  # nucleic acid wildcards ('n' padding) from its left and right sides.
  #
  # ---
  # *Arguments*
  # * +s+: sequence with extraneous 'n' padding and cut symbols
  # *Returns*:: +String+ sequence without 'n' padding on the sides or cut symbols
  def strip_cuts_and_padding( s )
    without_cuts = s.tr( cut_symbol, '' )
    strip_padding( without_cuts )
  end

  # Return the 'n' padding on the left side of the strand, i.e. the run of
  # 'n' characters found at the very start of the string.  A run of 'n'
  # that merely starts a later line of a multi-line string is not counted.
  #
  # Example:
  #   left_padding( 'nnatgn' )   # => "nn"
  #   left_padding( 'atgn' )     # => ""
  #
  # ---
  # *Arguments*
  # * +s+: sequence with extraneous 'n' padding on the left side of the strand
  #   (+nil+ is tolerated and treated as having no padding)
  # *Returns*:: +String+ the 'n' padding from the left side
  def left_padding( s )
    found = /\An+/.match( s )
    found ? found[0] : ''  # Don't pass nil values
  end

  # Return the 'n' padding on the right side of the strand, i.e. the run of
  # 'n' characters found at the very end of the string.  A run of 'n' that
  # merely ends an earlier line of a multi-line string is not counted.
  #
  # Example:
  #   right_padding( 'natgnn' )   # => "nn"
  #   right_padding( 'natg' )     # => ""
  #
  # ---
  # *Arguments*
  # * +s+: sequence with extraneous 'n' padding on the right side of the strand
  #   (+nil+ is tolerated and treated as having no padding)
  # *Returns*:: +String+ the 'n' padding from the right side
  def right_padding( s )
    found = /n+\z/.match( s )
    found ? found[0] : ''  # Don't pass nil values
  end
end # StringFormatting
end # RestrictionEnzyme
end # Bio