File: emboss.rb

package info (click to toggle)
bioruby 1.1.0-1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 3,880 kB
  • ctags: 5,416
  • sloc: ruby: 44,401; makefile: 58; sh: 4
file content (154 lines) | stat: -rw-r--r-- 4,511 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
# = bio/appl/emboss.rb - EMBOSS wrapper
# 
# Copyright::  Copyright (C) 2002, 2005 Toshiaki Katayama<k@bioruby.org>
# Copyright::  Copyright (C) 2006       Jan Aerts <jan.aerts@bbsrc.ac.uk>
# License::    The Ruby License
#
# $Id: emboss.rb,v 1.8 2007/04/05 23:35:39 trevor Exp $
#

module Bio

# == Description
#
# This file holds classes pertaining to the EMBOSS software suite.
#
# This class provides a wrapper for the applications of the EMBOSS suite, which 
# is a mature and stable collection of open-source applications that can handle
# a huge range of sequence formats.
# Applications include:
# * Sequence alignment
# * Rapid database searching with sequence patterns
# * Protein motif identification, including domain analysis
# * Nucleotide sequence pattern analysis---for example to identify CpG islands or repeats
# * Codon usage analysis for small genomes
# * Rapid identification of sequence patterns in large scale sequence sets
# * Presentation tools for publication
#
# See the emboss website for more information: http://emboss.sourceforge.net.
#
#
# == Usage
#
#  require 'bio'
#
#  # Suppose that you could get the sequence for XLRHODOP by running
#  # the EMBOSS command +seqret embl:xlrhodop+ on the command line.
#  # Then you can get the output of that command in a Bio::EMBOSS object
#  # by creating a new Bio::EMBOSS object and subsequently executing it.
#  xlrhodop = Bio::EMBOSS.new('seqret embl:xlrhodop')
#  puts xlrhodop.exec
#
#  # Or all in one go:
#  puts Bio::EMBOSS.new('seqret embl:xlrhodop').exec
#
#  # Similarly:
#  puts Bio::EMBOSS.new('transeq -sbegin 110 -send 1171 embl:xlrhodop').exec
#  puts Bio::EMBOSS.new('showfeat embl:xlrhodop').exec
#  puts Bio::EMBOSS.new('seqret embl:xlrhodop -osformat acedb').exec
#
#  # A shortcut exists for this two-step process for +seqret+ and +entret+.
#  puts Bio::EMBOSS.seqret('embl:xlrhodop')
#  puts Bio::EMBOSS.entret('embl:xlrhodop')
#
# == Pre-requisites
#
# You must have the EMBOSS suite installed locally. You can download from the
# project website (see References below).
#
# == References
#
# * http://emboss.sourceforge.net
# * Rice P, Longden I and Bleasby A. \
#    EMBOSS: the European Molecular Biology Open Software Suite. \
#    Trends Genet. 2000 Jun ; 16(6): 276-7 
#
class EMBOSS

  # Combines the initialization and execution for the emboss +seqret+ command.
  #
  #  puts Bio::EMBOSS.seqret('embl:xlrhodop')
  #
  # is a shortcut for:
  #
  #  object = Bio::EMBOSS.new('seqret embl:xlrhodop')
  #  puts object.exec
  # ---
  # *Arguments*:
  # * (required) _arg_: argument handed to +seqret+ (e.g. 'embl:xlrhodop')
  # *Returns*:: the value returned by Bio::Command.query_command for the
  #             command (presumably its output as a String; this is _not_
  #             a Bio::EMBOSS object)
  def self.seqret(arg)
    retrieve('seqret', arg)
  end

  # Combines the initialization and execution for the emboss +entret+ command.
  #
  #  puts Bio::EMBOSS.entret('embl:xlrhodop')
  #
  # is a shortcut for:
  #
  #  object = Bio::EMBOSS.new('entret embl:xlrhodop')
  #  puts object.exec
  # ---
  # *Arguments*:
  # * (required) _arg_: argument handed to +entret+ (e.g. 'embl:xlrhodop')
  # *Returns*:: the value returned by Bio::Command.query_command for the
  #             command (presumably its output as a String; this is _not_
  #             a Bio::EMBOSS object)
  def self.entret(arg)
    retrieve('entret', arg)
  end

  # Initializes a new Bio::EMBOSS object. This provides a holder that can
  # subsequently be executed (see Bio::EMBOSS#exec). The object does _not_
  # hold any actual data when initialized.
  #
  #   e = Bio::EMBOSS.new('seqret embl:xlrhodop')
  #
  # For e to actually hold data, it has to be executed:
  #   puts e.exec
  #
  # The options <tt>-stdout -auto</tt> are appended to the given command line.
  #
  # For an overview of commands that can be used with this method, see the
  # emboss website.
  # ---
  # *Arguments*:
  # * (required) _cmd_line_: emboss command line as a single String
  # *Returns*:: Bio::EMBOSS object
  def initialize(cmd_line)
    @cmd_line = cmd_line + ' -stdout -auto'
  end

  # A Bio::EMBOSS object has to be executed before it can return any result.
  #   obj_A = Bio::EMBOSS.new('transeq -sbegin 110 -send 1171 embl:xlrhodop')
  #   puts obj_A.result                   #=> nil
  #   obj_A.exec
  #   puts obj_A.result                   #=> a FASTA-formatted sequence
  #
  #   obj_B = Bio::EMBOSS.new('showfeat embl:xlrhodop')
  #   obj_B.exec
  #   puts obj_B.result
  #
  # The command line is handed to IO.popen as one String, so it may be
  # interpreted by the shell. Do not build it from untrusted input.
  # ---
  # *Returns*:: String with everything the command wrote to the pipe; the
  #             same value is stored in #result
  #
  # If the command cannot be started, the error raised by IO.popen is
  # propagated unchanged.
  def exec
    # Forget any pipe left over from a previous run, so that the cleanup
    # below only ever touches the pipe opened by this call.
    @io = nil
    @io = IO.popen(@cmd_line, "w+")
    # Nothing is ever written to the command; closing the write side signals
    # end-of-input so a command that reads stdin cannot block forever.
    @io.close_write
    @result = @io.read
  ensure
    # @io is still nil when IO.popen itself failed; guarding here keeps the
    # original error from being masked by a NoMethodError.
    @io.close if @io && !@io.closed?
  end
  
  # Pipe for the command (closed again once #exec has finished)
  attr_reader :io
  
  # Result of the executed command (nil until #exec has been called)
  attr_reader :result

  private

  # Runs the emboss command _cmd_ on _arg_ through
  # Bio::Command.query_command, adding the +-auto+ and +-stdout+ options.
  #
  # NOTE: the bare +private+ above only affects instance methods, so this
  # class method is in fact publicly callable. Its visibility is left
  # unchanged to stay compatible with existing callers.
  # ---
  # *Arguments*:
  # * (required) _cmd_: name of the emboss program (e.g. 'seqret')
  # * (required) _arg_: argument for the program (e.g. 'embl:xlrhodop')
  # *Returns*:: the value returned by Bio::Command.query_command
  def self.retrieve(cmd, arg)
    command_line = [ cmd, arg, '-auto', '-stdout' ]
    Bio::Command.query_command(command_line)
  end

end # EMBOSS

end # Bio