File: clustal.Rd

package info (click to toggle)
r-cran-ape 5.8-1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,676 kB
  • sloc: ansic: 7,676; cpp: 116; sh: 17; makefile: 2
file content (143 lines) | stat: -rw-r--r-- 5,817 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
\name{clustal}
\alias{clustal}
\alias{clustalomega}
\alias{muscle}
\alias{muscle5}
\alias{tcoffee}
\alias{efastats}
\alias{letterconf}
\title{Multiple Sequence Alignment with External Applications}
\description{
  These functions call their respective program from \R to align a set
  of nucleotide or amino acid sequences of class \code{"DNAbin"} or
  \code{"AAbin"}. The application(s) must be installed separately and it
  is highly recommended to do this so that the executables are in a
  directory located on the PATH of the system.

  This version includes an experimental version of \code{muscle5} which
  calls MUSCLE5 (see the link to the documentation in the References
  below); \code{muscle} still calls MUSCLE version 3. Note that the
  executable of MUSCLE5 is also named `muscle' by the default
  compilation setting.

  The functions \code{efastats} and \code{letterconf} require MUSCLE5.
}
\usage{
clustal(x, y, guide.tree, pw.gapopen = 10, pw.gapext = 0.1,
        gapopen = 10, gapext = 0.2, exec = NULL, MoreArgs = "",
        quiet = TRUE, original.ordering = TRUE, file)
clustalomega(x, y, guide.tree, exec = NULL,MoreArgs = "",
              quiet = TRUE, original.ordering = TRUE, file)
muscle(x, y, guide.tree, exec, MoreArgs = "",
        quiet = TRUE, original.ordering = TRUE, file)
muscle5(x, exec = "muscle", MoreArgs = "", quiet = FALSE,
        file, super5 = FALSE, mc.cores = 1)
tcoffee(x, exec = "t_coffee", MoreArgs = "", quiet = TRUE,
        original.ordering = TRUE)

efastats(X, exec = "muscle", quiet = FALSE)
letterconf(X, exec = "muscle")
}
\arguments{
  \item{x}{an object of class \code{"DNAbin"} or \code{"AAbin"} (can be
    missing).}
  \item{y}{an object of class \code{"DNAbin"} or \code{"AAbin"} used for
    profile alignment (can be missing).}
  \item{guide.tree}{guide tree, an object of class \code{"phylo"} (can
    be missing).}
  \item{pw.gapopen, pw.gapext}{gap opening and gap extension penalties
    used by Clustal during pairwise alignments.}
  \item{gapopen, gapext}{gap opening and gap extension penalties used
    by Clustal during the global (multiple) alignment.}
  \item{exec}{a character string giving the name of the program, with
    its path if necessary. \code{clustal} tries to guess this argument
    depending on the operating system (see details).}
  \item{MoreArgs}{a character string giving additional options.}
  \item{quiet}{a logical: the default is to not print on \R's console the
    messages from the external program.}
  \item{original.ordering}{a logical specifying whether to return the
    aligned sequences in the same order as in \code{x} (\code{TRUE} by
    default).}
  \item{file}{a file with its path if results should be stored (can be
    missing).}
  \item{super5}{a logical value specifying whether to use the Super5
    algorithm. By default (\code{FALSE}), the PPP algorithm is used.}
  \item{mc.cores}{the number of cores to be used by MUSCLE5.}
  \item{X}{a list of several alignments of the same sequences, all
    with the same row order.}
}
\details{
  It is highly recommended to install the executables properly so that
  they are in a directory located on the PATH (i.e., accessible from any
  other directory). Alternatively, the full path to the executable
  may be given (e.g., \code{exec = "~/muscle/muscle"}), or a (symbolic)
  link may be copied in the working directory. For Debian and its
  derivatives (e.g., Ubuntu), it is recommended to use the binaries
  distributed by Debian.

  \code{clustal} tries to guess the name of the executable program
  depending on the operating system. Specifically, the following are
  used: ``clustalw'' under Linux, ``clustalw2'' under MacOS, and
  ``clustalw2.exe'' under Windows. For \code{clustalomega},
  ``clustalo[.exe]'' is the default on all systems (with no specific
  path).

  When called without arguments (e.g., \code{clustal()}, \dots), the
  function prints the options of the program which may be passed to
  \code{MoreArgs}.

  Since \pkg{ape} 5.1, \code{clustal}, \code{clustalomega}, and
  \code{muscle} can align AA sequences as well as DNA sequences.
}
\value{
  an object of class \code{"DNAbin"} or \code{"AAbin"} with the aligned
  sequences.

  \code{efastats} returns a data frame.

  \code{letterconf} opens the default Web browser.
}
\references{
  Chenna, R., Sugawara, H., Koike, T., Lopez, R., Gibson, T. J.,
  Higgins, D. G. and Thompson, J. D. (2003) Multiple sequence alignment
  with the Clustal series of programs. \emph{Nucleic Acids Research}
  \bold{31}, 3497--3500. \url{http://www.clustal.org/}

  Edgar, R. C. (2004) MUSCLE: Multiple sequence alignment with high
  accuracy and high throughput. \emph{Nucleic Acids Research},
  \bold{32}, 1792--1797.
  \url{http://www.drive5.com/muscle/muscle_userguide3.8.html}

  Notredame, C., Higgins, D. and Heringa, J. (2000) T-Coffee: A novel
  method for multiple sequence alignments. \emph{Journal of Molecular
  Biology}, \bold{302}, 205--217.
  \url{https://tcoffee.org/}

  Sievers, F., Wilm, A., Dineen, D., Gibson, T. J., Karplus, K., Li, W.,
  Lopez, R., McWilliam, H., Remmert, M., S\"oding, J., Thompson,
  J. D. and Higgins, D. G. (2011) Fast, scalable generation of
  high-quality protein multiple sequence alignments using Clustal
  Omega. \emph{Molecular Systems Biology}, \bold{7}, 539.
  \url{http://www.clustal.org/}

  \url{https://drive5.com/muscle5/}
}
\author{Emmanuel Paradis, Franz Krah}
\seealso{
  \code{\link{image.DNAbin}}, \code{\link{del.gaps}},
  \code{\link{all.equal.DNAbin}}, \code{\link{alex}},
  \code{\link{alview}}, \code{\link{checkAlignment}}
}
\examples{
\dontrun{
### display the options:
clustal()
clustalomega()
muscle()
tcoffee()

data(woodmouse)
### open gaps more easily:
clustal(woodmouse, pw.gapopen = 1, pw.gapext = 1)
### T-Coffee requires negative values (quite slow; muscle() is much faster):
tcoffee(woodmouse,  MoreArgs = "-gapopen=-10 -gapext=-2")
}}
\keyword{manip}