File: taxonomy.R

package info (click to toggle)
r-cran-rotl 3.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,384 kB
  • sloc: sh: 9; makefile: 5
file content (332 lines) | stat: -rw-r--r-- 10,603 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
##' Summary information about the Open Tree Taxonomy (OTT)
##'
##' Return metadata and information about the taxonomy
##' itself. Currently, the available metadata is fairly sparse, but
##' includes (at least) the version, and the location from which the
##' complete taxonomy source files can be downloaded.
##'
##' @title Information about the Open Tree Taxonomy
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list with the following properties:
##' \itemize{
##'
##'     \item {weburl} {String. The release page for this version
##'     of the taxonomy.}
##'
##'     \item {author} {String. The author string.}
##'
##'     \item {name} {String. The name of the taxonomy.}
##'
##'     \item {source} {String. The full identifying information for
##'     this version of the taxonomy.}
##'
##'     \item {version} {String. The version number of the taxonomy.}
##' }
##' @examples
##' \dontrun{
##' taxonomy_about()
##' }
##' @export
taxonomy_about <- function(...) {
  res <- .taxonomy_about(...)
  return(res)
}


##' Information about taxa.
##'
##' Given a vector of ott ids, \code{taxonomy_taxon_info} returns
##' information about the specified taxa.
##'
##' The functions \code{tax_rank}, \code{tax_name}, and
##' \code{synonyms} can extract this information from an object
##' created by the \code{taxonomy_taxon_info()}.
##'
##' @title Taxon information
##' @param ott_ids the ott ids of the taxon of interest (numeric or
##'     character containing only numbers)
##' @param include_children whether to include information about all
##'     the children of this taxon. Default \code{FALSE}.
##' @param include_lineage whether to include information about all
##'     the higher level taxa that include the \code{ott_ids}.
##'     Default \code{FALSE}.
##' @param include_terminal_descendants whether to include the list of
##'     terminal \code{ott_ids} contained in the \code{ott_ids}
##'     provided.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param tax an object generated by the \code{taxonomy_taxon_info}
##'     function
##' @return \code{taxonomy_taxon_info} returns a list detailing
##'     information about the taxa. \code{tax_rank} and
##'     \code{tax_name} return a vector. \code{synonyms} returns a
##'     list whose elements are the synonyms for each of the
##'     \code{ott_id} requested.
##'
##' @seealso \code{\link{tnrs_match_names}} to obtain \code{ott_id}
##'     from a taxonomic name.
##' @examples
##' \dontrun{
##' req <- taxonomy_taxon_info(ott_id=515698)
##' tax_rank(req)
##' tax_name(req)
##' synonyms(req)
##' }
##' @export
taxonomy_taxon_info <- function(ott_ids, include_children = FALSE,
                                include_lineage = FALSE,
                                include_terminal_descendants = FALSE, ...) {
  res <- lapply(ott_ids, function(x) {
    .taxonomy_taxon_info(
      ott_id = x,
      include_children = include_children,
      include_lineage = include_lineage,
      include_terminal_descendants = include_terminal_descendants,
      ...
    )
  })
  names(res) <- ott_ids
  class(res) <- "taxon_info"
  return(res)
}


##' Given an ott id, return the inclusive taxonomic subtree descended
##' from the specified taxon.
##'
##' If the output of this function is exported to a file, the only
##' possible value for the \code{output_format} argument is
##' \dQuote{\code{newick}}. If the file provided already exists, it
##' will be silently overwritten.
##'
##' @title Taxonomy subtree
##' @param ott_id The ott id of the taxon of interest.
##' @param output_format the format of the object to be returned. See
##'     the \sQuote{Return} section.
##' @param label_format Character. Defines the label type; one of
##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
##'      \dQuote{\code{name_and_id}} (the default).
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param file the file name where to save the output of the
##'     function. Ignored unless \code{output_format} is set to
##'     \dQuote{\code{phylo}}.
##' @return If the \code{file} argument is missing: \itemize{
##'
##'     \item{\dQuote{\code{taxa}}} { a list of the taxa names
##'     (species) in slot \code{tip_label}, and higher-level taxonomy
##'     (e.g., families, genera) in slot \code{edge_label}, descending
##'     from the taxa corresponding to the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{newick}}} { a character vector containing
##'     the newick formatted string corresponding to the taxonomic
##'     subtree for the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{phylo}}} { an object of the class
##'     \code{phylo} from the \code{ape} package. }
##'
##'     \item{\dQuote{\code{raw}}} { the direct output from the API,
##'     i.e., a list with an element named \sQuote{newick} that
##'     contains the subtree as a newick formatted string. }
##'
##'     }
##'
##'     If a \code{file} argument is provided (and
##'     \code{output_format} is set to \dQuote{\code{phylo}}), a
##'     logical indicating whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' req <- taxonomy_subtree(ott_id=515698)
##' plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
##' }
##' @export
taxonomy_subtree <- function(ott_id = NULL,
                             output_format = c("taxa", "newick", "phylo", "raw"),
                             label_format = NULL, file, ...) {
  output_format <- match.arg(output_format)
  res <- .taxonomy_subtree(ott_id = ott_id, label_format = label_format, ...)
  if (!missing(file) && !identical(output_format, "newick")) {
    warning(
      sQuote("file"),
      " argument is ignored, you can only write newick tree strings to a file."
    )
  }
  if (identical(output_format, "raw")) {
    return(res)
  } else if (identical(output_format, "newick")) {
    res <- res$newick
    if (!missing(file)) {
      unlink(file)
      cat(res, file = file)
      invisible(return(file.exists(file)))
    }
  } else if (identical(output_format, "phylo")) {
    res <- phylo_from_otl(res)
  } else { ## in all other cases use tree_to_labels
    res <- tree_to_labels(res)
  }
  return(res)
}


##' Taxonomic Least Inclusive Common Ancestor (MRCA)
##'
##' Given a set of OTT ids, get the taxon that is the most recent common
##' ancestor (the MRCA) of all the identified taxa.
##'
##' @title Taxonomic MRCA
##' @param ott_ids a vector of ott ids for the taxa whose MRCA is to
##'     be found (numeric).
##' @param tax an object generated by the \code{taxonomy_mrca}
##'     function
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return \itemize{
##'
##'     \item{\code{taxonomy_mrca}} { returns a list about the
##'     taxonomic information relating to the MRCA for the ott_ids
##'     provided. }
##'
##'     \item{\code{tax_rank}} { returns a character vector of the
##'     taxonomic rank for the MRCA. }
##'
##'     \item{\code{tax_name}} { returns a character vector the
##'     Open Tree Taxonomy name for the MRCA. }
##'
##'     \item{\code{ott_id}} { returns a numeric vector of the ott id
##'     for the MRCA. }
##'
##' }
##' @examples
##' \dontrun{
##' req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
##' tax_rank(req)
##' tax_name(req)
##' ott_id(req)
##' }
##' @export
taxonomy_mrca <- function(ott_ids = NULL, ...) {
  res <- .taxonomy_mrca(ott_ids = ott_ids, ...)
  class(res) <- c("taxon_mrca", class(res))
  return(res)
}



### methods for taxonomy_taxon_info ---------------------------------------------

taxon_info_method_factory <- function(.f) {
  function(tax, ...) {
    res <- lapply(tax, .f)
    names(res) <- vapply(tax, .tax_unique_name, character(1))
    res <- add_otl_class(res, .f)
    res
  }
}

##' @export
##' @rdname taxonomy_taxon_info
tax_rank.taxon_info <- taxon_info_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_taxon_info
tax_name.taxon_info <- taxon_info_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_taxon_info
unique_name.taxon_info <- taxon_info_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_taxon_info
synonyms.taxon_info <- taxon_info_method_factory(.tax_synonyms)

##' @export
##' @rdname taxonomy_taxon_info
ott_id.taxon_info <- taxon_info_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_taxon_info
tax_sources.taxon_info <- taxon_info_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_taxon_info
is_suppressed.taxon_info <- taxon_info_method_factory(.tax_is_suppressed)

##' @export
##' @rdname taxonomy_taxon_info
flags.taxon_info <- taxon_info_method_factory(.tax_flags)


### methods for taxonomy_mrca ---------------------------------------------------

taxon_mrca_method_factory <- function(.f) {
  function(tax, ...) {
    res <- list(.f(tax[["mrca"]]))
    names(res) <- .tax_unique_name(tax[["mrca"]])
    res <- add_otl_class(res, .f)
    res
  }
}

##' @export
##' @rdname taxonomy_mrca
tax_rank.taxon_mrca <- taxon_mrca_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_mrca
tax_name.taxon_mrca <- taxon_mrca_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_mrca
ott_id.taxon_mrca <- taxon_mrca_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_mrca
unique_name.taxon_mrca <- taxon_mrca_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_mrca
tax_sources.taxon_mrca <- taxon_mrca_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_mrca
flags.taxon_mrca <- taxon_mrca_method_factory(.tax_flags)

##' @export
##' @rdname taxonomy_mrca
is_suppressed.taxon_mrca <- taxon_mrca_method_factory(.tax_is_suppressed)

### method for extracting higher taxonomy from taxonomy_taxon_info calls  -------

get_lineage <- function(tax) {
  check_lineage(tax)
  lg <- lapply(tax[["lineage"]], build_lineage)
  lg <- do.call("rbind", lg)
  as.data.frame(lg, stringsAsFactors = FALSE)
}

build_lineage <- function(x) {
  c(
    "rank" = .tax_rank(x),
    "name" = .tax_name(x),
    "unique_name" = .tax_unique_name(x),
    "ott_id" = .tax_ott_id(x)
  )
}

check_lineage <- function(tax) {
  if (!exists("lineage", tax)) {
    stop(
      "The object needs to be created using ",
      sQuote("include_lineage=TRUE")
    )
  }
}

##' @export
##' @rdname tax_lineage
tax_lineage.taxon_info <- function(tax, ...) {
  lapply(tax, get_lineage)
}