File: getGeneLengthAndGCContent.Rd

package info (click to toggle)
r-bioc-edaseq 2.40.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 408 kB
  • sloc: makefile: 2
file content (65 lines) | stat: -rw-r--r-- 2,110 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
\name{getGeneLengthAndGCContent}
\alias{getGeneLengthAndGCContent}

\title{Get gene length and GC-content}

\description{
Automatically retrieves gene length and GC-content information from BioMart or org.db packages.
}

\usage{
getGeneLengthAndGCContent(id, org, mode=c("biomart", "org.db"))
}

\arguments{
  \item{id}{
    Character vector of one or more ENSEMBL or ENTREZ gene IDs.
  }
  \item{org}{
    Organism three-letter code, e.g. 'hsa' for 'Homo sapiens'.
    See also: \url{http://www.genome.jp/kegg/catalog/org_list.html}.
    In org.db mode, this can also be a specific genome assembly,
    e.g. 'hg38' or 'sacCer3'.
  }
  \item{mode}{
    Mode to retrieve the information. Defaults to 'biomart'. See Details.
  }
}

\details{
    The 'biomart' mode is based on functionality from the biomaRt
    package and retrieves the required information from the BioMart database.
    This is available for all ENSEMBL organisms and is typically most current, 
    but can be time-consuming when querying several thousand genes at a time.

    The 'org.db' mode uses organism-based annotation packages from
    Bioconductor. This is much faster than the 'biomart' mode, but is only
    available for selected model organisms currently supported by
    BioC annotation functionality. 

    Results for the same gene ID(s) can differ between the two modes as they
    are based on different sources for the underlying genome assembly. While
    the 'biomart' mode uses the latest ENSEMBL version, the 'org.db' mode uses
    BioC annotation packages typically built from UCSC. 
}

\value{
A numeric matrix with two columns: gene length and GC-content.
}

\seealso{
    \code{\link{getSequence}} to retrieve a genomic sequence from BioMart,
    \code{\link{genes}} to extract genomic coordinates from a TxDb object,
    \code{\link{getSeq}} to extract genomic sequences from a BSgenome object,
    \code{\link{alphabetFrequency}} to calculate nucleotide frequencies.
}

\author{
Ludwig Geistlinger <Ludwig.Geistlinger@bio.ifi.lmu.de>
}

\examples{
\donttest{
getGeneLengthAndGCContent("ENSG00000012048", "hsa")
}
}