File: makeTxDbPackage.Rd

package info (click to toggle)
r-bioc-txdbmaker 1.2.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,168 kB
  • sloc: makefile: 2
file content (246 lines) | stat: -rw-r--r-- 9,983 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
\name{makeTxDbPackage}

\alias{makeTxDbPackage}
\alias{makeTxDbPackageFromUCSC}
\alias{makeFDbPackageFromUCSC}
\alias{makeTxDbPackageFromBiomart}
\alias{supportedMiRBaseBuildValues}
\alias{makePackageName}

\title{
  Making a TxDb package from annotations available at the
  UCSC Genome Browser, biomaRt or from another source.
}
\description{
  A TxDb package is an annotation package containing a
  \link[GenomicFeatures]{TxDb} object.

  The \code{makeTxDbPackageFromUCSC} function allows the user
  to make a \link[GenomicFeatures]{TxDb} package from transcript annotations
  available at the UCSC Genome Browser.

  The \code{makeTxDbPackageFromBiomart} function allows the user
  to do the same thing as \code{makeTxDbPackageFromUCSC} except that the
  annotations originate from biomaRt.

  Finally, the \code{makeTxDbPackage} function allows the user to make a
  \link[GenomicFeatures]{TxDb} package directly from a
  \link[GenomicFeatures]{TxDb} object.
}

\usage{
makeTxDbPackageFromUCSC(
    version,
    maintainer,
    author,
    destDir=".",
    license="Artistic-2.0",
    genome="hg19",
    tablename="knownGene",
    transcript_ids=NULL,
    circ_seqs=NULL,
    goldenPath.url=getOption("UCSC.goldenPath.url"),
    taxonomyId=NA,
    miRBaseBuild=NA)

makeFDbPackageFromUCSC(
    version,
    maintainer,
    author,
    destDir=".",
    license="Artistic-2.0",
    genome="hg19",
    track="tRNAs",
    tablename="tRNAs",
    columns = UCSCFeatureDbTableSchema(genome, track, tablename),
    url="https://genome.ucsc.edu/cgi-bin/",
    goldenPath.url=getOption("UCSC.goldenPath.url"),
    chromCol=NULL,
    chromStartCol=NULL,
    chromEndCol=NULL,
    taxonomyId=NA)

makeTxDbPackageFromBiomart(
    version,
    maintainer,
    author,
    destDir=".",
    license="Artistic-2.0",
    biomart="ENSEMBL_MART_ENSEMBL",
    dataset="hsapiens_gene_ensembl",
    transcript_ids=NULL,
    circ_seqs=NULL,
    filter=NULL,
    id_prefix="ensembl_",
    host="https://www.ensembl.org",
    port,
    taxonomyId=NA,
    miRBaseBuild=NA)

makeTxDbPackage(txdb,
                version,
                maintainer,
                author,
                destDir=".",
                license="Artistic-2.0",
                pkgname=NULL,
                provider=NULL,
                providerVersion=NULL)

supportedMiRBaseBuildValues()

makePackageName(txdb)
}
\arguments{
  \item{version}{What is the version number for this package?}
  \item{maintainer}{Who is the package maintainer? (must include email
    to be valid). Should be a \code{\link{person}} object, or something
    coercible to one, like a string. May be omitted if the \code{author}
    argument is a \code{person} containing someone with the maintainer role.}
  \item{author}{Who is the creator of this package? Should be
    a \code{\link{person}} object, or something coercible to one, like a
    character vector of names. The \code{maintainer} argument will be
    merged into this list.}
  \item{destDir}{A path where the package source should be assembled.}
  \item{license}{What is the license (and its version)?}
  \item{biomart}{which BioMart database to use.
    Get the list of all available BioMart databases with the
    \code{\link[biomaRt]{listMarts}} function from the biomaRt
    package. See the details section below for a list of BioMart
    databases with compatible transcript annotations.}
  \item{dataset}{which dataset from BioMart. For example:
    \code{"hsapiens_gene_ensembl"}, \code{"mmusculus_gene_ensembl"},
    \code{"dmelanogaster_gene_ensembl"}, \code{"celegans_gene_ensembl"}, etc
    in the ensembl database.
    See the examples section below for how to discover which datasets
    are available in a given BioMart database.}
  \item{genome}{name of a UCSC genome assembly e.g. \code{"hg19"} or
    \code{"panTro6"}. Get the list of UCSC genomes currently available
    with \code{\link[UCSC.utils]{list_UCSC_genomes}()[ , "genome"]}.}
  \item{track}{name of the UCSC track.  Use
    \code{supportedUCSCFeatureDbTracks} to get the list of available
        tracks for a particular genome.}
  \item{tablename}{name of the UCSC table containing the transcript
    annotations to retrieve. Use the \code{\link{supportedUCSCtables}}
    utility function to get the list of tables known to work with
    \code{makeTxDbFromUCSC}.}
  \item{transcript_ids}{optionally, only retrieve transcript
    annotation data for the specified set of transcript ids.
    If this is used, then the meta information displayed for the
    resulting \link[GenomicFeatures]{TxDb} object will say 'Full dataset: no'.
    Otherwise it will say 'Full dataset: yes'.}
  \item{circ_seqs}{a character vector to list out which chromosomes
    should be marked as circular.}
  \item{filter}{Additional filters to use in the BioMart query. Must be
    a named list. An example is \code{filter=as.list(c(source="entrez"))}}
  \item{host}{The host URL of the BioMart. Defaults to https://www.ensembl.org.}
  \item{port}{The port to use in the HTTP communication with the host. This
    argument has been deprecated. It is handled by \code{useEnsembl}
    depending on the host input.}
  \item{id_prefix}{Specifies the prefix used in BioMart attributes. For
    example, some BioMarts may have an attribute specified as
    \code{"ensembl_transcript_id"} whereas others have the same attribute
    specified as \code{"transcript_id"}. Defaults to \code{"ensembl_"}.}
  \item{columns}{a named character vector to list out the names and
        types of the other columns that the downloaded track should
        have.  Use \code{UCSCFeatureDbTableSchema} to retrieve this
        information for a particular table.}
  \item{url,goldenPath.url}{use to specify the location of an
    alternate UCSC Genome Browser.}
  \item{chromCol}{If the schema comes back and the 'chrom' column has been
        labeled something other than 'chrom', use this argument to indicate
        what that column has been labeled as so we can properly designate it.
        This could happen (for example) with the knownGene track tables, which
        have no 'chromStart' or 'chromEnd' columns, but which DO have columns
        that could reasonably substitute for these columns under particular
        circumstances.  Therefore we allow these three columns to have arguments
        so that their definition can be re-specified.}
  \item{chromStartCol}{Same thing as chromCol, but for renames of 'chromStart'}
  \item{chromEndCol}{Same thing as chromCol, but for renames of 'chromEnd'}
  \item{txdb}{A \link[GenomicFeatures]{TxDb} object that represents a handle
    to a transcript database. This object type is what is returned by
    \code{makeTxDbFromUCSC}, \code{makeTxDbFromBiomart} or
    \code{makeTxDb}.}
  \item{taxonomyId}{By default this value is NA and the organism
    provided (or inferred) will be used to look up the correct value for
    this.  But you can use this argument to override that and supply
    your own valid taxId here}
  \item{miRBaseBuild}{specify the string for the appropriate build
    information from mirbase.db to use for microRNAs.  This can be
    learned by calling \code{supportedMiRBaseBuildValues}.  By default,
    this value will be set to \code{NA}, which will inactivate the
    \code{microRNAs} accessor.}
  \item{pkgname}{By default this value is NULL and does not need to be
    filled in (a package name will be generated for you).  But if you
    override this value, then the package and its object will be
    instead named after this value.  Be aware that the standard rules
    for package names will apply (so don't include spaces,
    underscores or dashes).}
  \item{provider}{If not given, a default is taken from the
    'Data source' field of the metadata table.}
  \item{providerVersion}{If not given, a default is taken from one of
    'UCSC table', 'BioMart version' or 'Data source' fields of the
    metadata table.}
}
\details{
  \code{makeTxDbPackageFromUCSC} is a convenience function that calls
    both the \code{\link{makeTxDbFromUCSC}} and the
    \code{\link{makeTxDbPackage}} functions.  The
    \code{makeTxDbPackageFromBiomart} function follows a similar pattern and
    calls the \code{\link{makeTxDbFromBiomart}} and
    \code{\link{makeTxDbPackage}} functions.
  \code{supportedMiRBaseBuildValues} is a convenience function that will
  list all the possible values for the miRBaseBuild argument.
  \code{makePackageName} creates a package name from a TxDb object.
  This function is also used by OrganismDbi.
}

\value{A \link[GenomicFeatures]{TxDb} object.}

\author{
  M. Carlson
}

\seealso{
  \code{\link{makeTxDbFromUCSC}},
  \code{\link{makeTxDbFromBiomart}},
  \code{\link{makeTxDb}},
  \code{\link[UCSC.utils]{list_UCSC_genomes}}
}

\examples{
## First consider relevant helper/discovery functions:
## Get the list of tables known to work with makeTxDbPackageFromUCSC():
supportedUCSCtables(genome="hg19")

## Can also list all the possible values for the miRBaseBuild argument:
supportedMiRBaseBuildValues()

## Next are examples of actually building a package:
\donttest{
## Makes a transcript package for Yeast from the ensGene table at UCSC:
makeTxDbPackageFromUCSC(version="0.01",
                        maintainer="Some One <so@someplace.org>",
                        author="Some One <so@someplace.com>",
                        genome="sacCer2",
                        tablename="ensGene")

## Makes a transcript package from Human by using biomaRt and limited to a
## small subset of the transcripts.
transcript_ids <- c(
    "ENST00000400839",
    "ENST00000400840",
    "ENST00000478783",
    "ENST00000435657",
    "ENST00000268655",
    "ENST00000313243",
    "ENST00000341724")

makeTxDbPackageFromBiomart(version="0.01",
                           maintainer="Some One <so@someplace.org>",
                           author="Some One <so@someplace.com>",
                           transcript_ids=transcript_ids)

}

}