File: readKallisto.Rd

package info (click to toggle)
r-bioc-summarizedexperiment 1.12.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,752 kB
  • sloc: sh: 3; makefile: 2
file content (120 lines) | stat: -rw-r--r-- 3,987 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
\name{readKallisto}
\alias{readKallisto}
\alias{readKallistoBootstrap}
\alias{KALLISTO_ASSAYS}

\title{
  Input kallisto or kallisto bootstrap results.
}

\description{
  \code{readKallisto} inputs several kallisto output files into a single
  \code{SummarizedExperiment} instance, with rows corresponding to
  estimated transcript abundance and columns to
  samples. \code{readKallistoBootstrap} inputs kallisto bootstrap
  replicates of a single sample into a matrix of transcript x bootstrap
  abundance estimates.
}

\usage{
readKallisto(files,
    json = file.path(dirname(files), "run_info.json"), 
    h5 = any(grepl("\\\\.h5$", files)), what = KALLISTO_ASSAYS,
    as = c("SummarizedExperiment", "list", "matrix"))

readKallistoBootstrap(file, i, j)
}

\arguments{

  \item{files}{character() paths to kallisto \sQuote{abundance.tsv}
    output files. The assumption is that files are organized in the way
    implied by kallisto, with each sample in a distinct directory, and
    the directory containing files abundance.tsv, run_info.json, and
    perhaps abundance.h5.}

  \item{json}{character() vector of the same length as \code{files}
    specifying the location of JSON files produced by kallisto and
    containing information on the run. The default assumes that json
    files are in the same directory as the corresponding abundance
    file.}

  \item{h5}{character() vector of the same length as \code{files}
    specifying the location of HDF5 files produced by kallisto and
    containing bootstrap estimates. The default assumes that HDF5 files
    are in the same directory as the corresponding abundance file.}

  \item{what}{character() vector of kallisto per-sample outputs to be
    input. See KALLISTO_ASSAYS for available values.}

  \item{as}{character(1) specifying the output format. See \code{Value}
    for additional detail.}

  \item{file}{character(1) path to a single HDF5 output file.}

  \item{i, j}{integer() vector of row (\code{i}) and column (\code{j})
    indexes to input.}

}

\value{

  A \code{SummarizedExperiment}, \code{list}, or \code{matrix},
  depending on the value of argument \code{as}; by default a
  \code{SummarizedExperiment}. The \code{as="SummarizedExperiment"}
  \code{rowData(se)} the length of each transcript;
  \code{colData(se)} includes summary information on each sample,
  including the number of targets and bootstraps, the kallisto and index
  version, the start time and operating system call used to create the
  file. \code{assays()} contains one or more transcript x sample
  matrices of parameters estimated by kallisto (see
  \code{KALLISTO_ASSAYS}).

  \code{as="list"} return value contains information simillar to
  \code{SummarizedExperiment} with row, column and assay data as
  elements of the list without coordination of row and column
  annotations into an integrated data container. \code{as="matrix"}
  returns the specified assay as a simple \emph{R} matrix.

}

\references{
  \url{http://pachterlab.github.io/kallisto} software for quantifying
  transcript abundance.
}

\author{
  Martin Morgan \url{martin.morgan@roswellpark.org}
}

\examples{
outputs <- system.file(package="SummarizedExperiment", "extdata",
    "kallisto")
files <- dir(outputs, pattern="abundance.tsv", full=TRUE, recursive=TRUE)
stopifnot(all(file.exists(files)))

## default: input 'est_counts'
(se <- readKallisto(files, as="SummarizedExperiment"))
str(readKallisto(files, as="list"))
str(readKallisto(files, as="matrix"))

## available assays
KALLISTO_ASSAYS
## one or more assay
readKallisto(files, what=c("tpm", "eff_length"))

## alternatively: read hdf5 files
files <- sub(".tsv", ".h5", files, fixed=TRUE)
readKallisto(files)

## input all bootstraps
xx <- readKallistoBootstrap(files[1])
ridx <- head(which(rowSums(xx) != 0), 3)
cidx <- c(1:5, 96:100)
xx[ridx, cidx]

## selective input of rows (transcripts) and/or bootstraps
readKallistoBootstrap(files[1], i=c(ridx, rev(ridx)), j=cidx)
}

\keyword{file}