File: findCompatibleOverlaps-methods.Rd

package info (click to toggle)
r-bioc-genomicalignments 1.0.6-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,980 kB
  • ctags: 54
  • sloc: ansic: 1,493; makefile: 4; sh: 3
file content (118 lines) | stat: -rw-r--r-- 3,904 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
\name{findCompatibleOverlaps-methods}
\alias{findCompatibleOverlaps-methods}

\alias{findCompatibleOverlaps}
\alias{findCompatibleOverlaps,GAlignments,GRangesList-method}
\alias{findCompatibleOverlaps,GAlignmentPairs,GRangesList-method}
\alias{countCompatibleOverlaps}


\title{Finding hits between reads and transcripts that are \emph{compatible}
       with the splicing of the transcript}

\description{
  In the context of an RNA-seq experiment, \code{findCompatibleOverlaps}
  (or \code{countCompatibleOverlaps}) can be used for finding (or counting)
  hits between reads and transcripts that are \emph{compatible}
  with the splicing of the transcript.
}

\usage{
findCompatibleOverlaps(query, subject)
countCompatibleOverlaps(query, subject)
}

\arguments{
  \item{query}{
    A \link{GAlignments} or \link{GAlignmentPairs} object representing
    the aligned reads.
  }
  \item{subject}{
    A \link[GenomicRanges]{GRangesList} object representing the transcripts.
  }
}

\details{
  \code{findCompatibleOverlaps} is a specialized version of
  \code{\link[IRanges]{findOverlaps}} that uses
  \code{\link{encodeOverlaps}} internally to keep only
  the hits where the junctions in the aligned read are \emph{compatible}
  with the splicing of the annotated transcript.

  The topic of working with overlap encodings is covered in detail
  in the "OverlapEncodings" vignette located in this package
  (\pkg{GenomicAlignments}) and accessible with
  \code{vignette("OverlapEncodings")}.
}

\value{
  A \link[IRanges]{Hits} object for \code{findCompatibleOverlaps}.

  An integer vector \emph{parallel} to (i.e. same length as) \code{query}
  for \code{countCompatibleOverlaps}.
}

\author{
  H. Pages
}

\seealso{
  \itemize{
    \item The \code{\link[IRanges]{findOverlaps}} generic function defined
          in the \pkg{IRanges} package.

    \item The \code{\link{encodeOverlaps}} generic function and
          \link{OverlapEncodings} class.

    \item The "OverlapEncodings" vignette in this package.

    \item \link{GAlignments} and \link{GAlignmentPairs} objects.

    \item \link[GenomicRanges]{GRangesList} objects in the
          \pkg{GenomicRanges} package.
  }
}

\examples{
## Here we only show a simple example illustrating the use of
## countCompatibleOverlaps() on a very small data set. Please
## refer to the "OverlapEncodings" vignette in the GenomicAlignments
## package for a comprehensive presentation of "overlap
## encodings" and related tools/concepts (e.g. "compatible"
## overlaps, "almost compatible" overlaps etc...), and for more
## examples.

## sm_treated1.bam contains a small subset of treated1.bam, a BAM
## file containing single-end reads from the "Pasilla" experiment
## (RNA-seq, Fly, see the pasilla data package for the details)
## and aligned to reference genome BDGP Release 5 (aka dm3 genome on
## the UCSC Genome Browser):
sm_treated1 <- system.file("extdata", "sm_treated1.bam",
                           package="GenomicAlignments", mustWork=TRUE)

## Load the alignments:
flag0 <- scanBamFlag(isDuplicate=FALSE, isNotPassingQualityControls=FALSE)
param0 <- ScanBamParam(flag=flag0)
gal <- readGAlignments(sm_treated1, use.names=TRUE, param=param0)

## Load the transcripts (IMPORTANT: As always, the reference genome
## of the transcripts must be *exactly* the same as the reference
## genome used to align the reads):
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
exbytx <- exonsBy(TxDb.Dmelanogaster.UCSC.dm3.ensGene, by="tx", use.names=TRUE)

## Number of "compatible" transcripts per alignment in 'gal':
gal_ncomptx <- countCompatibleOverlaps(gal, exbytx)
mcols(gal)$ncomptx <- gal_ncomptx
table(gal_ncomptx)
mean(gal_ncomptx >= 1)
## --> 33% of the alignments in 'gal' are "compatible" with at least
## 1 transcript in 'exbytx'.

## Keep only alignments compatible with at least 1 transcript in
## 'exbytx':
compgal <- gal[gal_ncomptx >= 1]
head(compgal)
}

\keyword{methods}
\keyword{utilities}