File: methods-BowtieQA.R

package info (click to toggle)
r-bioc-shortread 1.32.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,384 kB
  • ctags: 293
  • sloc: ansic: 2,718; cpp: 202; sh: 3; makefile: 2
file content (182 lines) | stat: -rw-r--r-- 7,144 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
.BowtieQA <- function(x, ...)
{
    new("BowtieQA", .srlist=x, ...)
}

.qa_Bowtie_lane <-
    function(dirPath, pattern, ..., type="Bowtie", 
			 verbose=FALSE)
{
    if (verbose)
        message("qa 'Bowtie' pattern:", pattern)
    rpt <- readAligned(dirPath, pattern, type, ...)
	doc <- .qa_depthOfCoverage(rpt, pattern)
    ac <- .qa_adapterContamination(rpt, pattern, ...)
    alf <- .qa_alphabetFrequency(sread(rpt), baseOnly=TRUE, collapse=TRUE)
    bqtbl <- .qa_alphabetFrequency(quality(rpt), collapse=TRUE)
    rqs <- .qa_qdensity(quality(rpt))
    freqtbl <- tables(sread(rpt))
    abc <- alphabetByCycle(rpt)
    perCycleBaseCall <- .qa_perCycleBaseCall(abc, pattern)
    perCycleQuality <- .qa_perCycleQuality(abc, quality(rpt), pattern)
    aqtbl <- table(quality(alignQuality(rpt)), useNA="always")
    list(readCounts=data.frame(
           read=NA, filter=NA, aligned=length(rpt),
           row.names=pattern),
         baseCalls=data.frame(
           A=alf[["A"]], C=alf[["C"]], G=alf[["G"]], T=alf[["T"]],
           N=alf[["other"]], row.names=pattern),
         readQualityScore=data.frame(
           quality=rqs$x,
           density=rqs$y,
           lane=pattern,
           type="aligned"),
         baseQuality=data.frame(
           score=names(bqtbl),
           count=as.vector(bqtbl),
           lane=pattern),
         alignQuality=data.frame(
           score=as.numeric(names(aqtbl)),
           count=as.vector(aqtbl),
           lane=pattern, row.names=NULL),
         frequentSequences=data.frame(
           sequence=names(freqtbl$top),
           count=as.integer(freqtbl$top),
           type="aligned",
           lane=pattern, row.names=NULL),
         sequenceDistribution=cbind(
           freqtbl$distribution,
           type="aligned",
           lane=pattern),
         perCycle=list(
           baseCall=perCycleBaseCall,
           quality=perCycleQuality),
         perTile=list(
           readCounts=data.frame(
             count=integer(0), type=character(0),
             tile=integer(0), lane=character(0)),
           medianReadQualityScore=data.frame(
             score=integer(), type=character(), tile=integer(),
             lane=integer())),
		 depthOfCoverage=doc,
		 adapterContamination=ac
         )
}

.qa_Bowtie <-
    function(dirPath, pattern, type="Bowtie", ..., 
			 verbose=FALSE)
{
    fls <- .file_names(dirPath, pattern)
    lst <- bplapply(basename(fls), .qa_Bowtie_lane, dirPath=dirPath,
                    type=type, ..., verbose=verbose)
    lst <-
        list(readCounts=.bind(lst, "readCounts"),
             baseCalls=.bind(lst, "baseCalls"),
             readQualityScore=.bind(lst, "readQualityScore"),
             baseQuality=.bind(lst, "baseQuality"),
             alignQuality=.bind(lst, "alignQuality"),
             frequentSequences=.bind(lst, "frequentSequences"),
             sequenceDistribution=.bind(lst, "sequenceDistribution"),
             perCycle=local({
                 lst <- subListExtract(lst, "perCycle")
                 list(baseCall=.bind(lst, "baseCall"),
                      quality=.bind(lst, "quality"))
             }),
             perTile=local({
                 lst <- subListExtract(lst, "perTile")
                 list(readCounts=.bind(lst, "readCounts"),
                      medianReadQualityScore=.bind(
                        lst, "medianReadQualityScore"))
             }),
             depthOfCoverage=.bind(lst, "depthOfCoverage"),
             adapterContamination=.bind(lst, "adapterContamination")
		)
    .BowtieQA(lst)
}

setMethod(report_html, "BowtieQA",
          function(x, dest, type, ...)
{
    qa <- .qa_sampleKey(x)
    dir.create(dest, recursive=TRUE)
    fls <- c("0000-Header.html", "1000-Overview.html",
             "2000-RunSummary.html", "3000-ReadDistribution.html",
             "4000-CycleSpecific.html", "8000-DepthOfCoverage.html",
             "9000-AdapterContamination.html", "9999-Footer.html")
    sections <- system.file("template", fls, package="ShortRead")
    perCycle <- qa[["perCycle"]]
    values <-
        list(SAMPLE_KEY=hwrite(qa[["keyValue"]], border=0),
             PPN_COUNT=.html_img(
               dest, "readCount", .plotReadCount(qa)),
             PPN_COUNT_TBL=hwrite(
               .ppnCount(qa[["readCounts"]]),
               border=0),
             BASE_CALL_COUNT=.html_img(
               dest, "baseCalls", .plotNucleotideCount(qa)),
             READ_QUALITY_FIGURE=.htmlReadQuality(
               dest, "readQuality", qa, "aligned"),
             READ_OCCURRENCES_FIGURE=.htmlReadOccur(
               dest, "readOccurences", qa, "aligned"),
             FREQUENT_SEQUENCES_READ=.html_NA(),
             FREQUENT_SEQUENCES_FILTERED=.html_NA(),
             FREQUENT_SEQUENCES_ALIGNED=hwrite(
               .freqSequences(qa, "aligned"),
               border=0),
             CYCLE_BASE_CALL_FIGURE=.html_img(
               dest, "perCycleBaseCall",
               .plotCycleBaseCall(perCycle$baseCall)),
             CYCLE_QUALITY_FIGURE=.html_img(
               dest, "perCycleQuality",
               .plotCycleQuality(perCycle$quality)),
             DEPTH_OF_COVERAGE_FIGURE=.html_img(
               dest, "depthOfCoverage",
               .plotDepthOfCoverage(qa[["depthOfCoverage"]])),
             ADAPTER_CONTAMINATION=hwrite(
               .df2a(qa[["adapterContamination"]]),
               border=0)
             )
    .report_html_do(dest, sections, values, ...)
})

setGeneric(".bowtie_mismatches",
    function(object, ...) standardGeneric(".bowtie_mismatches"))

setMethod(.bowtie_mismatches, "AlignedRead", function(object, ...)
{
    adata <- alignData(object)
    if (!"mismatch" %in% varLabels(adata))
        .throw(SRError("UserArgumentMismatch",
                       "'%s' does not contain varLabels '%s'",
                       "AlignedDataFrame", "mismatch"))
    if (any(c("nmismatch", "mismatchScore") %in% varLabels(adata)))
        .throw(SRError("UserArgumentMismatch",
                       "'%s' already contains varLabels '%s'",
                       "AlignedDataFrame",
                       "nmismatch', 'mismatchScore'"))
    mmatch <- adata[["mismatch"]]
    idx <- which(nzchar(mmatch))
    if (any(grepl(":", mmatch, fixed=TRUE))) {
        anuc <- lapply(strsplit(mmatch[idx], "[:,]"), "[",
                       c(TRUE, FALSE))
        cidx <- unlist(lapply(anuc, as.integer)) + 1L
    } else {
        anuc <- lapply(strsplit(mmatch[idx], ",", fixed=TRUE),
                       as.integer)
        cidx <- unlist(anuc) + 1L
    }
    len <- sapply(anuc, length)
    ridx <- rep(idx, len)
    x <- as(narrow(quality(object)[ridx], cidx, cidx), "matrix")
    mmscore <- rep(NA_integer_, nrow(adata))
    mmscore[idx] <- unlist(lapply(split(x, ridx), sum), use.names=FALSE)
    lngth <- integer(nrow(adata))
    lngth[idx] <- len

    txt <- "Number of mismatches"
    adata[["nmismatch", labelDescription=txt]] <- lngth
    txt <- "Summed quality scores at mismatched nucleotides"
    adata[["mismatchScore", labelDescription=txt]] <- mmscore
    initialize(object, alignData=adata)
})