File: AllClasses.R

package info (click to toggle)
r-bioc-savr 1.37.0-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,360 kB
  • sloc: xml: 12; makefile: 5
file content (274 lines) | stat: -rw-r--r-- 11,603 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#'Illumina read
#'
#'S4 class describing the features of a single read of an Illumina
#'sequencing run.
#'
#'@section Slots:
#'\describe{
#'\item{\code{number}:}{integer; the index of this read in sequencing}
#'\item{\code{cycles}:}{integer; number of cycles in this read}
#'\item{\code{index}:}{logical; whether or not this read is an index read}
#'}
setClass(
  "illuminaRead",
  slots = c(
    number = "integer",
    cycles = "integer",
    index  = "logical"
  )
)

#'Layout of an Illumina flowcell
#'
#'S4 class describing the physical layout of an Illumina flow cell.
#'
#'@section Slots:
#'\describe{
#'\item{\code{lanecount}:}{integer; number of lanes on the flowcell}
#'\item{\code{surfacecount}:}{integer; number of surfaces}
#'\item{\code{swathcount}:}{integer; number of imaging swaths}
#'\item{\code{tilecount}:}{integer; number of tiles per swath}
#'\item{\code{sectionperlane}:}{integer; number of sections per lane (NextSeq)}
#'\item{\code{lanepersection}:}{integer; number of lanes per section (NextSeq)}
#'\item{\code{tilenamingconvention}:}{character; description of deviation
#'from the original formatting layout}
#'}
setClass(
  "illuminaFlowCellLayout",
  slots = c(
    lanecount            = "integer",
    surfacecount         = "integer",
    swathcount           = "integer",
    tilecount            = "integer",
    sectionperlane       = "integer",
    lanepersection       = "integer",
    tilenamingconvention = "character"
  )
)

#'Structure for holding parsed InterOp headers and data
#'
#'Container pairing the parsed header values of an InterOp file with the
#'parsed records. The prototype supplies an empty list for \code{header}
#'and \code{NULL} for both \code{data} and \code{accessor}.
#'
#'@section Slots:
#'\describe{
#'\item{\code{header}:}{list of parsed header values}
#'\item{\code{data}:}{data.frame of parsed values}
#'\item{\code{accessor}:}{character; presumably the name of the accessor
#'associated with this data (TODO confirm against the parser)}
#'}
setClass(
  "savData",
  slots = c(
    header   = "list",
    data     = "data.frame",
    accessor = "character"
  ),
  prototype = prototype(
    header   = list(),
    data     = NULL,
    accessor = NULL
  )
)

#'SAV project class
#'
#'Represents a flowcell together with its run metadata and the parsed SAV
#'information. The prototype sets \code{location} to \code{"."}.
#'
#'@section Slots:
#'\describe{
#'\item{\code{location}:}{character; full path to flowcell directory}
#'\item{\code{reads}:}{list of \link{illuminaRead-class} objects}
#'\item{\code{layout}:}{an \link{illuminaFlowCellLayout-class} object}
#'\item{\code{runid}:}{character; run ID}
#'\item{\code{number}:}{integer; run number}
#'\item{\code{flowcell}:}{character; flowcell ID}
#'\item{\code{instrument}:}{character; instrument ID}
#'\item{\code{date}:}{character; run date}
#'\item{\code{cycles}:}{integer; total number of cycles}
#'\item{\code{directions}:}{integer; total number of sequence runs (ends)}
#'\item{\code{parsedData}:}{list; SAV data}
#'}
setClass(
  "savProject",
  slots = c(
    location   = "character",
    reads      = "list",
    layout     = "illuminaFlowCellLayout",
    runid      = "character",
    number     = "integer",
    flowcell   = "character",
    instrument = "character",
    date       = "character",
    cycles     = "integer",
    directions = "integer",
    parsedData = "list"
  ),
  prototype = prototype(location = ".")
)

#'Base class for formatters
#'
#'Defines the slots needed to parse different binary files with the same
#'generic parser.
#'
#'@section Slots:
#'\describe{
#'\item{\code{filename}:}{character; name of the binary file described by
#'the format (e.g. \code{"QMetricsOut.bin"} in the quality formatter)}
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'\item{\code{accessor}:}{character; presumably the name of the accessor
#'used to retrieve the parsed data (TODO confirm against the parser)}
#'\item{\code{default}:}{logical; whether this is the default format}
#'}
setClass(
  "savFormat",
  slots = c(
    filename = "character",
    name     = "character",
    type     = "character",
    lengths  = "integer",
    order    = "character",
    version  = "integer",
    accessor = "character",
    default  = "logical"
  )
)

#'Corrected Intensity formatter
#'
#'Record layout for \code{CorrectedIntMetricsOut.bin}: lane, tile, cycle,
#'average intensity, corrected intensities (ACGT), average corrected called
#'intensities (ACGT), number of no-calls, number of (ACGT) calls, and signal
#'to noise ratio.
#'
#'The prototype describes 18 columns: the first 12 are 2 bytes wide and the
#'remaining 6 are 4 bytes wide; all are typed integer except the final
#'\code{sig_noise} column, which is numeric.
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'}
setClass(
  "savCorrectedIntensityFormat",
  contains = "savFormat",
  prototype = prototype(
    filename = "CorrectedIntMetricsOut.bin",
    name = c(
      "lane", "tile", "cycle", "avg_intensity",
      paste0("avg_cor_", c("A", "C", "G", "T")),
      paste0("avg_cor_called_", c("A", "C", "G", "T")),
      paste0("num_", c("none", "A", "C", "G", "T")),
      "sig_noise"
    ),
    type = rep(c("integer", "numeric"), times = c(17, 1)),
    lengths = rep(c(2L, 4L), times = c(12, 6)),
    order = c("lane", "cycle", "tile"),
    version = 2L,
    accessor = "correctedIntensities",
    default = TRUE
  )
)

#'Quality Metrics formatter
#'
#'Record layout for \code{QMetricsOut.bin} (format version 4): lane, tile,
#'cycle, Q1-Q50 counts.
#'
#'The prototype describes 53 integer columns: three 2-byte identifiers
#'followed by fifty 4-byte counts named \code{Q1} to \code{Q50}. This
#'format is flagged as the default (\code{default=TRUE}).
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'}
setClass(
  "savQualityFormat",
  contains = "savFormat",
  prototype = prototype(
    filename = "QMetricsOut.bin",
    name = c("lane", "tile", "cycle", paste0("Q", 1:50)),
    type = rep("integer", 53),
    lengths = rep(c(2L, 4L), times = c(3, 50)),
    order = c("lane", "cycle", "tile"),
    version = 4L,
    accessor = "qualityMetrics",
    default = TRUE
  )
)



#'Quality Metrics formatter version 5
#'
#'Record layout for \code{QMetricsOut.bin} (format version 5): lane, tile,
#'cycle, Q1-Q50 counts.
#'
#'The prototype describes the same 53 integer columns as the version 4
#'quality formatter, with \code{version=5L} and \code{default=FALSE}.
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'}
#'
# Format information found at https://tracker.tgac.ac.uk/browse/MISO-138
# Quality Metrics (QMetricsOut.bin), file layout:
#   byte 0: file version number (5)
#   byte 1: length of each record
#   byte 2: quality score binning flag (1 means binning was on)
#   byte 3: number of quality score bins, B
#   only when byte 2 == 1:
#     bytes 4 - (4+B-1):         lower boundary of quality score bins
#     bytes (4+B) - (4+2*B-1):   upper boundary of quality score bins
#     bytes (4+2*B) - (4+3*B-1): remapped scores of quality score bins
# The remaining bytes hold the records, each laid out as:
#   2 bytes: lane number  (uint16)
#   2 bytes: tile number  (uint16)
#   2 bytes: cycle number (uint16)
#   4 x 50 bytes: number of clusters assigned score (uint32) Q1 through Q50
setClass(
  "savQualityFormatV5",
  contains = "savFormat",
  prototype = prototype(
    filename = "QMetricsOut.bin",
    name = c("lane", "tile", "cycle", paste0("Q", 1:50)),
    type = rep("integer", 53),
    lengths = rep(c(2L, 4L), times = c(3, 50)),
    order = c("lane", "cycle", "tile"),
    accessor = "qualityMetrics",
    version = 5L,
    default = FALSE
  )
)

#'Tile Metrics formatter
#'
#'Record layout for \code{TileMetricsOut.bin}: lane, tile, code, value.
#'The codes are:
#'
#'\tabular{ll}{
#'100 \tab Cluster Density \cr
#'101 \tab PF Cluster Density \cr
#'102 \tab Number of clusters \cr
#'103 \tab Number of PF clusters \cr
#'400 \tab Control lane \cr
#'}
#'
#'The prototype describes three 2-byte integer columns followed by one
#'4-byte numeric column.
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number (header consists of version (1b), length (1b))}
#'}
setClass(
  "savTileFormat",
  contains = "savFormat",
  prototype = prototype(
    filename = "TileMetricsOut.bin",
    name = c("lane", "tile", "code", "value"),
    type = c("integer", "integer", "integer", "numeric"),
    lengths = c(2L, 2L, 2L, 4L),
    order = c("lane", "code", "tile"),
    version = 2L,
    accessor = "tileMetrics",
    default = TRUE
  )
)

#'Extraction Metrics formatter
#'
#'Record layout for \code{ExtractionMetricsOut.bin}: lane, tile, cycle,
#'FWHM (ACGT), intensity (ACGT), datestamp, timestamp.
#'
#'Datestamp and timestamp are currently munged: R has no native support
#'for 32-bit unsigned integers and no workaround has been implemented.
#'
#'The prototype describes 13 columns: three 2-byte integers, four 4-byte
#'numerics (FWHM), four 2-byte integers (intensity) and two 4-byte
#'integers (datestamp, timestamp).
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'}
setClass(
  "savExtractionFormat",
  contains = "savFormat",
  prototype = prototype(
    filename = "ExtractionMetricsOut.bin",
    name = c(
      "lane", "tile", "cycle",
      paste0("FWHM_", c("A", "C", "G", "T")),
      paste0("int_", c("A", "C", "G", "T")),
      "datestamp", "timestamp"
    ),
    type = rep(c("integer", "numeric", "integer"), times = c(3, 4, 6)),
    lengths = rep(c(2L, 4L, 2L, 4L), times = c(3, 4, 4, 2)),
    order = c("lane", "cycle", "tile"),
    version = 2L,
    accessor = "extractionMetrics",
    default = TRUE
  )
)

#'Error Metrics formatter
#'
#'Record layout for \code{ErrorMetricsOut.bin}: lane, tile, cycle,
#'errorrate, nPerfect, n1Error, n2Error, n3Error, n4Error.
#'
#'The prototype describes 9 columns: three 2-byte integers followed by six
#'4-byte values, of which \code{errorrate} is numeric and the rest are
#'integer.
#'
#'@section Slots:
#'\describe{
#'\item{\code{name}:}{vector of column names}
#'\item{\code{type}:}{vector of data types of elements}
#'\item{\code{lengths}:}{vector of byte lengths for each element}
#'\item{\code{order}:}{vector of column names for sorting}
#'\item{\code{version}:}{integer version number}
#'}
setClass(
  "savErrorFormat",
  contains = "savFormat",
  prototype = prototype(
    filename = "ErrorMetricsOut.bin",
    name = c(
      "lane", "tile", "cycle", "errorrate", "nPerfect",
      paste0("n", 1:4, "Error")
    ),
    type = rep(c("integer", "numeric", "integer"), times = c(3, 1, 5)),
    lengths = rep(c(2L, 4L), times = c(3, 6)),
    order = c("lane", "cycle", "tile"),
    version = 3L,
    accessor = "errorMetrics",
    default = TRUE
  )
)

# Parser class: pairs a savProject (slot `project`) with a savFormat
# (slot `format`). Presumably one parser instance handles one binary file
# of the project using the given format -- confirm against the parsing code.
setClass(
  "savParser",
  slots = c(
    project = "savProject",
    format  = "savFormat"
  )
)