File: methods-readVcfLongForm.R

package info (click to toggle)
r-bioc-variantannotation 1.10.5-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,172 kB
  • ctags: 109
  • sloc: ansic: 1,088; sh: 4; makefile: 2
file content (140 lines) | stat: -rw-r--r-- 3,871 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
### =========================================================================
### readVcfLongForm methods 
### =========================================================================

## TabixFile

msg <- paste0("'readVcfLongForm' is defunct. Use 'expand' instead. ",
              "See ?'expand,CollapsedVCF-method'")
setMethod(readVcfLongForm, c(file="TabixFile", genome="character", 
          param="ScanVcfParam"), 
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    .readVcfLongForm(file, genome, param)
})

setMethod(readVcfLongForm, c(file="TabixFile", genome="character",
          param="GRanges"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    .readVcfLongForm(file, genome, param=ScanVcfParam(which=param))
})

setMethod(readVcfLongForm, c(file="TabixFile", genome="character",
          param="RangedData"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    .readVcfLongForm(file, genome, param=ScanVcfParam(which=param))
})

setMethod(readVcfLongForm, c(file="TabixFile", genome="character",
          param="RangesList"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    .readVcfLongForm(file, genome, param=ScanVcfParam(which=param))
})

setMethod(readVcfLongForm, c(file="TabixFile", genome="character",
          param="missing"), 
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    .readVcfLongForm(file, genome, param=ScanVcfParam())
})

## character

setMethod(readVcfLongForm, c(file="character", genome="character",
          param="ScanVcfParam"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    file <- .checkTabix(file)
    .readVcfLongForm(file, genome, param)
})

setMethod(readVcfLongForm, c(file="character", genome="character",
          param="missing"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    file <- .checkTabix(file)
    .readVcfLongForm(file, genome, param=ScanVcfParam())
})

setMethod(readVcfLongForm, c(file="character", genome="missing",
          param="missing"),
    function(file, genome, param, ...)
{
    .Defunct(msg=msg)
    stop("'genome' argument is missing") 
})

.checkTabix <- function(x)
{
    if (1L != length(x)) 
        stop("'x' must be character(1)")
    if (grepl("\\.tbi$", x))
        TabixFile(sub("\\.tbi", "", x))
    else 
        x 
}

.readVcfLongForm <- function(file, genome, param = ScanVcfParam(), ...)
{
    .scanVcfToLongForm(scanVcf(file, param=param), file, genome, param)
}

.scanVcfToLongForm <- function(vcf, file, genome, param, ...)
{
    hdr <- scanVcfHeader(file)
    vcf <- .collapseLists(vcf, param)

    ## rowData
    rowData <- vcf[["rowData"]]
    genome(seqinfo(rowData)) <- genome
    values(rowData) <- DataFrame(paramRangeID=vcf[["paramRangeID"]])

    ## fixed fields
    ALT <- .formatALT(vcf[["ALT"]])
    fx <- list(ID=names(rowData), REF=vcf[["REF"]], ALT=ALT, 
               QUAL=vcf[["QUAL"]], FILTER=vcf[["FILTER"]])
    fixed <- DataFrame(fx[lapply(fx, is.null) == FALSE])

    ## info 
    info <- .formatInfo(vcf[["INFO"]], info(hdr))

    ## expand to match ALT
    names(rowData) <-  NULL
    values(rowData) <- append(values(rowData), c(fixed, info))
    rowData <- rep(rowData, elementLengths(ALT))
    values(rowData)[["ALT"]] <- unlist(ALT, use.names=FALSE)
    rowData 
}

.formatGeno <- function(x)
{
    if (length(x) == 0L)
        return(list())
    cls <- lapply(x, class)
    nvar <- dim(x[[1]])[1]
    nsmp <- dim(x[[1]])[2]
    nms <- colnames(x[[1]]) 

    ## collapse matrices and arrays
    for (i in which(cls == "array")) {
        dim(x[[i]]) <- c(nvar*nsmp, dim(x[[i]])[3])
        x[[i]] <- split(x[[i]], seq_len(nvar*nsmp))
    }
    for (i in which(cls == "matrix")) {
        dim(x[[i]]) <- c(nvar*nsmp, 1)
    }
    ## sample names become a data column
    c(list(SAMPLES=rep(nms, each=nvar)), x)
}