1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
|
.require <-
function(pkg)
{
result <- TRUE
if (length(grep(sprintf("package:%s", pkg), search())) != 0L)
return(invisible(result))
message("require(", dQuote(pkg), ")")
handler <- function(err) {
msg <- sprintf("require(%s) failed: %s", dQuote(pkg),
conditionMessage(err))
stop(msg)
}
result <- tryCatch(suppressPackageStartupMessages(
require(pkg, quietly=TRUE, character.only=TRUE)
), error=handler)
if (!result)
handler(simpleError("use BiocManager::install() to install package?"))
invisible(result)
}
.ask <- function(txt, values) {
txt <- sprintf("%s [%s] ", txt, paste(values, collapse="/"))
repeat {
ans <- tolower(trimws(readline(txt)))
if (ans %in% values)
break
}
ans
}
.gunzip <- function(file, destination)
{
bufferSize <- 1e7
fin <- gzfile(file, "rb")
fout <- file(destination, "wb")
on.exit({
close(fin)
close(fout)
})
repeat {
x <- readBin(fin, raw(0L), bufferSize, 1L)
if (length(x) == 0L)
break
writeBin(x, fout, size=1L)
}
invisible(destination)
}
## tidyGRanges
.metadataForAH <-
function(x, ...)
{
stopifnot(length(getHub(x)) == 1)
meta <- getHub(x)
list(AnnotationHubName=names(meta),
`File Name`=basename(meta$sourceurl),
`Data Source`=meta$sourceurl,
`Provider`=meta$dataprovider,
`Organism`=meta$species,
`Taxonomy ID`=meta$taxonomyid )
}
.guessIsCircular <-
function(x)
{
ans <- GenomeInfoDb::isCircular(x)
idx <- is.na(ans)
test <- names(ans) %in% c("MT", "MtDNA", "dmel_mitochondrion_genome",
"Mito", "chrM")
ans[idx] <- test[idx]
ans
}
# tasks we want .tidyGRanges() to do:
# 1. add metdata() to GRanges containing the names() of hub object
# 2. sortSeqlevels()
# 3. fill the seqinfo with correct information
# for step 3 - comparison is done with existingSeqinfo and
# GenomeInfoDb::Seqinfo() - currently if its not the same, seqinfo is replaced.
.tidyGRanges <-
function(x, gr, sort=TRUE, guess.circular=TRUE, addGenome=TRUE,
metadata=TRUE, genome=getHub(x)$genome)
{
if (metadata)
metadata(gr) <- .metadataForAH(x)
## BEWARE:
## 1) GenomeInfoDb::Seqinfo doesnt sortSeqlevels - so we need to
## sortSeqlevels before comparison else identical wont work.
## 2) case - the input GRanges might have a subset of seqlevels whereas
## the GenomeInfoDb::Seqinfo returns all seqlevels with scaffolds
## from an assembly.
## 3)only 10-15 genomes supported by GenomeInfoDb::Seqinfo
tryCatch({
loadNamespace("GenomeInfoDb")
}, error=function(err) {
## quietly return un-tidied GRanges (?)
return(gr)
})
if (sort)
gr <- GenomeInfoDb::sortSeqlevels(gr)
existingSeqinfo <- GenomeInfoDb::seqinfo(gr)
## Not all Genome's are supported by GenomeInfoDb::Seqinfo
newSeqinfo <- tryCatch({
GenomeInfoDb::Seqinfo(genome=genome)
}, error= function(err) {
NULL
})
if (is.null(newSeqinfo) || !all(GenomeInfoDb::seqlevels(gr) %in% GenomeInfoDb::seqlevels(newSeqinfo))) {
## use guess work to populate
if (guess.circular)
GenomeInfoDb::isCircular(existingSeqinfo) <-
.guessIsCircular(existingSeqinfo)
if (addGenome)
GenomeInfoDb::genome(existingSeqinfo) <- genome
if (sort || guess.circular || addGenome) {
new2old <- match(GenomeInfoDb::seqlevels(existingSeqinfo),
GenomeInfoDb::seqlevels(gr))
GenomeInfoDb::seqinfo(gr, new2old=new2old) <- existingSeqinfo
}
return(gr)
}
newSeqinfo <- newSeqinfo[GenomeInfoDb::seqlevels(gr)]
# comapre the current and new seqinfo
diffSeqlengths <- setdiff(GenomeInfoDb::seqlengths(newSeqinfo),
GenomeInfoDb::seqlengths(existingSeqinfo))
diffSeqnames <- setdiff(GenomeInfoDb::seqnames(newSeqinfo),
GenomeInfoDb::seqnames(existingSeqinfo))
diffGenome <- identical(unique(GenomeInfoDb::genome(newSeqinfo)),
unique(GenomeInfoDb::genome(existingSeqinfo)))
diffIscircular <- identical(table(GenomeInfoDb::isCircular(newSeqinfo)),
table(GenomeInfoDb::isCircular(existingSeqinfo)))
len <- c(length(diffSeqlengths), length(diffSeqnames))
# if its the same dont replace
if(all(unique(len)==0 & diffGenome & diffIscircular))
return(gr)
## Replace incorrect seqinfo
if (sort || guess.circular || addGenome) {
new2old <- match(GenomeInfoDb::seqlevels(gr),
GenomeInfoDb::seqlevels(newSeqinfo))
GenomeInfoDb::seqinfo(gr, new2old=new2old) <- newSeqinfo
}
gr
}
DispatchClassList <- function(){
matrix(
c("FaFile", "Rsamtools::FaFile(); requires rtracklayer",
"BamFile", "Rsamtools::BamFile(); requires rtracklayer",
"Rds", "readRDS()",
"RDS", "readRDS()",
"Rda", "get(load())",
"data.frame", "get(load())",
"dcf", "read.dcf()",
"GRanges", "get(load()); requires GenomicRanges",
"VCF", "get(load()); requires VariantAnnotation",
"ChainFile",
"rtracklayer::import.chain(); requires rtracklayer and GenomeInfoDb; before using import.chain internally uses gzfile and writeBin to extract data from file; files saved as chain.gz",
"TwoBitFile", "rtracklayer::TwoBitFile(); requires rtracklayer",
"GFFFile",
"rtracklayer::import(); require rtracklayer and GenomeInfoDB; after import converts to GRanges object",
"GFF3File", "rtracklayer::import(); require rtracklayer",
"BigWig", "rtracklayer::BigWigFile(); require rtracklayer",
"dbSNPVCFFile",
"VariantAnnotation::VcfFile(); require VariantAnnotation; files saved as vcf.gz and vcf.gz.tbi",
"SQLiteFile", "AnnotationDbi::loadDb(); files saved as sqlite",
"GRASP", "dbFileConnect()",
"Zip", "unzip(); returns file path to files",
"ChEA", "unzip(); returns data.frame from reading chea-background.csv",
"BioPax", "get(load()); require rBiopaxParser",
"Pazar",
"read.delim(); require GenomicRanges; reads specific columns from file and coverts to GRanges object",
"CSVtoGranges",
"read.csv(); require GenomicRanges; coverts data.frame to GRanges object",
"ExpressionSet", "get(load()); require Biobase",
"GDS", "gdsfmt::openfn.gds(); require gdsfmt",
"H5File", "require rhdf5; resource downloaded but not loaded; returns file path",
"FilePath", "resource downloaded but not loaded; returns file path",
"BEDFile",
"rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"UCSCBroadPeak", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"UCSCNarrowPeak", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"UCSCBEDRnaElements", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"UCSCGappedPeak", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"EpiMetadata", "read.delim()",
"EpiExpressionText", "read.table(); converts to SummarizedExperiment object",
"EpichmmModels",
"rtracklayer::import(); calls additional helper AnnotationHub:::.mapAbbr2FullName and then converts to GRange object; file assumed to be bed file format",
"EpigenomeRoadmapFile", "rtracklayer::import(); converts to GRange object; file assumed to be bed file format",
"EpigenomeRoadmapNarrowAllPeaks", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"EpigenomeRoadmapNarrowFDR", "rtracklayer::import(rtracklayer::BEDFile()); require rtracklayer; converts to GRanges object",
"EnsDb", "ensembldb::EnsDb(); require ensembldb",
"mzRpwiz", "mzR::openMSfile(); require mzR",
"mzRident", "mzR::openIDfile(); require mzR",
"MSnSet", "get(load()); require MSnbase",
"AAStringSet", "Biostrings::readAAStringSet(); require Biostrings",
"CompDb", "CompoundDb::Compdb(); requires CompoundDb",
"kerasHDF5Model", "keras::load_model_hdf5(); requires keras",
"kerasHDF5ModelWeights", "keras::load_model_weights_hdf5(); requires keras"
), ncol=2, byrow=TRUE)
}
|