1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
|
## Some utility functions for Filters.
## Vector to map AnnotationFilter fields to actual database columns.
## Format: field name = database column name
.ENSDB_FIELDS <- c(
## gene
entrez = "entrezid",
gene_biotype = "gene_biotype",
gene_id = "gene_id",
genename = "gene_name",
gene_name = "gene_name",
symbol = "gene_name",
seq_name = "seq_name",
seq_strand = "seq_strand",
gene_start = "gene_seq_start",
gene_end = "gene_seq_end",
description = "description",
## tx
tx_id = "tx_id",
tx_biotype = "tx_biotype",
tx_name = "tx_id",
tx_start = "tx_seq_start",
tx_end = "tx_seq_end",
tx_support_level = "tx_support_level",
## exon
exon_id = "exon_id",
exon_rank = "exon_idx",
exon_start = "exon_seq_start",
exon_end = "exon_seq_end",
## protein
protein_id = "protein_id",
uniprot = "uniprot_id",
uniprot_db = "uniprot_db",
uniprot_mapping_type = "uniprot_mapping_type",
prot_dom_id = "protein_domain_id",
protein_domain_id = "protein_domain_id",
protein_domain_source = "protein_domain_source"
)
.supportedFilters <- function(x) {
flds <- .filterFields(x)
flts <- c(.fieldToClass(flds), "GRangesFilter")
flds <- c(flds, NA)
idx <- order(flts)
data.frame(filter = flts[idx], field = flds[idx], stringsAsFactors = FALSE)
}
.filterFields <- function(x) {
flds <- c("entrez", "gene_biotype", "gene_id", "gene_name", "genename",
"symbol", "seq_name", "seq_strand", "gene_start", "gene_end",
"tx_id", "tx_biotype", "tx_name", "tx_start", "tx_end",
"exon_id", "exon_rank", "exon_start", "exon_end")
if (hasProteinData(x))
flds <- c(flds, "protein_id", "uniprot", "uniprot_db",
"uniprot_mapping_type", "prot_dom_id",
"protein_domain_id",
"protein_domain_source")
if (any(listColumns(x) == "tx_support_level"))
flds <- c(flds, "tx_support_level")
sort(flds)
}
.fieldToClass <- function(field) {
class <- gsub("_([[:alpha:]])", "\\U\\1", field, perl=TRUE)
class <- sub("^([[:alpha:]])", "\\U\\1", class, perl=TRUE)
paste0(class, if (length(class)) "Filter" else character(0))
}
#' Utility function to map from the default AnnotationFilters fields to the
#' database columns used in ensembldb.
#'
#' @param x The field name to be \emph{translated}.
#' @return The column name in the EnsDb database.
#' @noRd
.fieldInEnsDb <- function(x) {
if (length(x) == 0 || missing(x))
stop("Error in .fieldInEnsDb: got empty input argument!")
if (is.na(.ENSDB_FIELDS[x]))
stop("Unable to map field '", x, "'!")
else
.ENSDB_FIELDS[x]
}
#' Utility function to map the condition of an AnnotationFilter to the SQL
#' condition to be used in the EnsDb database.
#'
#' @param x An \code{AnnotationFilter}.
#'
#' @return A character representing the condition for the SQL call.
#' @noRd
.conditionForEnsDb <- function(x) {
cond <- condition(x)
if (length(unique(value(x))) > 1) {
if (cond == "==")
cond <- "in"
if (cond == "!=")
cond <- "not in"
}
if (cond == "==")
cond <- "="
if (cond %in% c("startsWith", "endsWith", "contains"))
cond <- "like"
cond
}
#' Single quote character values, paste multiple values and enclose in quotes.
#'
#' @param x An \code{AnnotationFilter} object.
#' @noRd
.valueForEnsDb <- function(x) {
vals <- unique(value(x))
if (is(x, "CharacterFilter")) {
vals <- sQuote(gsub(unique(vals), pattern = "'", replacement = "''"))
}
if (length(vals) > 1)
vals <- paste0("(", paste0(vals, collapse = ","), ")")
## Process the like/startsWith/endsWith
if (condition(x) == "startsWith")
vals <- paste0("'", unique(x@value), "%'")
if (condition(x) == "endsWith")
vals <- paste0("'%", unique(x@value), "'")
if (condition(x) == "contains")
vals <- paste0("'%", unique(x@value), "%'")
vals
}
#' That's to build the standard query from an AnnotationFilter for EnsDb.
#'
#' @param x An \code{AnnotationFilter}.
#' @noRd
.queryForEnsDb <- function(x) {
paste(.fieldInEnsDb(field(x)), .conditionForEnsDb(x), .valueForEnsDb(x))
}
#' This is a slightly more sophisticated function that does also prefix the
#' columns.
#' @noRd
.queryForEnsDbWithTables <- function(x, db, tables = character()) {
clmn <- .fieldInEnsDb(field(x))
if (!missing(db)) {
if (length(tables) == 0)
tables <- names(listTables(db))
clmn <- unlist(prefixColumns(db, clmn, with.tables = tables))
}
res <- paste(clmn, .conditionForEnsDb(x), .valueForEnsDb(x))
## cat(" ", res, "\n")
return(res)
}
#' Simple helper function to convert expressions to AnnotationFilter or
#' AnnotationFilterList.
#'
#' @param x Can be an \code{AnnotationFilter}, an \code{AnnotationFilterList},
#' a \code{list} or a filter \code{expression}. This should NOT be empty!
#'
#' @return Returns an \code{AnnotationFilterList} with all filters.
#'
#' @noRd
.processFilterParam <- function(x, db) {
if (missing(db))
stop("Argument 'db' missing.")
## Check if x is a formula and eventually translate it.
if (is(x, "formula"))
res <- AnnotationFilter(x)
else res <- x
if (is(res, "AnnotationFilter"))
res <- AnnotationFilterList(res)
if (!is(res, "AnnotationFilterList")) {
## Did not get a filter expression, thus checking what we've got.
if (is(res, "list")) {
if (length(res)) {
## Check that all elements are AnnotationFilter objects!
if (!all(unlist(lapply(res, function(z) {
inherits(z, "AnnotationFilter")
}), use.names = FALSE)))
stop("One of more elements in 'filter' are not ",
"'AnnotationFilter' objects!")
res <- as(res, "AnnotationFilterList")
res@logOp <- rep("&", (length(res) - 1))
} else {
res <- AnnotationFilterList()
}
} else {
stop("'filter' has to be an 'AnnotationFilter', a list of ",
"'AnnotationFilter' object, an 'AnnotationFilterList' ",
"or a valid filter expression!")
}
}
supp_filters <- supportedFilters(db)$filter
have_filters <- unique(.AnnotationFilterClassNames(res))
if (!all(have_filters %in% supp_filters))
stop("AnnotationFilter classes: ",
paste(have_filters[!(have_filters %in% supp_filters)]),
" are not supported by EnsDb databases.")
res
}
############################################################
## setFeatureInGRangesFilter
##
## Simple helper function to set the @feature in GRangesFilter
## depending on the calling method.
setFeatureInGRangesFilter <- function(x, feature){
for (i in seq(along.with = x)){
if (is(x[[i]], "GRangesFilter"))
x[[i]]@feature <- feature
if (is(x[[i]], "AnnotationFilterList"))
x[[i]] <- setFeatureInGRangesFilter(x[[i]], feature = feature)
}
x
}
############################################################
## isProteinFilter
##' evaluates whether the filter is a protein annotation related filter.
##' @param x The object that should be evaluated. Can be an AnnotationFilter or
##' an AnnotationFilterList.
##' @return Returns TRUE if 'x' is a filter for protein annotation tables and
##' FALSE otherwise.
##' @noRd
isProteinFilter <- function(x) {
if (is(x, "AnnotationFilterList"))
return(unlist(lapply(x, isProteinFilter)))
else
return(is(x, "ProteinIdFilter") | is(x, "UniprotFilter") |
is(x, "ProtDomIdFilter") | is(x, "UniprotDbFilter") |
is(x, "UniprotMappingTypeFilter"))
}
## ############################################################
## ## checkFilter:
## ##
## ## checks the filter argument and ensures that a list of Filter
## ## object is returned
## checkFilter <- function(x){
## if(is(x, "list")){
## if(length(x) == 0)
## return(x)
## ## check if all elements are Filter classes.
## if(!all(unlist(lapply(x, function(z){
## return((is(z, "AnnotationFilter") | is(z, "GRangesFilter")))
## }), use.names = FALSE)))
## stop("One of more elements in 'filter' are not filter objects!")
## }else{
## if(is(x, "AnnotationFilter") | is(x, "GRangesFilter")){
## x <- list(x)
## }else{
## stop("'filter' has to be a filter object or a list of",
## " filter objects!")
## }
## }
## return(x)
## }
#' build the \emph{where} query for a \code{GRangedFilter}. Supported conditions
#' are: \code{"start"}, \code{"end"}, \code{"equal"}, \code{"within"},
#' \code{"any"}.
#'
#' @param grf \code{GRangesFilter}.
#'
#' @param columns named character vectors with the column names for start, end,
#' strand and seq_name.
#'
#' @param db An optional \code{EnsDb} instance. Used to \emph{translate}
#' seqnames depending on the specified seqlevels style.
#'
#' @return A character with the corresponding \emph{where} query.
#' @noRd
buildWhereForGRanges <- function(grf, columns, db = NULL){
condition <- condition(grf)
if (!(condition %in% c("start", "end", "within", "equal", "any")))
stop("'condition' ", condition, " not supported. Condition (type) can ",
"be one of 'any', 'start', 'end', 'equal', 'within'.")
if( is.null(names(columns)))
stop("The vector with the required column names for the",
" GRangesFilter query has to have names!")
if (!all(c("start", "end", "seqname", "strand") %in% names(columns)))
stop("'columns' has to be a named vector with names being ",
"'start', 'end', 'seqname', 'strand'!")
## Build the query to fetch all features that are located within the range
quers <- sapply(as(value(grf), "GRangesList"), function(z) {
if (!is.null(db)) {
seqn <- formatSeqnamesForQuery(db, as.character(seqnames(z)))
} else {
seqn <- as.character(seqnames(z))
}
## start: start, seqname and strand have to match.
if (condition == "start") {
query <- paste0(columns["start"], "=", start(z), " and ",
columns["seqname"], "='", seqn, "'")
}
## end: end, seqname and strand have to match.
if (condition == "end") {
query <- paste0(columns["end"], "=", end(z), " and ",
columns["seqname"], "='", seqn, "'")
}
## equal: start, end, seqname and strand have to match.
if (condition == "equal") {
query <- paste0(columns["start"], "=", start(z), " and ",
columns["end"], "=", end(z), " and ",
columns["seqname"], "='", seqn, "'")
}
## within: start has to be >= start, end <= end, seqname and strand
## have to match.
if (condition == "within") {
query <- paste0(columns["start"], ">=", start(z), " and ",
columns["end"], "<=", end(z), " and ",
columns["seqname"], "='", seqn, "'")
}
## any: essentially the overlapping.
if (condition == "any") {
query <- paste0(columns["start"], "<=", end(z), " and ",
columns["end"], ">=", start(z), " and ",
columns["seqname"], "='", seqn, "'")
}
## Include the strand, if it's not "*"
if(as.character(strand(z)) != "*"){
query <- paste0(query, " and ", columns["strand"], " = ",
strand2num(as.character(strand(z))))
}
return(query)
})
if(length(quers) > 1)
quers <- paste0("(", quers, ")")
## Collapse now the queries.
query <- paste0(quers, collapse=" or ")
paste0("(", query, ")")
}
#' @description Helper to extract all AnnotationFilter class names from an
#' AnnotationFilterList (recursively!)
#'
#' @param x The \code{AnnotationFilterList}.
#'
#' @return A \code{character} with the names of the classes.
#' @noRd
.AnnotationFilterClassNames <- function(x) {
classes <- lapply(x, function(z) {
if (is(z, "AnnotationFilterList"))
return(.AnnotationFilterClassNames(z))
class(z)
})
unlist(classes, use.names = FALSE)
}
#' @description Test if any of the filter(s) is an SymbolFilter.
#'
#' @noRd
.anyIs <- function(x, what = "SymbolFilter") {
if (is(x, "AnnotationFilter")) {
is(x, what)
} else {
unlist(lapply(x, .anyIs, what = what))
}
}
|