File: checkEnsDbs.R

package info (click to toggle)
r-bioc-ensembldb 2.14.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,764 kB
  • sloc: perl: 331; sh: 15; makefile: 5
file content (38 lines) | stat: -rw-r--r-- 1,226 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#' @description Check EnsDb sqlite files found in the specified folder. Each
#'     database is opened, passed to the package-internal validity checks and
#'     queried for its genes. No error handling is done here, so an error from
#'     any of these calls stops the loop.
#'
#' @param x \code{character(1)} with the folder in which we're looking for EnsDb
#'     objects.
#'
#' @return \code{NULL}, invisibly. Progress is reported with \code{message}.
#'
#' @author Johannes Rainer
#'
#' @noRd
#'
#' @examples
#' dir <- "/Users/jo/tmp/ensdb_20"
checkEnsDbs <- function(x) {
    ## Escape the dot so that only names ending in ".sqlite" are picked up
    ## (an unescaped "." would also match e.g. "foo_sqlite").
    edbs <- dir(x, pattern = "\\.sqlite$", full.names = TRUE)
    if (length(edbs) == 0L)
        message("No EnsDb SQLite files found in: ", x)
    ## seq_along() yields an empty sequence for an empty folder, whereas
    ## 1:length(edbs) would iterate over c(1, 0).
    for (i in seq_along(edbs)) {
        message("\nChecking EnsDb: ", basename(edbs[i]))
        edb <- EnsDb(edbs[i])
        ensembldb:::validateEnsDb(edb)
        ensembldb:::checkValidEnsDb(edb)
        ## Now check also some query calls; only the fact that the query
        ## succeeds matters, the result itself is not used.
        genes(edb)
        message(" version: ", ensembldb:::dbSchemaVersion(edb))
        message(" OK")
    }
    invisible(NULL)
}

#' @description Find EnsDb SQLite files in a directory whose transcript
#'     annotation lacks a `gc_content` metadata column.
#'
#' @param x `character(1)` with the directory containing EnsDb SQLite files.
#'
#' @return `list` with one element per database that has no `gc_content`
#'     column: the lower-cased organism name of that database (i.e. the
#'     databases that have to be re-created). An empty `list` if every
#'     database has the column or no database was found.
check_gc_content <- function(x) {
    ## Escape the dot so that only names ending in ".sqlite" are picked up.
    edbs <- dir(x, pattern = "\\.sqlite$", full.names = TRUE)
    failed <- lapply(edbs, function(edb) {
        edb <- EnsDb(edb)
        if ("gc_content" %in% colnames(mcols(transcripts(edb))))
            return(NULL)
        ## NOTE(review): collapse = "_" has no effect on a length-1 organism
        ## string; if "homo_sapiens"-style names are wanted the space would
        ## have to be substituted instead - confirm intended format.
        paste0(tolower(organism(edb)), collapse = "_")
    })
    ## Keep only the databases that failed. A logical mask is required here:
    ## indexing with the raw 0/1 lengths would select the first element
    ## repeatedly instead of the non-NULL entries.
    failed[lengths(failed) > 0L]
}