File: test_makeTxDbFromGFF.R

package info (click to toggle)
r-bioc-txdbmaker 1.2.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,168 kB
  • sloc: makefile: 2
file content (84 lines) | stat: -rw-r--r-- 3,115 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
## Paths to the annotation files used as inputs by the tests in this file.
## Each one is looked up with system.file() in the "extdata" directory of the
## "txdbmaker" package.

## GFF3 input.
gffFile <- system.file(
  "extdata", "GFF3_files", "a.gff3",
  package = "txdbmaker"
)

## Gzipped GTF input.
gtfFile <- system.file(
  "extdata", "GTF_files", "GCA_002204515.1_AaegL5.0_genomic.gtf.gz",
  package = "txdbmaker"
)

## Filtered fly GFF input.
flyFile <- system.file(
  "extdata", "GFF3_files", "dmel-1000-r5.11.filtered.gff",
  package = "txdbmaker"
)

## Bacterial GFF input. Bad bacterial GFFs require the use of a special
## argument to ignore most of the data.
gffB <- system.file(
  "extdata", "GFF3_files", "GCF_000020065.1_ASM2006v1_genomic.gff",
  package = "txdbmaker"
)

## Test that the TxDb objects built by makeTxDbFromGFF() match what is
## expected.
##
## Takes no arguments. It reads the file-level paths gffFile, gtfFile, flyFile
## and gffB, and reports results through the RUnit-style check*() functions.
##
## Four scenarios are exercised:
##   1. a GFF3 file, compared against a stored reference TxDb (a.sqlite);
##   2. a gzipped GTF file with user-supplied chrominfo and metadata,
##      compared against a stored reference TxDb;
##   3. a GFF file from flybase, compared against a stored reference TxDb;
##   4. an NCBI bacterial GFF, checked only for the class of the result and
##      the number of transcripts.
test_makeTxDbFromGFF <- function(){
  ## ---- 1. GFF3 file ----------------------------------------------------
  ## wanted
  gffDBFile <- system.file("extdata", "GFF3_files", "a.sqlite",
                           package="txdbmaker")
  txdb_gff <- loadDb(gffDBFile)

  ## generated
  txdb1 <- makeTxDbFromGFF(file=gffFile,
                           dataSource="partial GFF file for Tomatoes for testing",
                           organism="Solanum lycopersicum",
                           circ_seqs=character(0))

  ## test
  checkTrue(GenomicFeatures:::compareTxDbs(txdb1, txdb_gff))


  ## ---- 2. GTF file with explicit chrominfo and metadata ----------------
  ## wanted
  gtfDBFile <- system.file("extdata", "GTF_files",
                           "GCA_002204515.1_AaegL5.0_genomic.sqlite",
                           package="txdbmaker")
  txdb_gtf <- loadDb(gtfDBFile)

  ## generated
  chrominfo <- data.frame(chrom="MF194022.1", length=16790, is_circular=TRUE)
  dataSource <- paste0("https://ftp.ncbi.nlm.nih.gov/genomes/all/",
                       "GCA/002/204/515/GCA_002204515.1_AaegL5.0/",
                       "GCA_002204515.1_AaegL5.0_genomic.gtf.gz")
  organism <- "Aedes aegypti"
  metadata <- data.frame(name="Genome", value="AaegL5.0")

  txdb2 <- makeTxDbFromGFF(gtfFile, dataSource=dataSource, organism=organism,
                           chrominfo=chrominfo, metadata=metadata)

  ## test
  checkTrue(GenomicFeatures:::compareTxDbs(txdb2, txdb_gtf))


  ## ---- 3. GFF file from flybase ----------------------------------------
  ## wanted
  flyDBFile <- system.file("extdata", "GFF3_files",
                           "dmel-1000-r5.11.filtered.sqlite",
                           package="txdbmaker")
  txdb_fly <- loadDb(flyDBFile)

  ## generated
  txdb3 <- makeTxDbFromGFF(file=flyFile,
                           dataSource="gff file from flybase",
                           organism="Drosophila melanogaster",
                           circ_seqs=character(0))

  ## test
  checkTrue(GenomicFeatures:::compareTxDbs(txdb3, txdb_fly))


  ## ---- 4. Broken NCBI bacterial GFF ------------------------------------
  ## test for broken NCBI bacterial GFFs (that only seem to have
  ## reliable gene info and little else)
  chrominfoBac <- data.frame(chrom = c('NC_011025.1'),
                          length=c(830000), ## placeholder, i.e. it is big enough
                          is_circular=c(TRUE))

  ## mostly I want to see if it can run this:
  txdb_bac <- makeTxDbFromGFF(file = gffB,
                              chrominfo = chrominfoBac,
                              dataSource = "NCBI",
                              organism = "Metamycoplasma arthritidis")

  ## Tests
  ## Use is() rather than comparing class() with a string: is() also accepts
  ## subclasses of TxDb and always returns a single TRUE/FALSE.
  checkTrue(methods::is(txdb_bac, "TxDb"))
  checkEquals(length(transcripts(txdb_bac)), 672)
}