1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
# Expected data for tests that use the FBgn0031208.gff and FBgn0031208.gtf files.
# Map each child feature to its expected first-order (direct) parents in the GFF test file.
GFF_parent_check_level_1 = dict(
    [
        # Each pair is (child ID, list of expected direct parent IDs).
        # Pair order is kept so the resulting dict iterates in the same order.
        #
        # The two FBtr* entries have FBgn0031208 as their only parent.
        ("FBtr0300690", ["FBgn0031208"]),
        ("FBtr0300689", ["FBgn0031208"]),
        # Entries that list both FBtr* IDs as parents.
        ("CG11023:1", ["FBtr0300689", "FBtr0300690"]),
        ("five_prime_UTR_FBgn0031208:1_737", ["FBtr0300689", "FBtr0300690"]),
        ("CDS_FBgn0031208:1_737", ["FBtr0300689", "FBtr0300690"]),
        # Entries that list exactly one FBtr* ID as parent.
        ("intron_FBgn0031208:1_FBgn0031208:2", ["FBtr0300690"]),
        ("intron_FBgn0031208:1_FBgn0031208:3", ["FBtr0300689"]),
        ("FBgn0031208:3", ["FBtr0300689"]),
        ("CDS_FBgn0031208:3_737", ["FBtr0300689"]),
        ("CDS_FBgn0031208:2_737", ["FBtr0300690"]),
        ("exon:chr2L:8193-8589:+", ["FBtr0300690"]),
        ("intron_FBgn0031208:2_FBgn0031208:4", ["FBtr0300690"]),
        ("three_prime_UTR_FBgn0031208:3_737", ["FBtr0300689"]),
        ("FBgn0031208:4", ["FBtr0300690"]),
        ("CDS_FBgn0031208:4_737", ["FBtr0300690"]),
        ("three_prime_UTR_FBgn0031208:4_737", ["FBtr0300690"]),
    ]
)
# Second level: every feature listed below should have the same gene as its only grandparent.
GFF_parent_check_level_2 = {
    # Every listed ID maps to its own fresh one-element list naming
    # FBgn0031208; the lists are not shared between keys.
    feature_id: ["FBgn0031208"]
    for feature_id in (
        "CG11023:1",
        "five_prime_UTR_FBgn0031208:1_737",
        "CDS_FBgn0031208:1_737",
        "intron_FBgn0031208:1_FBgn0031208:2",
        "intron_FBgn0031208:1_FBgn0031208:3",
        "FBgn0031208:3",
        "CDS_FBgn0031208:3_737",
        "CDS_FBgn0031208:2_737",
        "exon:chr2L:8193-8589:+",
        "intron_FBgn0031208:2_FBgn0031208:4",
        "three_prime_UTR_FBgn0031208:3_737",
        "FBgn0031208:4",
        "CDS_FBgn0031208:4_737",
        "three_prime_UTR_FBgn0031208:4_737",
    )
}
# The same checks for the GTF test file: first-level parents, then second-level.
GTF_parent_check_level_1 = {
    # Every child in this table has exactly one expected direct parent, so
    # the pairs below are (child ID, parent ID) and each value is wrapped
    # into its own one-element list.
    child_id: [parent_id]
    for child_id, parent_id in (
        # exon entries
        ("exon:chr2L:7529-8116:+", "FBtr0300689"),
        ("exon:chr2L:7529-8116:+_1", "FBtr0300690"),
        ("exon:chr2L:8193-9484:+", "FBtr0300689"),
        ("exon:chr2L:8193-8589:+", "FBtr0300690"),
        ("exon:chr2L:8668-9484:+", "FBtr0300690"),
        ("exon:chr2L:10000-11000:-", "transcript_Fk_gene_1"),
        ("exon:chr2L:11500-12500:-", "transcript_Fk_gene_2"),
        # CDS entries
        ("CDS:chr2L:7680-8116:+", "FBtr0300689"),
        ("CDS:chr2L:7680-8116:+_1", "FBtr0300690"),
        ("CDS:chr2L:8193-8610:+", "FBtr0300689"),
        ("CDS:chr2L:8193-8589:+", "FBtr0300690"),
        ("CDS:chr2L:8668-9276:+", "FBtr0300690"),
        ("CDS:chr2L:10000-11000:-", "transcript_Fk_gene_1"),
        # entries whose parent is one of the gene-level IDs
        ("FBtr0300689", "FBgn0031208"),
        ("FBtr0300690", "FBgn0031208"),
        ("transcript_Fk_gene_1", "Fk_gene_1"),
        ("transcript_Fk_gene_2", "Fk_gene_2"),
        # start_codon entries
        ("start_codon:chr2L:7680-7682:+", "FBtr0300689"),
        ("start_codon:chr2L:7680-7682:+_1", "FBtr0300690"),
        ("start_codon:chr2L:10000-11002:-", "transcript_Fk_gene_1"),
        # stop_codon entries
        ("stop_codon:chr2L:8611-8613:+", "FBtr0300689"),
        ("stop_codon:chr2L:9277-9279:+", "FBtr0300690"),
        ("stop_codon:chr2L:11001-11003:-", "transcript_Fk_gene_1"),
    )
}
# Expected second-level parents for the GTF entries. The four
# transcript-style IDs map to an empty list.
GTF_parent_check_level_2 = {
    # Pairs are (child ID, tuple of expected second-level parent IDs); each
    # tuple is copied into a fresh list so no two keys share a value object.
    child_id: list(grandparent_ids)
    for child_id, grandparent_ids in (
        # exon entries
        ("exon:chr2L:7529-8116:+", ("FBgn0031208",)),
        ("exon:chr2L:8193-9484:+", ("FBgn0031208",)),
        ("exon:chr2L:8193-8589:+", ("FBgn0031208",)),
        ("exon:chr2L:8668-9484:+", ("FBgn0031208",)),
        ("exon:chr2L:10000-11000:-", ("Fk_gene_1",)),
        ("exon:chr2L:11500-12500:-", ("Fk_gene_2",)),
        # CDS entries
        ("CDS:chr2L:7680-8116:+", ("FBgn0031208",)),
        ("CDS:chr2L:8193-8610:+", ("FBgn0031208",)),
        ("CDS:chr2L:8193-8589:+", ("FBgn0031208",)),
        ("CDS:chr2L:8668-9276:+", ("FBgn0031208",)),
        ("CDS:chr2L:10000-11000:-", ("Fk_gene_1",)),
        # entries with no expected second-level parent
        ("FBtr0300689", ()),
        ("FBtr0300690", ()),
        ("transcript_Fk_gene_1", ()),
        ("transcript_Fk_gene_2", ()),
        # start_codon / stop_codon entries
        ("start_codon:chr2L:7680-7682:+", ("FBgn0031208",)),
        ("start_codon:chr2L:10000-11002:-", ("Fk_gene_1",)),
        ("stop_codon:chr2L:8611-8613:+", ("FBgn0031208",)),
        ("stop_codon:chr2L:9277-9279:+", ("FBgn0031208",)),
        ("stop_codon:chr2L:11001-11003:-", ("Fk_gene_1",)),
    )
}
# Integer counts per feature-type name, grouped under a format key
# ("gff3" or "gtf").
expected_feature_counts = dict(
    gff3=dict(
        gene=3,
        mRNA=4,
        exon=6,
        CDS=5,
        five_prime_UTR=1,
        intron=3,
        pcr_product=1,
        protein=2,
        three_prime_UTR=2,
    ),
    gtf=dict(
        # The gene and mRNA counts were already disabled in this table; the
        # reason is not recorded here, so they stay commented out.
        # gene=3,
        # mRNA=4,
        CDS=6,
        exon=7,
        start_codon=3,
        stop_codon=3,
    ),
)
# Lists of feature-type names, one list per format key.
expected_features = {}
expected_features["gff3"] = [
    "gene",
    "mRNA",
    "protein",
    "five_prime_UTR",
    "three_prime_UTR",
    "pcr_product",
    "CDS",
    "exon",
    "intron",
]
expected_features["gtf"] = [
    "gene",
    "mRNA",
    "CDS",
    "exon",
    "start_codon",
    "stop_codon",
]
|