1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
from gffutils import parser, feature, helpers, constants
def test_feature_from_line():
    """Two equivalent GFF lines must parse into equal Feature objects."""
    # Intent per the original note: spaces and tabs should give identical
    # results.
    # NOTE(review): both literals are character-for-character identical as
    # visible here -- confirm the tab-delimited variant was not lost.
    first_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    second_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"

    # Non-strict parsing, keeping attribute order, for both inputs.
    parsed_first = feature.feature_from_line(
        first_line, strict=False, keep_order=True
    )
    parsed_second = feature.feature_from_line(
        second_line, strict=False, keep_order=True
    )
    assert parsed_first == parsed_second
def test_default_feature():
    """A Feature built with no arguments renders as eight "." fields."""
    # The original note describes eight tab-delimited "." with a trailing tab.
    # NOTE(review): the literal as visible here is space-separated -- confirm
    # the delimiter against the real file.
    expected = ". . . . . . . . "
    rendered = str(feature.Feature())
    assert rendered == expected
def test_attributes_representations():
    """A JSON string, a dict and a GFF3 attribute string render identically."""
    expected = ". . . . . . . . ID=asdf"

    # Three ways of supplying the same single attribute.
    equivalent_inputs = (
        '{"ID": ["asdf"]}',
        {"ID": ["asdf"]},
        "ID=asdf",
    )
    for attrs in equivalent_inputs:
        rendered = str(feature.Feature(attributes=attrs))
        assert rendered == expected, rendered
def test_default_start_stop():
    """Start/end given as "." or None become None and print as "."."""
    placeholders = [".", None]

    # Every (start, end) pairing of the two placeholder values, in the same
    # order a nested loop would visit them.
    pairings = [(s, e) for s in placeholders for e in placeholders]
    for start_value, end_value in pairings:
        feat = feature.Feature(start=start_value, end=end_value)
        assert feat.start is None
        assert feat.end is None
        assert feat.stop is None
        assert str(feat) == ". . . . . . . . ", str(feat)

    # Zero is a legitimate coordinate; this protects against a sloppy
    # truthiness check such as "if start:".
    zero_feat = feature.Feature(start=0, end=0)
    assert zero_feat.start == zero_feat.end == zero_feat.stop == 0
    assert str(zero_feat) == ". . . 0 0 . . . ", str(zero_feat)
def test_aliases():
    """chrom/seqid and stop/end read and write the same underlying values."""
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    feat = feature.feature_from_line(gff_line, keep_order=True)

    # Reading through either name gives the parsed value.
    assert feat.chrom == "chr2L" == feat.seqid
    assert feat.end == 8116 == feat.stop

    # Writing through one name must be visible through the other.
    feat.chrom = "fake"
    feat.stop = 1
    assert feat.chrom == "fake" == feat.seqid
    assert feat.stop == 1 == feat.end
def test_string_representation():
    """str() of a parsed Feature reproduces the line it was parsed from."""
    # Plain nine-field line.
    plain_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    plain_feat = feature.feature_from_line(plain_line, keep_order=True)
    assert plain_line == str(plain_feat), str(plain_feat)

    # Same line with extra trailing content after the attributes.
    extended_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690 some more stuff"
    extended_feat = feature.feature_from_line(extended_line, keep_order=True)
    assert extended_line == str(extended_feat)
def test_pbt_interval_conversion():
    """helpers.asinterval() output agrees with the source Feature.

    Returns early, without asserting anything, when pybedtools cannot be
    imported.
    """
    try:
        import pybedtools
    except ImportError:
        return

    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    feat = feature.feature_from_line(gff_line, strict=False, keep_order=True)
    interval = helpers.asinterval(feat)

    assert interval.chrom == feat.chrom == feat.seqid
    # The interval start is expected to be one less than the Feature start.
    assert interval.start == feat.start - 1
    assert interval.stop == feat.stop == feat.end

    interval_name = interval.name
    feature_name = feat.attributes["Name"][0]
    assert interval_name == feature_name, "%s, %s" % (interval_name, feature_name)
def test_hash():
    """hash() of a parsed Feature equals hash() of its source line."""
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690 some more stuff"
    feat = feature.feature_from_line(gff_line, keep_order=True)
    feature_hash = hash(feat)
    assert feature_hash == hash(gff_line)
def test_repr():
    """repr() shows featuretype, location, strand and the object's address."""
    gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690 some more stuff"
    feat = feature.feature_from_line(gff_line, keep_order=True)

    # Echo both pieces so a failing run shows what was compared.
    print(repr(feat))
    print(hex(id(feat)))

    expected = "<Feature exon (chr2L:7529-8116[+]) at %s>" % hex(id(feat))
    assert repr(feat) == expected
def test_attribute_order():
    """Attribute order follows the active dialect; new keys are appended."""
    # The default order is gene_id, transcript_id. When no dialect is passed,
    # feature_from_line infers one from the line itself, so here
    # transcript_id comes first.
    attributes = 'transcript_id "mRNA1"; gene_id "gene1";'

    # The literal's content starts at column 0 so that its bytes match the
    # text as it appears in this file.
    # NOTE(review): confirm the whitespace inside this literal against the
    # real file.
    template = """
chr1 . mRNA 1 100 . + . %s
"""
    a = feature.feature_from_line(
        template % attributes,
        strict=False,
        keep_order=True,
    )
    a.strict = True
    a.keep_order = True
    rendered = str(a)
    assert (
        rendered == 'chr1 . mRNA 1 100 . + . transcript_id "mRNA1"; gene_id "gene1";'
    ), str(a)

    # Switching to the default dialect should give the default order (and
    # incidentally convert the output to GFF3 format).
    orig_dialect = a.dialect  # read before overwriting; not used afterwards
    a.dialect = constants.dialect
    a.keep_order = True
    rendered = str(a)
    assert rendered == "chr1 . mRNA 1 100 . + . gene_id=gene1;transcript_id=mRNA1", str(a)

    # Adding an attribute should always put it last, as long as that
    # attribute is not part of the dialect's order.
    a["dummy"] = ["asdf"]
    a.strict = True
    rendered = str(a)
    assert (
        rendered == "chr1 . mRNA 1 100 . + . gene_id=gene1;transcript_id=mRNA1;dummy=asdf"
    ), str(a)
def test_unjsonify():
    """Attributes survive a _jsonify / _unjsonify round trip."""
    # _split_keyvals returns a pair; only the attributes are checked here.
    parsed_attrs, _dialect = parser._split_keyvals('transcript_id "mRNA1"')
    assert parsed_attrs == {"transcript_id": ["mRNA1"]}, parsed_attrs

    # Serialized form is compact JSON with no whitespace.
    serialized = helpers._jsonify(parsed_attrs)
    assert serialized == '{"transcript_id":["mRNA1"]}', serialized

    restored = helpers._unjsonify(serialized, isattributes=True)
    assert restored == parsed_attrs
class IsolatedTestCase(object):
    """
    Test case that exercises the module-level constants.always_return_list
    switch in isolation.

    Per the original note, writing this check as a plain function made other
    tests fail even when constants.always_return_list was restored afterwards
    (the note speculates that nose runs tests concurrently in one namespace).
    The setup/teardown hooks below flip the flag around each test instead.
    """

    def setup_method(self):
        """Switch constants.always_return_list off before the test runs."""
        constants.always_return_list = False

    def teardown_method(self):
        """Switch constants.always_return_list back on after the test."""
        constants.always_return_list = True

    def test_feature_single_item(self):
        """Look up the single-valued "Name" attribute on a parsed Feature."""
        gff_line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690 some more stuff"
        feat = feature.feature_from_line(gff_line, keep_order=True)
        name_value = feat["Name"]
        assert name_value == ["CG11023:1"]
|