File: feature_test.py

package info (click to toggle)
python-gffutils 0.13-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,164 kB
  • sloc: python: 5,557; makefile: 62; sh: 13
file content (164 lines) | stat: -rw-r--r-- 5,759 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from gffutils import parser, feature, helpers, constants


def test_feature_from_line():
    """A tab-delimited line and its space-delimited twin parse to equal features.

    Both lines are parsed with ``strict=False`` and ``keep_order=True``.
    """
    tabbed = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    spaced = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"

    from_tabs = feature.feature_from_line(tabbed, strict=False, keep_order=True)
    from_spaces = feature.feature_from_line(spaced, strict=False, keep_order=True)

    assert from_tabs == from_spaces


def test_default_feature():
    """str() of a no-argument Feature is eight "." fields, each followed by a tab."""
    # Eight placeholders, tab-delimited, including the trailing tab.
    expected = ".\t.\t.\t.\t.\t.\t.\t.\t"
    rendered = str(feature.Feature())
    assert rendered == expected


def test_attributes_representations():
    """Attributes given as a JSON-style string, a dict, or "key=value" text
    must all render to the same feature line."""
    expected = ".\t.\t.\t.\t.\t.\t.\t.\tID=asdf"

    as_json_text = '{"ID": ["asdf"]}'
    as_mapping = {"ID": ["asdf"]}
    as_keyval_text = "ID=asdf"

    for attrs in (as_json_text, as_mapping, as_keyval_text):
        rendered = str(feature.Feature(attributes=attrs))
        # Show the offending rendering when the comparison fails.
        assert rendered == expected, rendered


def test_default_start_stop():
    """Missing coordinates read back as None and print as "."; zero is a real value."""
    blank_line = ".\t.\t.\t.\t.\t.\t.\t.\t"

    # Every pairing of the two "missing" spellings for start and end.
    missing = (".", None)
    pairings = [(first, second) for first in missing for second in missing]
    for start_arg, end_arg in pairings:
        feat = feature.Feature(start=start_arg, end=end_arg)
        for coord_name in ("start", "end", "stop"):
            assert getattr(feat, coord_name) is None
        rendered = str(feat)
        assert rendered == blank_line, rendered

    # Zero must survive as a coordinate (guards against a sloppy "if start:").
    zeroed = feature.Feature(start=0, end=0)
    assert zeroed.start == zeroed.end == zeroed.stop == 0
    rendered = str(zeroed)
    assert rendered == ".\t.\t.\t0\t0\t.\t.\t.\t", rendered


def test_aliases():
    """chrom/seqid and stop/end mirror each other, both when read and after assignment."""
    gff_line = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    feat = feature.feature_from_line(gff_line, keep_order=True)

    # Values as parsed from the line.
    assert feat.chrom == "chr2L" == feat.seqid
    assert feat.end == 8116 == feat.stop

    # Writing through one name of each pair must be visible through the other.
    feat.chrom = "fake"
    feat.stop = 1
    assert feat.chrom == "fake" == feat.seqid
    assert feat.stop == 1 == feat.end


def test_string_representation():
    """str() of a parsed feature reproduces the input line, with or without
    extra trailing fields."""
    # Plain nine-field line.
    plain = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    parsed = feature.feature_from_line(plain, keep_order=True)
    assert plain == str(parsed), str(parsed)

    # Same line followed by three additional tab-separated fields.
    with_extras = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690\tsome\tmore\tstuff"
    parsed = feature.feature_from_line(with_extras, keep_order=True)
    assert with_extras == str(parsed)


def test_pbt_interval_conversion():
    """Compare a Feature with the object returned by helpers.asinterval().

    The check needs the optional pybedtools package.  When it cannot be
    imported the test is reported as skipped (via ``unittest.SkipTest``)
    rather than silently passing without checking anything.
    """
    # Local import: unittest is only needed here, for the skip signal.
    import unittest

    # Keep the try body to the one statement that can legitimately fail.
    try:
        import pybedtools  # noqa: F401 -- imported only to probe availability
    except ImportError:
        raise unittest.SkipTest("pybedtools is not installed")

    line = "chr2L FlyBase exon 7529 8116 . + . Name=CG11023:1;Parent=FBtr0300689,FBtr0300690"
    f = feature.feature_from_line(line, strict=False, keep_order=True)
    pbt = helpers.asinterval(f)

    assert pbt.chrom == f.chrom == f.seqid
    # The converted start is expected to be one less than the feature's start
    # (presumably 0-based vs. 1-based coordinates -- confirm against pybedtools).
    assert pbt.start == f.start - 1
    assert pbt.stop == f.stop == f.end

    pn = pbt.name
    fn = f.attributes["Name"][0]
    assert pn == fn, "%s, %s" % (pn, fn)


def test_hash():
    """A feature parsed from a line hashes to the same value as the line itself."""
    gff_line = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690\tsome\tmore\tstuff"
    parsed = feature.feature_from_line(gff_line, keep_order=True)
    feature_hash = hash(parsed)
    line_hash = hash(gff_line)
    assert feature_hash == line_hash


def test_repr():
    """repr() shows featuretype, chrom, coordinates, strand and the object's hex id."""
    gff_line = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690\tsome\tmore\tstuff"
    feat = feature.feature_from_line(gff_line, keep_order=True)

    # Emit both pieces so a failure can be diagnosed from captured output.
    print(repr(feat))
    address = hex(id(feat))
    print(address)

    expected = "<Feature exon (chr2L:7529-8116[+]) at %s>" % address
    assert repr(feat) == expected


def test_attribute_order():
    """Check attribute ordering under an inferred dialect, the default
    dialect, and after adding an attribute unknown to the dialect."""

    # The default order is gene_id, transcript_id.  But feature_from_line --
    # if no dialect is provided -- will infer its own dialect.  In this case,
    # transcript_id comes first.
    attributes = 'transcript_id "mRNA1"; gene_id "gene1";'
    # The surrounding newlines and indentation are part of the input line;
    # it is parsed with strict=False.
    a = feature.feature_from_line(
        """
        chr1\t.\tmRNA\t1\t100\t.\t+\t.\t%s
        """
        % attributes,
        strict=False,
        keep_order=True,
    )
    a.strict = True
    a.keep_order = True
    assert (
        str(a) == 'chr1\t.\tmRNA\t1\t100\t.\t+\t.\ttranscript_id "mRNA1"; gene_id "gene1";'
    ), str(a)

    # Ensure that using the default dialect uses the default order (and
    # incidentally converts to GFF3 format).
    a.dialect = constants.dialect
    a.keep_order = True
    assert str(a) == "chr1\t.\tmRNA\t1\t100\t.\t+\t.\tgene_id=gene1;transcript_id=mRNA1", str(a)

    # Adding an attribute should always result in that attribute coming last
    # (as long as that attribute is not in the dialect order).
    a["dummy"] = ["asdf"]
    a.strict = True
    assert (
        str(a) == "chr1\t.\tmRNA\t1\t100\t.\t+\t.\tgene_id=gene1;transcript_id=mRNA1;dummy=asdf"
    ), str(a)


def test_unjsonify():
    """Attributes survive a round trip through helpers._jsonify / helpers._unjsonify."""
    # Only the attributes half of the returned pair is examined here.
    parsed, _dialect = parser._split_keyvals('transcript_id "mRNA1"')
    assert parsed == {"transcript_id": ["mRNA1"]}, parsed

    # The serialized form is compared exactly (no spaces after separators).
    encoded = helpers._jsonify(parsed)
    assert encoded == '{"transcript_id":["mRNA1"]}', encoded

    decoded = helpers._unjsonify(encoded, isattributes=True)
    assert decoded == parsed


class IsolatedTestCase(object):
    """
    Isolated test case for checking that the module-level
    constants.always_return_list works.

    The flag is switched off in ``setup_method`` and put back to whatever
    it was in ``teardown_method``, so the change cannot leak into other
    tests.  Per the original author's note, having this test as a plain
    function caused other tests to fail even though the flag was reset
    afterwards; the setup/teardown pair avoided that.

    NOTE(review): the class name does not start with ``Test``, which is
    pytest's default class-collection prefix -- confirm that the test
    runner is configured to collect this class.
    """

    def setup_method(self):
        # Remember the value in effect so teardown restores exactly that,
        # instead of assuming the library default.
        self._saved_always_return_list = constants.always_return_list
        constants.always_return_list = False

    def teardown_method(self):
        # Fall back to True (the value previously hard-coded here) if
        # setup_method never recorded one.
        constants.always_return_list = getattr(
            self, "_saved_always_return_list", True
        )

    def test_feature_single_item(self):
        line = "chr2L\tFlyBase\texon\t7529\t8116\t.\t+\t.\tName=CG11023:1;Parent=FBtr0300689,FBtr0300690\tsome\tmore\tstuff"
        f = feature.feature_from_line(line, keep_order=True)
        assert f["Name"] == ["CG11023:1"]