File: gff.py

package info (click to toggle)
python-cogent 1.4.1-1.2
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze
  • size: 13,260 kB
  • ctags: 20,087
  • sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (57 lines) | stat: -rw-r--r-- 1,929 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python

__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"

def GffParser(f):
    """Generate one record tuple per feature line of a GFF source.

    Arguments:
        - f: an iterable of text lines (e.g. an open file or a list of
          strings). A single string is rejected by an assertion.

    Each yielded record is the 10-tuple
    (seqname, source, feature, start, end, score, strand, frame,
    attributes, comments) where:
        - start and end are ints; start has had 1 subtracted from the
          value in the file, end is used as given.
        - for records whose strand is '-', start and end are exchanged.
        - comments is whatever followed the first '#' on the line
          (including any line terminator), or None if there was no '#'.
        - every other item is the raw column text.

    Lines that are empty once the '#' portion and surrounding whitespace
    are removed are skipped. A line with 8 tab-separated columns is given
    an empty attributes column; any other column count except 9 fails an
    assertion whose message is the stripped line.
    """
    assert not isinstance(f, str)
    for raw_line in f:
        # everything after the first '#' is treated as a comment
        record, hash_mark, trailing = raw_line.partition("#")
        comments = trailing if hash_mark else None
        record = record.strip()
        if record == "":
            # blank line, or a line holding nothing but a comment
            continue

        # split into the tab-delimited columns, tolerating a missing
        # attributes column
        fields = record.split('\t')
        if len(fields) == 8:
            fields = fields + ['']
        assert len(fields) == 9, record
        seqname, source, feature = fields[:3]
        score, strand, frame, attributes = fields[5:]

        # shift the start coordinate by one for python-style slicing
        start = int(fields[3]) - 1
        end = int(fields[4])

        # GFF expects start < end, and v 2.0 uses negative indices for
        # features extending beyond the sequence: take magnitudes and
        # put them in ascending order
        if min(start, end) < 0:
            start, end = sorted((abs(start), abs(end)))

        # a feature on the opposite strand is signalled by reversed
        # indices
        if strand == '-':
            start, end = end, start

        # attributes are passed through unparsed
        yield (seqname, source, feature, start, end, score,
                strand, frame, attributes, comments)

def parse_attributes(attribute_string):
    """Returns the text enclosed by the first pair of double quotes.

    With only one double quote present, everything after it is returned;
    with none, the whole string is returned unchanged.
    """
    _, opening_quote, remainder = attribute_string.partition('"')
    if not opening_quote:
        # no quote at all: nothing to trim from the front
        remainder = attribute_string
    # keep what precedes the closing quote (or all of it if there is none)
    return remainder.partition('"')[0]