File: column.py

package info (click to toggle)
python-cogent 1.4.1-1.2
  • links: PTS, VCS
  • area: non-free
  • in suites: squeeze
  • size: 13,260 kB
  • ctags: 20,087
  • sloc: python: 116,163; ansic: 732; makefile: 74; sh: 9
file content (59 lines) | stat: -rw-r--r-- 1,666 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
#file: column_parser.py
"""Parser for column format

Works for the following column format:
; COL 1             label
; COL 2             residue
; COL 3             seqpos
; COL 4             alignpos
; COL 5             align_bp
; COL 6             certainty/seqpos_bp

The structure part starts after a line beginning with '; ------' and ends
at a line beginning with '; ******'.
"""

from string import split
from cogent.struct.rna2d import Pairs
from cogent.struct.pairs_util import adjust_base

__author__ = "Shandy Wikman"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__contributors__ = ["Shandy Wikman"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Shandy Wikman"
__email__ = "ens01svn@cs.umu.se"
__status__ = "Development"

def column_parser(lines):
    """Parse column-format records into [sequence, structure] entries.

    Lines are ignored until one starting with '; ------' is seen; from
    there every non-blank line is treated as a whitespace-separated data
    row until a line starting with '; ******' closes the record. Several
    records may follow each other in the same input.

    For each data row, field 2 (residue) is appended to the sequence. If
    field 5 (align_bp) is not '.', the pair (alignpos, align_bp) taken
    from fields 4 and 5 is recorded as a base pair.

    Arguments:
        lines: iterable of strings (e.g. an open file or a list of lines).

    Returns:
        A list with one ``[seq, struct]`` item per closed record, where
        ``seq`` is the concatenated residue string and ``struct`` is the
        sorted result of ``Pairs(...).directed()`` applied to the pairs
        after ``adjust_base(pairs, -1)``. A record that is never closed by
        a '; ******' line is not returned.

    Raises:
        IndexError: a non-blank data row has fewer than five fields.
        ValueError: fields 4/5 of a paired row are not integers.
    """
    recording = False
    result = []
    pairs = []
    residues = []  # joined once per record instead of re-building a string

    for line in lines:
        if line.startswith('; ------'):  # structure part begins
            recording = True
            continue

        if line.startswith('; ******'):  # structure part ends
            recording = False
            # Shift every position by -1 (presumably 1-based -> 0-based;
            # confirm against adjust_base).
            struct = adjust_base(pairs, -1)
            # Each pair is listed from both partners; directed() removes
            # the duplicates.
            struct = Pairs(struct).directed()
            struct.sort()

            result.append([''.join(residues), struct])
            pairs = []
            residues = []
            continue

        if not recording:
            continue

        fields = line.split()
        if not fields:
            # Blank / whitespace-only line inside the structure part:
            # nothing to parse, and indexing it would raise IndexError.
            continue

        residues.append(fields[1])
        if fields[4] != '.':  # '.' marks an unpaired position
            pairs.append((int(fields[3]), int(fields[4])))  # (alignpos, align_bp)

    return result