File: correct_head.py

package info (click to toggle)
hinge 0.5.0-8
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,972 kB
  • sloc: cpp: 9,480; ansic: 8,826; python: 5,023; sh: 340; makefile: 10
file content (42 lines) | stat: -rwxr-xr-x 1,237 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/python3

import sys, os
from pbcore.io import FastaIO

def run(reader, writer, lookupfile, min_length=30):
    """Renumber FASTA headers and record the old-to-new header mapping.

    For every record yielded by ``reader`` one tab-separated line is written
    to ``lookupfile``: the original header followed by either the new header
    or the literal ``Deleted`` when the sequence is shorter than
    ``min_length``.  Records that are kept are forwarded to
    ``writer.writeRecord`` under their new header.

    Args:
        reader: iterable of records exposing ``header`` and ``sequence``.
        writer: object providing ``writeRecord(header, sequence)``.
        lookupfile: path of the mapping file; it is created or truncated.
        min_length: shortest sequence length that is kept.  Defaults to 30,
            the cutoff that used to be hard-coded.
    """
    with open(lookupfile, 'w') as lookup:
        # Records are numbered from 1 by their position in the input, so a
        # dropped record still consumes a number.
        for zmw, record in enumerate(reader, start=1):
            sequence = record.sequence
            old_header = record.header
            seq_length = len(sequence)

            if seq_length < min_length:
                lookup.write(old_header + '\tDeleted\n')
                continue

            # Coordinates are not taken from the old header: every kept
            # record is described as spanning 0..len(sequence).
            new_header = "m000_000/{zmw}/{start}_{end}".format(
                zmw=zmw, start=0, end=seq_length)
            lookup.write(old_header + '\t' + new_header + '\n')

            writer.writeRecord(new_header, sequence)

def main(iname, ofile, lookupfile):
    """Wire a FASTA reader/writer pair into ``run``.

    ``iname`` is handed to ``FastaIO.FastaReader``, ``ofile`` to
    ``FastaIO.FastaWriter``, and ``lookupfile`` is passed through to ``run``
    unchanged.
    """
    run(
        FastaIO.FastaReader(iname),
        FastaIO.FastaWriter(ofile),
        lookupfile,
    )

if __name__ == '__main__':
    # Usage: <script> INPUT_FASTA OUTPUT_FASTA LOOKUP_FILE
    # Extra trailing arguments are ignored, as before.
    if len(sys.argv) < 4:
        sys.exit('usage: {} INPUT_FASTA OUTPUT_FASTA LOOKUP_FILE'.format(
            sys.argv[0]))
    iname, oname, lookupfile = sys.argv[1:4]

    # Opened outside the try block so that a failure to open the output does
    # not trigger the unlink below.
    ofile = open(oname, 'w')
    try:
        # The with-block closes (and therefore flushes) the output on the
        # success path too, instead of leaving it to interpreter shutdown.
        with ofile:
            main(iname, ofile, lookupfile)
    except BaseException:
        # clean up (for make): remove the partial output before re-raising.
        # BaseException is deliberate so that Ctrl-C is covered as well.
        os.unlink(oname)
        raise