File: txt2gff.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (62 lines) | stat: -rwxr-xr-x 1,823 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/python3

############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# Copyright (c) 2011-2014 Saint Petersburg Academic University
# All Rights Reserved
# See file LICENSE for details.
############################################################################


import os
import shutil
import sys

def txt_line_to_gff(line, keyword):
    """Convert one line of the input .txt file into a single GFF record.

    A valid input line has exactly four whitespace-separated fields:
    reference id, gene/operon id, start coordinate, end coordinate, e.g.

        gi|48994873|gb|U00096.2|  1  4263805  4264884

    The produced record is tab-separated and newline-terminated, e.g.

        gi|48994873|gb|U00096.2|  .  gene  4263805  4264884  .  +  .  ID=1

    Args:
        line: One raw line of the input file.
        keyword: Value written to the feature column (the usage text
            suggests 'gene' or 'operon'; it is not validated here).

    Returns:
        The GFF record as a string, or None when the line does not consist
        of exactly four fields (such lines are skipped by the caller).

    Raises:
        ValueError: If the start or end field is not an integer.
    """
    sections = line.split()
    if len(sections) != 4:
        return None

    ref_id = sections[0]
    gene_id = sections[1]
    start = int(sections[2])
    end = int(sections[3])

    # Orientation is encoded by the order of the coordinates: ascending
    # means forward strand. Equal coordinates fall into the '-' branch.
    strand = '+' if start < end else '-'

    # GFF wants the smaller coordinate first regardless of the strand.
    fields = [
        ref_id,
        '.',
        keyword,
        str(min(start, end)),
        str(max(start, end)),
        '.',
        strand,
        '.',
        'ID=' + gene_id,
    ]
    return '\t'.join(fields) + '\n'


def main(argv=None):
    """Command-line entry point: convert a genes/operons .txt file to .gff.

    The output is written next to the input, with the input's extension
    replaced by ".gff". An existing output file is never overwritten.

    Args:
        argv: Argument vector in sys.argv layout (program name, input
            file, keyword). Defaults to sys.argv.

    Raises:
        SystemExit: With status 1 on wrong usage, a missing input file or
            an already existing output file.
        ValueError: If a four-field line has a non-integer coordinate.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 3:
        print("Converter for genes and operons files from .txt to .gff format")
        print("Usage: " + argv[0] + "  <input .txt file>  <keyword: either 'gene' or 'operon'>")
        sys.exit(1)

    input_filename = argv[1]
    keyword = argv[2]

    if not os.path.exists(input_filename):
        print("Specified file not found! Exiting...")
        sys.exit(1)

    basename = os.path.splitext(input_filename)[0]
    output_filename = basename + ".gff"

    if os.path.exists(output_filename):
        print("Output file (" + output_filename + ") already exists! Exiting...")
        sys.exit(1)

    # Open the input first so that a failure to read it does not leave an
    # empty output file behind; `with` closes both handles even when a
    # malformed coordinate raises part-way through.
    with open(input_filename, 'r') as src, open(output_filename, 'w') as out:
        out.write("##gff-version\t2\n")
        for line in src:
            record = txt_line_to_gff(line, keyword)
            if record is not None:
                out.write(record)

    print("Conversion finished! See " + output_filename)


if __name__ == "__main__":
    main()