File: get_up_prot_iso_sql.py

package info (click to toggle)
fasta3 36.3.8i.14-Nov-2020-3
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 7,016 kB
  • sloc: ansic: 77,269; perl: 10,677; python: 2,461; sh: 428; csh: 86; sql: 55; makefile: 40
file content (46 lines) | stat: -rwxr-xr-x 1,188 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python3

import sys
import re
import textwrap
import argparse
import MySQLdb.cursors

# Open the connection to the 'uniprot' database.  DictCursor is requested so
# that cursors created from this connection return rows as dicts keyed by
# column name.
# NOTE(review): host and credentials are hard-coded; presumably a public,
# read-only account -- confirm before reusing elsewhere.
db = MySQLdb.connect(
    host='wrpxdb.bioch.virginia.edu',
    db='uniprot',
    user='web_user',
    passwd='fasta_www',
    cursorclass=MySQLdb.cursors.DictCursor,
)

# Two cursors on the shared connection: cur1 looks up the entry for the
# requested accession, cur2 looks up the isoform entries that list it as
# their primary accession.
cur1 = db.cursor()
cur2 = db.cursor()

# The %s markers below are DB-API placeholders that cursor.execute() fills in
# (escaping and quoting the value itself).  They are deliberately not wrapped
# in quotes and must never be filled in with the Python % operator, because
# the accessions come straight from the command line.
get_iso_acc = 'select acc from annot2_iso where prim_acc=%s'
get_fasta_info = 'select db, acc, id, descr, seq from annot2 join protein using(acc) where acc=%s'
get_iso_fasta_info = 'select db, acc, id, descr, seq from annot2_iso join protein_iso using(acc) where prim_acc=%s'

# For every accession given on the command line, print the matching entry
# followed by its isoforms in FASTA format on stdout.  Accessions that are
# not found are reported on stderr and skipped.
for acc in sys.argv[1:]:

    # Anything after the first ':' is discarded; only the accession itself
    # is used for the lookup.  partition() also copes with arguments that
    # contain no colon or more than one.
    acc = acc.partition(':')[0]

    # Identifiers of the form "sp|ACC|..." (also tr, iso, ref) carry the
    # accession in the second '|'-separated field.
    if re.match(r'^(sp|tr|iso|ref)\|', acc):
        acc = acc.split('|')[1]

    cur1.execute(get_fasta_info, (acc,))
    row = cur1.fetchone()
    if not row:
        sys.stderr.write("***error*** %s sequence not found\n" % (acc,))
        continue

    # Start a fresh list for each accession so that records already printed
    # for an earlier argument are not emitted a second time.
    fasta_seqs = [row]

    cur2.execute(get_iso_fasta_info, (acc,))
    fasta_seqs.extend(cur2.fetchall())

    for row in fasta_seqs:
        print(">%s|%s|%s %s" % (row['db'], row['acc'], row['id'], row['descr']))
        # textwrap.wrap() breaks the sequence into lines of its default width.
        print('\n'.join(textwrap.wrap(row['seq'])))