File: example.py

package info (click to toggle)
abpoa 1.5.6-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,060 kB
  • sloc: ansic: 8,434; sh: 762; makefile: 194; python: 115
file content (90 lines) | stat: -rw-r--r-- 3,463 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pyabpoa as pa

#@parameters of msa_aligner:
#   aln_mode='g' # g: global, l: local, e: extension
#   is_aa=False # set as True if input is amino acid sequence
#   score_matrix='' # file of score matrix, e.g. HOXD70.mtx/BLOSUM62.mtx
#   match=2
#   mismatch=4
#   gap_open1=4
#   gap_open2=24
#   gap_ext1=2
#   gap_ext2=1
#   extra_b = 10 # 1st part of extra band, -1 to disable banded DP
#   extra_f = 0.01  # 2nd part of eatra band. w = extra_b+extra_f*L (L is sequence length)
#   max_n_cons=1 # to output at most N cons, set max_n_cons as N
#   min_freq=0.3 # minimum frequence of each consensus to output for diploid data

# construct msa aligner
a = pa.msa_aligner()

print("==== First exmaple: 2 consensus sequences ====\n")
# for multiple consensus
seqs=[
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
 'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT'
 ]

#@parameters of msa
#seqs: multiple sequences
out_cons=True # generate consensus sequence, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
#out_pog="example1.png" # generate plot of alignment graph, set None to disable, require `dot` to be installed
max_n_cons = 2

# multiple sequence alignment for 'seqs'
res=a.msa(seqs, out_cons=out_cons, out_msa=out_msa, max_n_cons=max_n_cons) #, out_pog=out_pog)

# output result
if out_cons:
    for i in range(res.n_cons):
        print(">Consensus_sequence_{}".format(i+1))
        print(res.cons_seq[i])
if out_msa:
    res.print_msa()


print("\n\n==== Second exmaple: 1 consensus sequence ====\n")
seqs=[
'CGTCAATCTATCGAAGCATACGCGGGCAGAGCCGAAGACCTCGGCAATCCA',
'CCACGTCAATCTATCGAAGCATACGCGGCAGCCGAACTCGACCTCGGCAATCAC',
'CGTCAATCTATCGAAGCATACGCGGCAGAGCCCGGAAGACCTCGGCAATCAC',
'CGTCAATGCTAGTCGAAGCAGCTGCGGCAGAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTATCGAAGCATTCTACGCGGCAGAGCCGACCTCGGCAATCAC',
'CGTCAATCTAGAAGCATACGCGGCAAGAGCCGAAGACCTCGGCCAATCAC',
'CGTCAATCTATCGGTAAAGCATACGCTCTGTAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTATCTTCAAGCATACGCGGCAGAGCCGAAGACCTCGGCAATC',
'CGTCAATGGATCGAGTACGCGGCAGAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTAATCGAAGCATACGCGGCAGAGCCGTCTACCTCGGCAATCACGT'
]

#@parameters of msa
#seqs: multiple sequences
out_cons=True # generate consensus sequence, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
# out_pog="example2.png" # generate plot of alignment graph, set None to disable
max_n_cons = 1

# multiple sequence alignment for 'seqs'
res=a.msa(seqs, out_cons=out_cons, out_msa=out_msa, max_n_cons=max_n_cons) #, out_pog=out_pog)

# output result
if out_cons:
    for i in range(res.n_cons):
        print(">Consensus_sequence_{}".format(i+1))
        print(res.cons_seq[i])
if out_msa:
    for i in range(res.n_seq):
        print(">Seq_{}".format(i+1))
        print(res.msa_seq[i])
    for i in range(res.n_cons):
        print(">Consensus_sequence_{}".format(i+1))
        print(res.msa_seq[res.n_seq+i])