1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
import pyabpoa as pa
#@parameters of msa_aligner:
# aln_mode='g' # g: global, l: local, e: extension
# is_aa=False # set as True if input is amino acid sequence
# score_matrix='' # file of score matrix, e.g. HOXD70.mtx/BLOSUM62.mtx
# match=2
# mismatch=4
# gap_open1=4
# gap_open2=24
# gap_ext1=2
# gap_ext2=1
# extra_b = 10 # 1st part of extra band, -1 to disable banded DP
# extra_f = 0.01 # 2nd part of eatra band. w = extra_b+extra_f*L (L is sequence length)
# max_n_cons=1 # to output at most N cons, set max_n_cons as N
# min_freq=0.3 # minimum frequence of each consensus to output for diploid data
# construct msa aligner
a = pa.msa_aligner()
print("==== First exmaple: 2 consensus sequences ====\n")
# for multiple consensus
seqs=[
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATAAAAAAAAAAAAAAAAAAACGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT',
'CGATCGATCGATCGATGCATGCATCGATGCATCGATCGATGCATGCAT'
]
#@parameters of msa
#seqs: multiple sequences
out_cons=True # generate consensus sequence, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
#out_pog="example1.png" # generate plot of alignment graph, set None to disable, require `dot` to be installed
max_n_cons = 2
# multiple sequence alignment for 'seqs'
res=a.msa(seqs, out_cons=out_cons, out_msa=out_msa, max_n_cons=max_n_cons) #, out_pog=out_pog)
# output result
if out_cons:
for i in range(res.n_cons):
print(">Consensus_sequence_{}".format(i+1))
print(res.cons_seq[i])
if out_msa:
res.print_msa()
print("\n\n==== Second exmaple: 1 consensus sequence ====\n")
seqs=[
'CGTCAATCTATCGAAGCATACGCGGGCAGAGCCGAAGACCTCGGCAATCCA',
'CCACGTCAATCTATCGAAGCATACGCGGCAGCCGAACTCGACCTCGGCAATCAC',
'CGTCAATCTATCGAAGCATACGCGGCAGAGCCCGGAAGACCTCGGCAATCAC',
'CGTCAATGCTAGTCGAAGCAGCTGCGGCAGAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTATCGAAGCATTCTACGCGGCAGAGCCGACCTCGGCAATCAC',
'CGTCAATCTAGAAGCATACGCGGCAAGAGCCGAAGACCTCGGCCAATCAC',
'CGTCAATCTATCGGTAAAGCATACGCTCTGTAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTATCTTCAAGCATACGCGGCAGAGCCGAAGACCTCGGCAATC',
'CGTCAATGGATCGAGTACGCGGCAGAGCCGAAGACCTCGGCAATCAC',
'CGTCAATCTAATCGAAGCATACGCGGCAGAGCCGTCTACCTCGGCAATCACGT'
]
#@parameters of msa
#seqs: multiple sequences
out_cons=True # generate consensus sequence, set as False to disable
out_msa=True # generate row-column multiple sequence alignment, set as False to disable
# out_pog="example2.png" # generate plot of alignment graph, set None to disable
max_n_cons = 1
# multiple sequence alignment for 'seqs'
res=a.msa(seqs, out_cons=out_cons, out_msa=out_msa, max_n_cons=max_n_cons) #, out_pog=out_pog)
# output result
if out_cons:
for i in range(res.n_cons):
print(">Consensus_sequence_{}".format(i+1))
print(res.cons_seq[i])
if out_msa:
for i in range(res.n_seq):
print(">Seq_{}".format(i+1))
print(res.msa_seq[i])
for i in range(res.n_cons):
print(">Consensus_sequence_{}".format(i+1))
print(res.msa_seq[res.n_seq+i])
|