File: cabpoa.pxd

package info (click to toggle)
abpoa 1.5.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,048 kB
  • sloc: ansic: 8,445; sh: 762; makefile: 194; python: 123
file content (184 lines) | stat: -rw-r--r-- 6,982 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from libc.stdint cimport int8_t, uint8_t, int32_t, int64_t, uint32_t, uint64_t
from libc.stdio cimport FILE

cdef extern from "simd_instruction.h":
    int simd_check()

cdef extern from "abpoa.h":
    cdef int ABPOA_GLOBAL_MODE "ABPOA_GLOBAL_MODE"
    cdef int ABPOA_LOCAL_MODE "ABPOA_LOCAL_MODE"
    cdef int ABPOA_EXTEND_MODE "ABPOA_EXTEND_MODE"

    # gap mode
    cdef int ABPOA_LINEAR_GAP "ABPOA_LINEAR_GAP"
    cdef int ABPOA_AFFINE_GAP "ABPOA_AFFINE_GAP"
    cdef int ABPOA_CONVEX_GAP "ABPOA_CONVEX_GAP"

    cdef int ABPOA_EXTRA_B "ABPOA_EXTRA_B"
    cdef float ABPOA_EXTRA_F "ABPOA_EXTRA_F"

    cdef char *ABPOA_CIGAR_STR "ABPOA_CIGAR_STR"
    cdef int ABPOA_CMATCH "ABPOA_CMATCH"
    cdef int ABPOA_CINS "ABPOA_CINS"
    cdef int ABPOA_CDEL "ABPOA_CDEL"      
    cdef int ABPOA_CDIFF "ABPOA_CDIFF"     
    cdef int ABPOA_CSOFT_CLIP "ABPOA_CSOFT_CLIP"
    cdef int ABPOA_CHARD_CLIP "ABPOA_CHARD_CLIP"

    cdef int ABPOA_SRC_NODE_ID "ABPOA_SRC_NODE_ID"
    cdef int ABPOA_SINK_NODE_ID "ABPOA_SINK_NODE_ID"

    cdef int ABPOA_OUT_CONS "ABPOA_OUT_CONS"
    cdef int ABPOA_OUT_MSA "ABPOA_OUT_MSA"
    cdef int ABPOA_OUT_CONS_MSA "ABPOA_OUT_CONS_MSA"
    cdef int ABPOA_OUT_GFA "ABPOA_OUT_GFA"
    cdef int ABPOA_OUT_CONS_GFA "ABPOA_OUT_CONS_GFA"

    cdef int ABPOA_HB "ABPOA_HB"
    cdef int ABPOA_MF "ABPOA_MF"

    ctypedef struct abpoa_res_t:
        int n_cigar, m_cigar
        uint64_t *graph_cigar
        int node_s, node_e, query_s, query_e # for local and  extension mode
        int n_aln_bases, n_matched_bases
        uint32_t best_score


    ctypedef struct abpoa_para_t:
        int m
        int *mat # score matrix
        char *mat_fn
        int use_score_matrix
        int match, max_mat, mismatch, min_mis, gap_open1, gap_open2, gap_ext1, gap_ext2
        int inf_min
        int k, w, min_w
        int wb # 1st part of extra band width
        float wf # 2nd part of extra band width. w=wb+wf*L (L is sequence length)
        int zdrop, end_bonus # from minimap2
        # int simd_flag # available SIMD instruction
        # alignment mode
        uint8_t ret_cigar, rev_cigar, out_msa, out_cons, out_gfa, out_fq, use_read_ids, amb_strand # mode: 0: global, 1: local, 2: extend
        uint8_t sub_aln, use_qv, disable_seeding, progressive_poa
        char *incr_fn
        char *out_pog
        int align_mode, gap_mode, max_n_cons, cons_algrm
        double min_freq # for diploid data
        int verbose


    ctypedef struct abpoa_node_t:
        int node_id
        int in_edge_n, in_edge_m
        int *in_id
        int out_edge_n, out_edge_m
        int *out_id
        int *out_weight
        int *read_weight
        int n_read, m_read, n_span_read
        uint64_t **read_ids
        int read_ids_n # for diploid
        int aligned_node_n, aligned_node_m
        int *aligned_node_id # mismatch; aligned node will have same rank
        uint8_t base # 0~m

    ctypedef struct abpoa_graph_t:
        abpoa_node_t *node
        int node_n, node_m, index_rank_m
        int *index_to_node_id
        int *node_id_to_index 
        int *node_id_to_max_pos_left
        int *node_id_to_max_pos_right
        int *node_id_to_max_remain
        int *node_id_to_msa_rank
        uint8_t is_topological_sorted, is_called_cons, is_set_msa_rank

    ctypedef struct abpoa_cons_t:
        int n_cons, n_seq, msa_len
        int *clu_n_seq
        int **clu_read_ids
        int *cons_len
        int **cons_node_ids
        uint8_t **cons_base
        uint8_t **msa_base
        int **cons_cov
        int **cons_phred_score;

    ctypedef struct abpoa_str_t:
        int l, m
        char *s

    ctypedef struct abpoa_seq_t:
        int n_seq, m_seq
        abpoa_str_t *seq
        abpoa_str_t *name
        abpoa_str_t *comment
        abpoa_str_t *qual
        uint8_t *is_rc

    ctypedef struct abpoa_simd_matrix_t:
        pass
    
    ctypedef struct abpoa_t:
        abpoa_graph_t *abg
        abpoa_seq_t *abs
        abpoa_simd_matrix_t *abm
        abpoa_cons_t *abc

    # init for abpoa parameters
    abpoa_para_t *abpoa_init_para()
    void abpoa_set_mat_from_file(abpoa_para_t *abpt, char *mtx_fn)
    void abpoa_post_set_para(abpoa_para_t *abpt)
    void abpoa_free_para(abpoa_para_t *abpt)


    # init for alignment
    abpoa_t *abpoa_init()
    void abpoa_free(abpoa_t *ab)

    # do msa for a set of input sequences
    int abpoa_msa(abpoa_t *ab, abpoa_para_t *abpt, int n_seqs, char **seq_names, int *seq_lens, uint8_t **seqs, int ** qual_weights, FILE *out_fp)
    int abpoa_msa1(abpoa_t *ab, abpoa_para_t *abpt, char *read_fn, FILE *out_fp)

    # clean alignment graph
    void abpoa_reset(abpoa_t *ab, abpoa_para_t *abpt, int qlen)

    # restore graph from GFA/MSA file
    abpoa_t *abpoa_restore_graph(abpoa_t *ab, abpoa_para_t *abpt)

    # align a sequence to a graph
    int abpoa_align_sequence_to_graph(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int qlen, abpoa_res_t *res)

    # align to sub-graph
    void abpoa_subgraph_nodes(abpoa_t *ab, abpoa_para_t *abpt, int inc_beg, int inc_end, int *exc_beg, int *exc_end)
    int abpoa_align_sequence_to_subgraph(abpoa_t *ab, abpoa_para_t *abpt, int beg_node_id, int end_node_id, uint8_t *query, int qlen, abpoa_res_t *res)

    # add an alignment to a graph
    int abpoa_add_graph_node(abpoa_graph_t *abg, uint8_t base)
    void abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_edge, int w, uint8_t add_read_id, uint8_t add_read_weight, int read_id, int read_ids_n, int tot_read_n)
    int abpoa_add_graph_alignment(abpoa_t *ab, abpoa_para_t *abpt, uint8_t *query, int *weight, int qlen, int *qpos_to_node_id, abpoa_res_t res, int read_id, int tot_read_n, int inc_both_ends)
    int abpoa_add_subgraph_alignment(abpoa_t *ab, abpoa_para_t *abpt, int beg_node_id, int end_node_id, uint8_t *query, int *weight, int qlen, int *qpos_to_node_id, abpoa_res_t res, int read_id, int tot_read_n, int inc_both_ends)

    void abpoa_BFS_set_node_index(abpoa_graph_t *abg, int src_id, int sink_id)
    void abpoa_BFS_set_node_remain(abpoa_graph_t *abg, int src_id, int sink_id)
    void abpoa_topological_sort(abpoa_graph_t *abg, abpoa_para_t *abpt)

    # generate consensus sequence from graph
    # para:
    #   out_fp: consensus sequence output in FASTA format, set as NULL to disable
    void abpoa_generate_consensus(abpoa_t *ab, abpoa_para_t *abpt)
    void abpoa_output_fx_consensus(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp)


    # generate column multiple sequence alignment from graph
    void abpoa_generate_rc_msa(abpoa_t *ab, abpoa_para_t *abpt)
    void abpoa_output_rc_msa(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp)

    # generate full graph in GFA format
    void abpoa_generate_gfa(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp)

    # output to out_fp
    void abpoa_output(abpoa_t *ab, abpoa_para_t *abpt, FILE *out_fp)

    # generate DOT graph plot 
    void abpoa_dump_pog(abpoa_t *ab, abpoa_para_t *abpt)