File: maf.h

package info (click to toggle)
phast 1.5%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 13,008 kB
  • sloc: ansic: 54,195; makefile: 358; sh: 337; perl: 321
file content (493 lines) | stat: -rw-r--r-- 29,254 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
/***************************************************************************
 * PHAST: PHylogenetic Analysis with Space/Time models
 * Copyright (c) 2002-2005 University of California, 2006-2010 Cornell 
 * University.  All rights reserved.
 *
 * This source code is distributed under a BSD-style license.  See the
 * file LICENSE.txt for details.
 ***************************************************************************/

/* $Id: maf.h,v 1.7 2009-01-09 22:01:00 mt269 Exp $ */

/** \file maf.h 
    Reading of alignments from MAF ("Multiple Alignment Format")
    files to generate Sufficient Statistics.
    (See http://www.bx.psu.edu/miller_lab.)  These functions are primarily
    concerned with extracting sufficient statistics from a MAF file
    (see sufficient_stats.c).  They avoid representing the alignment
    explicitly, and as a result allow large MAF files (e.g., spanning
    whole mammalian chromosomes) to be read and stored fairly
    efficiently.  A "reference sequence" alignment is currently
    assumed, with the reference sequence appearing first in each
    alignment block, and always in the positive (rather than reverse
    complemented) orientation (this is the convention with MULTIZ).
    \ingroup msa
*/

#ifndef MAF_H
#define MAF_H

#include "stdio.h"
#include "msa.h"
#include "hashtable.h"
#include "gff.h"

/** Hold data for a single block within a MAF file */
typedef struct {
  String *text;  /**< Maf Block contents, text[i] contains line i of the block */
  int numline, /**< Number of lines in Maf Block */
  seqlen;      /**< Sequence length of sequence in Maf Block */

  int *specmap;  /**< Maps Maf Block contents (by line) to species.
		      specmap[i]=j if line[i] of block contains information
                    about species j.  If line i isn't species-specific 
		    (ie, the first line), specmap[i] = -1 */
  char **seq;  /**< Sequence data for all species within Maf Block.
		seq[i] points to sequence for species i (pointer to
		  position in text array) */
  char **quality;  /**< Quality scores for all species within Maf Block.
			quality[i] points to string of quality scores for
                      species[i], or NULL if no data */
  char **qline,  /**< Pointer to lines starting with 'q' for each species  */
	**sline, /**< Pointer to lines starting with 's' for each species  */
	**iline, /**< Pointer to lines starting with 'i' for each species  */
	**eline; /**< Pointer to lines starting with 'e' for each species  */


} MAF_BLOCK;
                 
/** @name MAF File reading 
   \{ */
/** Read subset of an Alignment from a MAF file; subset selected by sequence and feature names. 
   @pre The MAF file must be sorted with respect to the reference sequence.  
   @param[in] F MAF file
   @param[in] REFSEQ (Optional) reference sequence.  If
                                        non-NULL, the indicated file will
                                        be used to define bases in regions
                                        of no alignment (not represented in
                                        the MAF).  File format is expected
                                        to be FASTA.  Ignored if
                                        store_order == FALSE.  If NULL and
                                        store_order == TRUE, then bases in
                                        reference seq not present in MAF
                                        are represented as Ns 
   @param[in] tuple_size tuple size for sufficient statistics 
   @param[in] alphabet (Optional) alphabet for alignment; if NULL, DEFAULT_ALPABET is assumed 
   @param[in] gff (Optional) GFF_Set.  If non-NULL,
                                        category-specific counts will be
                                        collected (cm must be non-NULL
                                        also).  The gff is assumed to use
                                        the indexing system of the
                                        reference sequence (sequence 1).
                                        Currently, a non-NULL gff implies
                                        gap_strip_mode == 1 (projection
                                        onto reference sequence).
   @param[in] cm Used for category-specific counts, ignored otherwise
   @param[in] cycle_size Label site categories
    					12...<cycle_size>...12...<cycle_size>
                                        instead of using gff and cm.
                                        Useful when stats are to be
                                        collected for non-overlapping
                                        tuples.  Use -1 to ignore.
   @param[in] store_order Whether to store order in which
                                        tuples occur.  Storing order
                                        requires more memory and larger
                                        files, and is not necessary in many
                                        cases.
   @param[in] reverse_groups  Tag defining groups in gff;
                      indicates groups on negative strand
                      should be reverse complemented.
                      Ignored if NULL.  Useful when
                      collecting counts for
                      strand-specific categories.  Can't
                      be used if store_order == TRUE
   @param[in] gap_strip_mode Gap stripping mode.  Currently,
                      if store_order == 1, may only have
                      value NO_STRIP or 1 (indicating
                      projection onto sequence 1, the
                      reference sequence).  This is
                      simply to avoid some complexity in
                      coordinate mapping. 
   @param[in] keep_overlapping If TRUE, keep overlapping blocks,
                      otherwise keep only first instance.
                      Must be FALSE if store_order ==
                      TRUE or gff != NULL or 
                      cycle_size != -1 
   @param[in] cats_to_do If non-NULL, only loads elements of the
                      alignment with features in this list.  
                      Requires gff != NULL and cm != NULL
   @param[in] seqnames If non-NULL, this is the list of sequence names
                             to keep (if seq_keep==TRUE) or discard 
                             (if seq_keep==FALSE)
   @param[in] seq_keep Only used if seqnames != NULL, determines whether
                             to keep only or discard seqnames
   @note NOTE: for now, if a GFF is defined, then all blocks are projected
     onto the reference seq (gff != NULL -> gap_strip_mode == 1).  This
     simplifies things somewhat, and it's rare that you want
     category-specific counts without projecting (because gaps in the
     reference sequence make it difficult to assign sites to categories
     rationally). 
   @note The alignment won't be constructed explicitly; instead, a sufficient-statistics representation will be extracted directly from the MAF.  
   @warning Any blocks falling out of order, or which are redundant with previous blocks, will be discarded.
 */
MSA *maf_read_cats_subset(FILE *F, FILE *REFSEQF, int tuple_size, char *alphabet,
		   GFF_Set *gff, CategoryMap *cm, int cycle_size, int store_order, 
		   char *reverse_groups, int gap_strip_mode, int keep_overlapping,
			  List* cats_to_do, List *seqnames, int seq_keep);

/** Read a subset of an Alignment from a MAF file subset selected by feature names.
   @pre The MAF file must be sorted with respect to the reference sequence.  
   @param[in] F MAF file
   @param[in] REFSEQ (Optional) reference sequence.  If
                                        non-NULL, the indicated file will
                                        be used to define bases in regions
                                        of no alignment (not represented in
                                        the MAF).  File format is expected
                                        to be FASTA.  Ignored if
                                        store_order == FALSE.  If NULL and
                                        store_order == TRUE, then bases in
                                        reference seq not present in MAF
                                        are represented as Ns 
   @param[in] tuple_size tuple size for sufficient statistics 
   @param[in] alphabet (Optional) alphabet for alignment; if NULL, DEFAULT_ALPABET is assumed 
   @param[in] gff (Optional) GFF_Set.  If non-NULL,
                                        category-specific counts will be
                                        collected (cm must be non-NULL
                                        also).  The gff is assumed to use
                                        the indexing system of the
                                        reference sequence (sequence 1).
                                        Currently, a non-NULL gff implies
                                        gap_strip_mode == 1 (projection
                                        onto reference sequence).
   @param[in] cm Used for category-specific counts, ignored otherwise
   @param[in] cycle_size Label site categories
    					12...<cycle_size>...12...<cycle_size>
                                        instead of using gff and cm.
                                        Useful when stats are to be
                                        collected for non-overlapping
                                        tuples.  Use -1 to ignore.
   @param[in] store_order Whether to store order in which
                                        tuples occur.  Storing order
                                        requires more memory and larger
                                        files, and is not necessary in many
                                        cases.
   @param[in] reverse_groups  Tag defining groups in gff;
                      indicates groups on negative strand
                      should be reverse complemented.
                      Ignored if NULL.  Useful when
                      collecting counts for
                      strand-specific categories.  Can't
                      be used if store_order == TRUE
   @param[in] gap_strip_mode Gap stripping mode.  Currently,
                      if store_order == 1, may only have
                      value NO_STRIP or 1 (indicating
                      projection onto sequence 1, the
                      reference sequence).  This is
                      simply to avoid some complexity in
                      coordinate mapping. 
   @param[in] keep_overlapping If TRUE, keep overlapping blocks,
                      otherwise keep only first instance.
                      Must be FALSE if store_order ==
                      TRUE or gff != NULL or 
                      cycle_size != -1 
   @param[in] cats_to_do If non-NULL, only loads elements of the
                      alignment with features in this list.  
                      Requires gff != NULL and cm != NULL
   @note NOTE: for now, if a GFF is defined, then all blocks are projected
     onto the reference seq (gff != NULL -> gap_strip_mode == 1).  This
     simplifies things somewhat, and it's rare that you want
     category-specific counts without projecting (because gaps in the
     reference sequence make it difficult to assign sites to categories
     rationally). 
   @note The alignment won't be constructed explicitly; instead, a sufficient-statistics representation will be extracted directly from the MAF.  
   @warning Any blocks falling out of order, or which are redundant with previous blocks, will be discarded.
 */
MSA *maf_read_cats(FILE *F, FILE *REFSEQF, int tuple_size, char *alphabet,
		   GFF_Set *gff, CategoryMap *cm, int cycle_size, int store_order, 
		   char *reverse_groups, int gap_strip_mode, int keep_overlapping,
		   List* cats_to_do);

/** Read An Alignment from a MAF file. 
   @pre The MAF file must be sorted with respect to the reference sequence.  
   @param[in] F MAF file
   @param[in] REFSEQ (Optional) reference sequence.  If
                                        non-NULL, the indicated file will
                                        be used to define bases in regions
                                        of no alignment (not represented in
                                        the MAF).  File format is expected
                                        to be FASTA.  Ignored if
                                        store_order == FALSE.  If NULL and
                                        store_order == TRUE, then bases in
                                        reference seq not present in MAF
                                        are represented as Ns 
   @param[in] tuple_size tuple size for sufficient statistics 
   @param[in] alphabet (Optional) alphabet for alignment; if NULL, DEFAULT_ALPABET is assumed 
   @param[in] gff (Optional) GFF_Set.  If non-NULL,
                                        category-specific counts will be
                                        collected (cm must be non-NULL
                                        also).  The gff is assumed to use
                                        the indexing system of the
                                        reference sequence (sequence 1).
                                        Currently, a non-NULL gff implies
                                        gap_strip_mode == 1 (projection
                                        onto reference sequence).
   @param[in] cm Used for category-specific counts, ignored otherwise
   @param[in] cycle_size Label site categories
    					12...<cycle_size>...12...<cycle_size>
                                        instead of using gff and cm.
                                        Useful when stats are to be
                                        collected for non-overlapping
                                        tuples.  Use -1 to ignore.
   @param[in] store_order Whether to store order in which
                                        tuples occur.  Storing order
                                        requires more memory and larger
                                        files, and is not necessary in many
                                        cases.
   @param[in] reverse_groups  Tag defining groups in gff;
                      indicates groups on negative strand
                      should be reverse complemented.
                      Ignored if NULL.  Useful when
                      collecting counts for
                      strand-specific categories.  Can't
                      be used if store_order == TRUE
   @param[in] gap_strip_mode Gap stripping mode.  Currently,
                      if store_order == 1, may only have
                      value NO_STRIP or 1 (indicating
                      projection onto sequence 1, the
                      reference sequence).  This is
                      simply to avoid some complexity in
                      coordinate mapping. 
   @param[in] keep_overlapping If TRUE, keep overlapping blocks,
                      otherwise keep only first instance.
                      Must be FALSE if store_order ==
                      TRUE or gff != NULL or 
                      cycle_size != -1 
   @note NOTE: for now, if a GFF is defined, then all blocks are projected
     onto the reference seq (gff != NULL -> gap_strip_mode == 1).  This
     simplifies things somewhat, and it's rare that you want
     category-specific counts without projecting (because gaps in the
     reference sequence make it difficult to assign sites to categories
     rationally). 
   @note The alignment won't be constructed explicitly; instead, a sufficient-statistics representation will be extracted directly from the MAF.  
   @warning Any blocks falling out of order, or which are redundant with previous blocks, will be discarded.
 */
MSA *maf_read(FILE *F, FILE *REFSEQF, int tuple_size, char *alphabet,
	      GFF_Set *gff, CategoryMap *cm, int cycle_size, int store_order, 
	      char *reverse_groups, int gap_strip_mode, int keep_overlapping);

/** Read a subset Alignment from a MAF file; subset selected by feature name; not necessarily read in order wrt reference sequence.
   @pre The MAF file must be sorted with respect to the reference sequence.  
   @param[in] F MAF file
   @param[in] REFSEQ (Optional) reference sequence.  If
                                        non-NULL, the indicated file will
                                        be used to define bases in regions
                                        of no alignment (not represented in
                                        the MAF).  File format is expected
                                        to be FASTA.  Ignored if
                                        store_order == FALSE.  If NULL and
                                        store_order == TRUE, then bases in
                                        reference seq not present in MAF
                                        are represented as Ns 
   @param[in] tuple_size tuple size for sufficient statistics 
   @param[in] alphabet (Optional) alphabet for alignment; if NULL, DEFAULT_ALPABET is assumed 
   @param[in] gff (Optional) GFF_Set.  If non-NULL,
                                        category-specific counts will be
                                        collected (cm must be non-NULL
                                        also).  The gff is assumed to use
                                        the indexing system of the
                                        reference sequence (sequence 1).
                                        Currently, a non-NULL gff implies
                                        gap_strip_mode == 1 (projection
                                        onto reference sequence).
   @param[in] cm Used for category-specific counts, ignored otherwise
   @param[in] cycle_size Label site categories
    					12...<cycle_size>...12...<cycle_size>
                                        instead of using gff and cm.
                                        Useful when stats are to be
                                        collected for non-overlapping
                                        tuples.  Use -1 to ignore.
   @param[in] store_order Whether to store order in which
                                        tuples occur.  Storing order
                                        requires more memory and larger
                                        files, and is not necessary in many
                                        cases.
   @param[in] reverse_groups  Tag defining groups in gff;
                      indicates groups on negative strand
                      should be reverse complemented.
                      Ignored if NULL.  Useful when
                      collecting counts for
                      strand-specific categories.  Can't
                      be used if store_order == TRUE
   @param[in] gap_strip_mode Gap stripping mode.  Currently,
                      if store_order == 1, may only have
                      value NO_STRIP or 1 (indicating
                      projection onto sequence 1, the
                      reference sequence).  This is
                      simply to avoid some complexity in
                      coordinate mapping. 
   @param[in] keep_overlapping If TRUE, keep overlapping blocks,
                      otherwise keep only first instance.
                      Must be FALSE if store_order ==
                      TRUE or gff != NULL or 
                      cycle_size != -1 
   @param[in] cats_to_do If non-NULL, only loads elements of the
                      alignment with features in this list.  
                      Requires gff != NULL and cm != NULL
   @note NOTE: for now, if a GFF is defined, then all blocks are projected
     onto the reference seq (gff != NULL -> gap_strip_mode == 1).  This
     simplifies things somewhat, and it's rare that you want
     category-specific counts without projecting (because gaps in the
     reference sequence make it difficult to assign sites to categories
     rationally). 
   @note The alignment won't be constructed explicitly; instead, a sufficient-statistics representation will be extracted directly from the MAF.  
   @warning Any blocks falling out of order, or which are redundant with previous blocks, will be discarded.
 */
MSA *maf_read_unsorted(FILE *f, FILE *REFSEQF, int tuple_size, char *alphabet,
		  GFF_Set *gff, CategoryMap *cm, int cycle_size, int store_order,
		  char *reverse_groups, int gap_strip_mode, int keep_overlapping,
		  List *cats_to_do);

/** Read An Alignment from a MAF file; not necessarily read in order wrt reference sequence.
   @pre The MAF file must be sorted with respect to the reference sequence.  
   @param[in] F MAF file
   @param[in] REFSEQ (Optional) reference sequence.  If
                                        non-NULL, the indicated file will
                                        be used to define bases in regions
                                        of no alignment (not represented in
                                        the MAF).  File format is expected
                                        to be FASTA.  Ignored if
                                        store_order == FALSE.  If NULL and
                                        store_order == TRUE, then bases in
                                        reference seq not present in MAF
                                        are represented as Ns 
   @param[in] tuple_size tuple size for sufficient statistics 
   @param[in] alphabet (Optional) alphabet for alignment; if NULL, DEFAULT_ALPABET is assumed 
   @param[in] gff (Optional) GFF_Set.  If non-NULL,
                                        category-specific counts will be
                                        collected (cm must be non-NULL
                                        also).  The gff is assumed to use
                                        the indexing system of the
                                        reference sequence (sequence 1).
                                        Currently, a non-NULL gff implies
                                        gap_strip_mode == 1 (projection
                                        onto reference sequence).
   @param[in] cm Used for category-specific counts, ignored otherwise
   @param[in] cycle_size Label site categories
    					12...<cycle_size>...12...<cycle_size>
                                        instead of using gff and cm.
                                        Useful when stats are to be
                                        collected for non-overlapping
                                        tuples.  Use -1 to ignore.
   @param[in] store_order Whether to store order in which
                                        tuples occur.  Storing order
                                        requires more memory and larger
                                        files, and is not necessary in many
                                        cases.
   @param[in] reverse_groups  Tag defining groups in gff;
                      indicates groups on negative strand
                      should be reverse complemented.
                      Ignored if NULL.  Useful when
                      collecting counts for
                      strand-specific categories.  Can't
                      be used if store_order == TRUE
   @param[in] gap_strip_mode Gap stripping mode.  Currently,
                      if store_order == 1, may only have
                      value NO_STRIP or 1 (indicating
                      projection onto sequence 1, the
                      reference sequence).  This is
                      simply to avoid some complexity in
                      coordinate mapping. 
   @param[in] keep_overlapping If TRUE, keep overlapping blocks,
                      otherwise keep only first instance.
                      Must be FALSE if store_order ==
                      TRUE or gff != NULL or 
                      cycle_size != -1 
   @note NOTE: for now, if a GFF is defined, then all blocks are projected
     onto the reference seq (gff != NULL -> gap_strip_mode == 1).  This
     simplifies things somewhat, and it's rare that you want
     category-specific counts without projecting (because gaps in the
     reference sequence make it difficult to assign sites to categories
     rationally). 
   @note The alignment won't be constructed explicitly; instead, a sufficient-statistics representation will be extracted directly from the MAF.  
   @warning Any blocks falling out of order, or which are redundant with previous blocks, will be discarded.
 */
MSA *maf_read_old(FILE *F, FILE *REFSEQF, int tuple_size, char *alphabet,
              GFF_Set *gff, CategoryMap *cm, int cycle_size, int store_order, 
              char *reverse_groups, int gap_strip_mode, int keep_overlapping);

/** Read a block from an MAF file.
   @param[in] F File containing MAF data to read
   @param[out] mini_msa MAF Block is stored here
   @param[out] name_hash Hash table mapping sequence names to sequence indices (prefix of name wrt '.' character)
   @param[out] start_idx stating coord of reference sequence
   @param[out] length Length of reference sequence
   @param[in] do_toupper Make sequences read in all upper case 
   @note Allocates memory for sequences if they are NULL (as with first block)
   @note Reads to next "a" line or EOF
   @result 0 if successful, EOF if no more blocks available
*/
int maf_read_block(FILE *F, MSA *mini_msa, Hashtable *name_hash,
                   int *start_idx, int *length, int do_toupper);

/** Add sequences from a MAF file to an existing MAF block
   @param[in] F File containing MAF data to read
   @param[in,out] mini_msa MAF Block is stored here
   @param[out] name_hash Hash table mapping sequence names to sequence indices (prefix of name wrt '.' character)
   @param[out] start_idx stating coord of reference sequence
   @param[out] length Length of reference sequence
   @param[in] do_toupper Make sequences read in all upper case 
   @param[in] skip_new_species If mini_msa already contains a given species, don't add it from MAF file
   @note Allocates memory for sequences if they are NULL (as with first block)
   @note Reads to next "a" line or EOF
   @result 0 if successful, EOF if no more blocks available
*/
int maf_read_block_addseq(FILE *f, MSA *mini_msa, Hashtable *name_hash,
			  int *start_idx, int *length, int do_toupper,
			  int skip_new_species);

/** Get partial list of sequence names (roots of names only) and length of refseq.
   @param[in] F File containing MAF Block contents
   @param[out] names List of sequence names
   @param[out] name_hash Hash Table mapping names to sequence index
   @param[out] nseqs Number of sequences in MAF Block
   @param[out] refseqlen Length of Reference Sequence
   @param[in] add_seqs Whether to add sequences to names and name_hash
   @warning May re-order names to put refseq first even if add_seqs == 0
 */
void maf_quick_peek(FILE *f, char ***names, Hashtable *name_hash,
		    int *nseqs, int *refseqlen, int add_seqs);

/** Get complete list of sequence names (roots of names only) and length of refseq.
   @param[in] F File containing MAF Block contents
   @param[out] names List of sequence names
   @param[out] name_hash Hash Table mapping names to sequence index (Hash Table must be preallocated)
   @param[out] nseqs Number of sequences in MAF Block
   @param[out] map (Optional) Coordinate map for the reference sequence (map must be pre-allocated)
   @param[out] redundant_blocks List containing indices of blocks that overlap previous blocks in 'redundant_blocks' list
   @param[in] keep_overlapping Whether to keep overlapping blocks. If 0 overlapping blocks discarded, otherwise retained
   @param[out] refseqlen Length of Reference Sequence
   @warning May re-order names to put refseq first even if add_seqs == 0
*/
void maf_peek(FILE *F, char ***names, Hashtable *name_hash, 
              int *nseqs, msa_coord_map *map, List *redundant_blocks,
              int keep_overlapping, int *refseqlen);
/** \} */

/** Extracts features from gff relevant to a specified interval.
   @pre sub_gff is allocated, with an empty feature list
   @pre gff is sorted
   @param[out] sub_gff Subset of features in gff object
   @param[in] gff Feature Set take subset from
   @param[in] start_idx Starting of interval for which to extract features
   @param[in] end_idx Ending of interval for which to extract features  
   @param[in] gff_idx Feature number to start extracting from in GFF
   @param[in] cm Category Map used for category ranges of features
   @param[in] reverse_compl Whether to reverse complement '-' strands
   @param[in] tuple_size Size of tuples, used in reverse complementing
   @note Shifts all coords such that start_idx is position 1 
*/
void maf_block_sub_gff(GFF_Set *sub_gff, GFF_Set *gff, int start_idx, 
                       int end_idx, int *gff_idx, CategoryMap *cm,
                       int reverse_compl, int tuple_size);

#endif