File: FASTA.h

package info (click to toggle)
snap-aligner 1.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,988 kB
  • sloc: cpp: 36,500; ansic: 5,239; python: 227; makefile: 85; sh: 28
file content (64 lines) | stat: -rw-r--r-- 1,424 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/*++

Module Name:

    FASTA.h

Abstract:

    FASTA reader

Authors:

    Bill Bolosky, August, 2011

Environment:

    User mode service.

Revision History:

    Adapted from Matei Zaharia's Scala implementation.

--*/

#pragma once

#include "Genome.h"

//
// Reads a genome from a FASTA file.
//
// There are several ways of specifying ALT contigs.  There is an opt-in list of ALTs, an opt-out list of regular chromosomes (these must be mutually
// exclusive), and a size cutoff below which a contig is an ALT.  The opt-in and opt-out lists supersede the size cutoff.
//
// Parameters:
//   fileName                       - the FASTA file to read.
//   pieceNameTerminatorCharacters  - NOTE(review): presumably a set of characters, any of which ends a piece (contig) name; confirm in the implementation.
//   spaceIsAPieceNameTerminator    - NOTE(review): presumably treats a space as ending a piece name as well; confirm in the implementation.
//   chromosomePaddingSize          - NOTE(review): presumably the amount of padding placed between chromosomes; units not visible here.
//   opt_in_alt_names / _count      - the opt-in list of ALT contig names and the number of entries in it.
//   opt_out_alt_names / _count     - the opt-out list (regular chromosomes) and the number of entries in it.
//   maxSizeForAutomaticALT         - the size cutoff below which a contig is an ALT, unless the opt-in/opt-out lists say otherwise.
//   autoAlt                        - NOTE(review): presumably enables automatic ALT detection; exact meaning is not visible in this header.
//
// Returns a pointer to the Genome that was read.
// NOTE(review): failure behavior (e.g., a NULL return) and ownership of the result are not visible in this header; confirm in the implementation.
//

	const Genome *
ReadFASTAGenome(
	const char		*fileName,
	const char		*pieceNameTerminatorCharacters,
	bool			 spaceIsAPieceNameTerminator,
	unsigned		 chromosomePaddingSize,
	const char* const*opt_in_alt_names,
	int				 opt_in_alt_names_count,
	const char* const*opt_out_alt_names,
	int				 opt_out_alt_names_count,
	GenomeDistance	 maxSizeForAutomaticALT,
	bool             autoAlt);

//
// The FASTA appending functions return whether the write was successful.
//
// WARNING: They write very long lines.
// According to Wikipedia, a FASTA file's lines should be at most 120 characters long, or better, 79.
// Unix workaround if the piece names aren't too long: 'fold -w 79'.
//
// Parameters:
//   (unnamed) const Genome *  - NOTE(review): presumably the genome whose contents are appended; confirm in the implementation.
//   fasta                     - the stdio stream that receives the FASTA text.
//                               NOTE(review): presumably must already be open for writing; this function's effect on the stream's lifetime is not visible here.
//
// Returns true if the write was successful, false otherwise.
//

    bool
AppendFASTAGenome(const Genome *, FILE *fasta);

//
// Returns the prefix string for the given sex: "PATERNAL|" when male is true,
// "MATERNAL|" otherwise.  The returned pointer refers to a string literal.
//
// These particular strings are arbitrary; is there some existing convention?
//
inline const char *diploidFASTASexPrefix(bool male)
{
    if (male) {
        return "PATERNAL|";
    }

    return "MATERNAL|";
}