File: ovStoreFile.H

package info (click to toggle)
canu 1.7.1+dfsg-1~bpo9+1
  • links: PTS, VCS
  • area: main
  • in suites: stretch-backports
  • size: 7,680 kB
  • sloc: cpp: 66,708; perl: 13,682; ansic: 4,020; makefile: 627; sh: 472; python: 39
file content (120 lines) | stat: -rw-r--r-- 3,940 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

/******************************************************************************
 *
 *  This file is part of canu, a software program that assembles whole-genome
 *  sequencing reads into contigs.
 *
 *  This software is based on:
 *    'Celera Assembler' (http://wgs-assembler.sourceforge.net)
 *    the 'kmer package' (http://kmer.sourceforge.net)
 *  both originally distributed by Applera Corporation under the GNU General
 *  Public License, version 2.
 *
 *  Canu branched from Celera Assembler at its revision 4587.
 *  Canu branched from the kmer project at its revision 1994.
 *
 *  This file is derived from:
 *
 *    src/stores/ovStore.H
 *
 *  Modifications by:
 *
 *    Brian P. Walenz beginning on 2016-OCT-24
 *      are a 'United States Government Work', and
 *      are released in the public domain
 *
 *  File 'README.licenses' in the root directory of this distribution contains
 *  full conditions and disclaimers for each license.
 */

#ifndef AS_OVSTOREFILE_H
#define AS_OVSTOREFILE_H

#include "AS_global.H"
#include "gkStore.H"

#include "ovOverlap.H"


class ovStoreHistogram;


//  The default type (ovFileNormal, i.e., no flags) opens a file of normal overlaps for reading
//  only.  Normal overlaps have only the B id, i.e., they are the overlaps in a fully built store.
//
//  Output of the overlapper (the input to store building) should be written as ovFileFullWrite.
//  The specialized ovFileFullWriteNoCounts is used internally by store creation.
//
enum ovFileType {
  //  Store files: overlaps carry only the b_id.
  ovFileNormal            = 0,   //  reading
  ovFileNormalWrite       = 1,   //  writing

  //  Dump files: overlaps carry both the a_id and the b_id.
  ovFileFull              = 2,   //  reading
  ovFileFullWrite         = 3,   //  writing
  ovFileFullWriteNoCounts = 4    //  writing, omitting the counts of olaps per read
};


//  A single file of overlaps, opened according to an ovFileType, with overlaps moved through an
//  in-memory buffer (_buffer).
//
//  NOTE(review): this class holds raw pointers and a FILE* but leaves the copy constructor and
//  copy assignment compiler-generated; presumably instances are never copied -- confirm against
//  callers and the destructor.
//
class ovFile {
public:
  //  gkpName    - the gkStore associated with this file
  //  name       - name of the overlap file
  //  type       - how the file is opened, see ovFileType; the default reads b_id overlaps
  //  bufferSize - requested size of the buffer; the default is 1 * 1024 * 1024
  ovFile(gkStore     *gkpName,
         const char  *name,
         ovFileType   type = ovFileNormal,
         uint32       bufferSize = 1 * 1024 * 1024);
  ~ovFile();

  //  Output.  Per the _isOutput flag, writeOverlap() is usable only when the file is an output.
  void    writeBuffer(bool force=false);
  void    writeOverlap(ovOverlap *overlap);
  void    writeOverlaps(ovOverlap *overlaps, uint64 overlapLen);

  //  Input.
  void    readBuffer(void);
  bool    readOverlap(ovOverlap *overlap);
  uint64  readOverlaps(ovOverlap *overlaps, uint64 overlapMax);

  //  Per the _isSeekable flag, usable only when the file is seekable.
  void    seekOverlap(off_t overlap);

  //  The size of an overlap record, in bytes: 1 ID (normal files) or 2 IDs (full files), each a
  //  uint32, plus ovOverlapNWORDS words of type ovOverlapWORD.
  //
  //  Declared const because it only reads _isNormal; it can be called on a const ovFile.
  uint64  recordSize(void) const {
    return(sizeof(uint32) * ((_isNormal) ? 1 : 2) + sizeof(ovOverlapWORD) * ovOverlapNWORDS);
  }

  //  For use in conversion, turn snappy compression on (true) or off (false).  By default, it is
  //  ENABLED, and we cannot read older ovb files.
#ifdef SNAPPY
  void    enableSnappy(bool enabled) {
    _useSnappy = enabled;
  }
#endif

  //  Move the stats in our histogram to the one supplied, and remove our data
  void    transferHistogram(ovStoreHistogram *copy);

private:
  //  NOTE: the order of the data members below is deliberately left as is; it determines the
  //  order in which they are initialized.

  gkStore                *_gkp;
  ovStoreHistogram       *_histogram;

  uint32                  _bufferLen;    //  length of valid data in the buffer
  uint32                  _bufferPos;    //  position the read is at in the buffer
  uint32                  _bufferMax;    //  allocated size of the buffer
  uint32                 *_buffer;

#ifdef SNAPPY
  size_t                  _snappyLen;
  char                   *_snappyBuffer;
#endif

  bool                    _isOutput;     //  if true, we can writeOverlap()
  bool                    _isSeekable;   //  if true, we can seekOverlap()
  bool                    _isNormal;     //  if true, 1 ID per overlap record, else 2; see recordSize()
#ifdef SNAPPY
  bool                    _useSnappy;    //  if true, compress with snappy before writing
#endif

  compressedFileReader   *_reader;
  compressedFileWriter   *_writer;

  char                    _prefix[FILENAME_MAX];
  FILE                   *_file;
};


#endif  //  AS_OVSTOREFILE_H