File: srf.h

package info (click to toggle)
staden-io-lib 1.14.11-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 14,116 kB
  • sloc: ansic: 50,895; makefile: 341; sh: 219; perl: 198
file content (231 lines) | stat: -rw-r--r-- 7,141 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
 * Copyright (c) 2007-2009 Genome Research Ltd.
 * Author(s): James Bonfield
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 * 
 *    1. Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 * 
 *    2. Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 * 
 *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
 *    Institute nor the names of its contributors may be used to endorse
 *    or promote products derived from this software without specific
 *    prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _SRF_H_
#define _SRF_H_

#include "io_lib/hash_table.h"
#include "io_lib/ztr.h"
#include "io_lib/mFILE.h"

/*
 * PATH_MAX sizes the ch_file/th_file buffers of srf_index_t in this header.
 *
 * <limits.h> is included here, before the check, so that a system-provided
 * PATH_MAX is always seen first.  Without it the value chosen (and therefore
 * the size of srf_index_t) would depend on which headers happened to be
 * included earlier in the translation unit.
 *
 * The fallback below covers platforms where PATH_MAX is not available at
 * all (for example hurd_i386).
 */
#include <limits.h>

#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

/* Magic string and version string for SRF */
#define SRF_MAGIC          "SSRF"
#define SRF_VERSION        "1.3"

/* Single-character block type codes */
#define SRFB_CONTAINER     'S'
#define SRFB_XML           'X'
#define SRFB_TRACE_HEADER  'H'
#define SRFB_TRACE_BODY    'R'
#define SRFB_INDEX         'I'

/* Lack of index => 8 zero bytes at end of file to indicate zero length */
#define SRFB_NULL_INDEX    '\0'

/*--- Public structures */

/*
 * Container header - several per file.
 *
 * All three strings are stored inline in fixed 256-byte buffers.
 */
typedef struct {
    /* Block type code; presumably SRFB_CONTAINER for this block -- confirm */
    int block_type;
    /* Version string (cf. SRF_VERSION) -- TODO confirm what is stored here */
    char version[256];
    /* Single-character container type code; valid values not visible here */
    char container_type;
    /* Base caller name */
    char base_caller[256];
    /* Base caller version string */
    char base_caller_version[256];
} srf_cont_hdr_t;

/*
 * Trace header - several per container.
 *
 * trace_hdr is a pointer, so the header data lives outside this struct;
 * who allocates and frees it is not visible in this header.
 */
typedef struct {
    /* Block type code; presumably SRFB_TRACE_HEADER -- confirm */
    int block_type; 
    /* Single-character prefix type code; valid values not visible here */
    char read_prefix_type;
    /* Read id prefix, stored inline in a 256-byte buffer */
    char id_prefix[256];
    /* Size of the data at trace_hdr (presumably in bytes -- confirm) */
    uint32_t trace_hdr_size;
    /* Pointer to the trace header data */
    unsigned char *trace_hdr;
} srf_trace_hdr_t;

/*
 * Trace body - several per trace header.
 *
 * trace is a pointer, so the body data lives outside this struct; who
 * allocates and frees it is not visible in this header.
 */
typedef struct {
    /* Block type code; presumably SRFB_TRACE_BODY -- confirm */
    int block_type;
    /*
     * Length of read_id.  NOTE(review): an explicit length suggests read_id
     * may hold binary data and need not be NUL-terminated -- confirm.
     */
    int read_id_length;
    /* Read id, stored inline in a 256-byte buffer */
    char read_id[256];
    /* Presumably a combination of the SRF_READ_FLAG_* masks -- confirm */
    unsigned char flags;
    /* Size of the data at trace (presumably in bytes -- confirm) */
    uint32_t trace_size;
    /* Pointer to the trace data */
    unsigned char *trace;
} srf_trace_body_t;

/*
 * XML - NCBI TraceInfo data block.
 *
 * The XML text is held out of line via a pointer; ownership of that buffer
 * is not visible in this header.
 */
typedef struct {
    /* Length of the data at xml (presumably in bytes -- confirm) */
    uint32_t xml_len;
    /* Pointer to the XML text; NUL-termination is not guaranteed by this type */
    char *xml;
} srf_xml_t;

/*
 * Read flag bit masks: bit 0 = bad, bit 1 = withdrawn, bits 5-7 = user.
 * Presumably applied to srf_trace_body_t.flags -- confirm.
 */
#define SRF_READ_FLAG_BAD_MASK       0x01
#define SRF_READ_FLAG_WITHDRAWN_MASK 0x02
#define SRF_READ_FLAG_USER_MASK      0xE0

/*
 * Indexing: index header.
 *
 * Per the comment on index_hdr_sz, every field above it is stored on disk
 * and index_hdr_sz records the on-disk size of that data.
 */
typedef struct {
    /*
     * magic and version are exactly 4 bytes each.  If they hold
     * SRF_INDEX_MAGIC / SRF_INDEX_VERSION (both 4 characters long) there is
     * no room for a terminating NUL, so they must not be treated as C strings.
     */
    char     magic[4];
    char     version[4];
    /* Size value; presumably the total index size in bytes -- confirm */
    uint64_t size;
    /* Presumably the number of container headers indexed -- confirm */
    uint32_t n_container;
    /* Presumably the number of data block (trace) headers indexed -- confirm */
    uint32_t n_data_block_hdr;
    /* Presumably the number of hash buckets in the index -- confirm */
    uint64_t n_buckets;
    /* Index type code; valid values not visible here */
    int8_t   index_type;
    /* Flag; see also srf_index_t.dbh_pos_stored_sep.  Meaning not visible here */
    int8_t   dbh_pos_stored_sep;
    /* File names, stored inline in 256-byte buffers */
    char     dbh_file[256];
    char     cont_file[256];
    int      index_hdr_sz; /* size of the above data on disk */
} srf_index_hdr_t;

/* In-memory index itself */

/*
 * Name block size: ten million.  Presumably the number of bytes allocated
 * per srf_name_block_t -- confirm against the index implementation.
 */
#define SRF_INDEX_NAME_BLOCK_SIZE (10 * 1000 * 1000)

/*
 * One block of name storage.
 * NOTE(review): presumably `names` points at a buffer of `space` bytes of
 * which `used` are filled so far -- confirm against the index code.
 */
typedef struct {
    size_t used;
    size_t space;
    char  *names;
} srf_name_block_t;

/*
 * In-memory index under construction (see srf_index_create and the
 * srf_index_add_* functions declared in this header).
 *
 * Array and HashTable are project types declared in the io_lib headers
 * included by this file.
 */
typedef struct {
    /* File names, stored inline; each buffer is PATH_MAX+1 bytes */
    char ch_file[PATH_MAX+1];
    char th_file[PATH_MAX+1];
    /* Presumably the positions passed to srf_index_add_cont_hdr -- confirm */
    Array ch_pos;
    /* Presumably the positions passed to srf_index_add_trace_hdr -- confirm */
    Array th_pos;
    /* Presumably an array of srf_name_block_t -- confirm */
    Array name_blocks;
    /* Flag; see also srf_index_hdr_t.dbh_pos_stored_sep.  Meaning not visible here */
    int dbh_pos_stored_sep;
    /* Hash table; presumably keyed on trace name -- confirm */
    HashTable *db_hash;
} srf_index_t;

/*
 * Master SRF object.
 *
 * Wraps a stdio stream together with one cached instance of each block
 * structure declared in this header.
 */
typedef struct {
    /* Underlying stdio stream */
    FILE *fp;

    /* Cached copies of each of the most recent chunk types loaded */
    srf_cont_hdr_t    ch;   /* container header */
    srf_trace_hdr_t   th;   /* trace header */
    srf_trace_body_t  tb;   /* trace body */
    srf_xml_t         xml;  /* XML block */
    srf_index_hdr_t   hdr;  /* index header */

    /* Private: cached data for use by srf_next_ztr */
    ztr_t *ztr;
    mFILE *mf;
    /* Presumably offsets into mf -- confirm against srf_next_ztr */
    long mf_pos, mf_end;
} srf_t;

/*
 * Index magic and version strings.  Each is 4 characters long, the same
 * size as the magic[4] / version[4] fields of srf_index_hdr_t.
 */
#define SRF_INDEX_MAGIC   "Ihsh"
#define SRF_INDEX_VERSION "1.01"


/*--- Initialisation */

/*
 * Creation and destruction of the master srf_t object.  The implementations
 * are not in this header; the notes below come from the signatures only.
 *
 * srf_create  - takes an already-open FILE* and returns an srf_t*.
 * srf_open    - takes a file name and a mode string and returns an srf_t*.
 *               NOTE(review): mode is presumably an fopen()-style mode and
 *               NULL presumably signals failure -- confirm.
 * srf_destroy - disposes of an srf_t.  NOTE(review): auto_close presumably
 *               selects whether the underlying FILE* is closed as well --
 *               confirm.
 */
srf_t *srf_create(FILE *fp);
srf_t *srf_open(char *fn, char *mode);
void srf_destroy(srf_t *srf, int auto_close);

/*--- Base type I/O methods */

/*
 * Readers and writers for the primitive values used in an SRF stream.
 * All return int; the success/failure convention is not visible in this
 * header.
 *
 * NOTE(review): "pstring" is presumably a length-prefixed string.
 * srf_read_pstring is not given the size of str; the 256-byte string
 * fields in this header suggest the buffer must hold at least 256 bytes
 * -- confirm.
 * srf_write_pstringb takes an explicit length, presumably so that data
 * containing NUL bytes can be written -- confirm.
 */
int srf_write_pstring(srf_t *srf, char *str);
int srf_write_pstringb(srf_t *srf, char *str, int length);
int srf_read_pstring(srf_t *srf, char *str);

/* 32-bit unsigned value; on-disk byte order is not visible in this header */
int srf_read_uint32(srf_t *srf, uint32_t *val);
int srf_write_uint32(srf_t *srf, uint32_t val);

/* 64-bit unsigned value; on-disk byte order is not visible in this header */
int srf_read_uint64(srf_t *srf, uint64_t *val);
int srf_write_uint64(srf_t *srf, uint64_t val);


/*--- Mid level I/O - srf block */

/*
 * Container header blocks (srf_cont_hdr_t).
 *
 * srf_construct_cont_hdr takes a base caller name (bc) and version
 * (bc_version) and returns a srf_cont_hdr_t*.
 * NOTE(review): passing ch == NULL presumably makes it allocate a new
 * header, to be released with srf_destroy_cont_hdr -- confirm.
 * The read/write functions return int; the convention is not visible here.
 */
srf_cont_hdr_t *srf_construct_cont_hdr(srf_cont_hdr_t *ch,
				       char *bc,
				       char *bc_version);
void srf_destroy_cont_hdr(srf_cont_hdr_t *ch);
int srf_read_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch);
int srf_write_cont_hdr(srf_t *srf, srf_cont_hdr_t *ch);

/*
 * XML blocks (srf_xml_t).  Both return int; the convention is not visible
 * in this header.
 * NOTE(review): srf_read_xml presumably fills in xml->xml and xml->xml_len;
 * who owns the resulting buffer is not visible here -- confirm.
 */
int srf_read_xml(srf_t *srf, srf_xml_t *xml);
int srf_write_xml(srf_t *srf, srf_xml_t *xml);

/*
 * Trace header blocks (srf_trace_hdr_t).
 *
 * srf_construct_trace_hdr takes an id prefix plus a header buffer and its
 * size (header, header_sz) and returns a srf_trace_hdr_t*.
 * NOTE(review): whether header is copied or merely referenced, and whether
 * th == NULL means "allocate", are not visible here -- confirm.
 * The read/write functions return int; the convention is not visible here.
 */
srf_trace_hdr_t *srf_construct_trace_hdr(srf_trace_hdr_t *th,
					 char *prefix,
					 unsigned char *header,
					 uint32_t header_sz);
void srf_destroy_trace_hdr(srf_trace_hdr_t *th);
int srf_read_trace_hdr(srf_t *srf, srf_trace_hdr_t *th);
int srf_write_trace_hdr(srf_t *srf, srf_trace_hdr_t *th);

/*
 * Trace body blocks (srf_trace_body_t).
 *
 * Note that the srf_trace_body_t parameter is named `th` in these
 * prototypes even though it is a trace body, not a trace header.
 *
 * srf_construct_trace_body takes a read id suffix with an explicit length
 * (suffix, suffix_len), a body buffer with its size (body, body_size) and
 * a flags byte, and returns a srf_trace_body_t*.
 * NOTE(review): whether body is copied or merely referenced is not visible
 * here; flags are presumably SRF_READ_FLAG_* bits -- confirm.
 * NOTE(review): a non-zero no_trace presumably makes srf_read_trace_body
 * skip loading the trace data itself -- confirm.
 */
srf_trace_body_t *srf_construct_trace_body(srf_trace_body_t *th,
					   char *suffix,
					   int suffix_len,
					   unsigned char *body,
					   uint32_t body_size,
					   unsigned char flags);
void srf_destroy_trace_body(srf_trace_body_t *th);
int srf_write_trace_body(srf_t *srf, srf_trace_body_t *th);
int srf_read_trace_body(srf_t *srf, srf_trace_body_t *th, int no_trace);

/*
 * Index support: reading/writing the index header (srf_index_hdr_t) and
 * building an in-memory index (srf_index_t).
 *
 * The int-returning functions give no convention in this header.
 * NOTE(review): no_seek presumably tells srf_read_index_hdr to read at the
 * current file position instead of seeking to the index first -- confirm.
 * NOTE(review): the `pos` arguments are presumably file offsets of the
 * corresponding blocks, and `name` the trace name to index -- confirm.
 * NOTE(review): ch_file, th_file and dbh_sep presumably initialise the
 * same-named fields of srf_index_t -- confirm.
 */
int srf_read_index_hdr(srf_t *srf, srf_index_hdr_t *hdr, int no_seek);
int srf_write_index_hdr(srf_t *srf, srf_index_hdr_t *hdr);
srf_index_t *srf_index_create(char *ch_file, char *th_file, int dbh_sep);
void srf_index_destroy(srf_index_t *idx);
/* Takes the index and a FILE*; presumably prints statistics to fp -- confirm */
void srf_index_stats(srf_index_t *idx, FILE *fp);
int srf_index_add_cont_hdr(srf_index_t *idx, uint64_t pos);
int srf_index_add_trace_hdr(srf_index_t *idx, uint64_t pos);
int srf_index_add_trace_body(srf_index_t *idx, char *name, uint64_t pos);
int srf_index_write(srf_t *srf, srf_index_t *idx);

/*--- Higher level I/O functions */

/*
 * Sequential access to traces.  srf_next_trace returns an mFILE*, the
 * srf_next_ztr variants return a ztr_t*.
 *
 * NOTE(review): `name` is presumably an output buffer receiving the trace
 * name; its required size is not passed and is not visible here -- confirm.
 * NOTE(review): filter_mask is presumably tested against the read flags
 * (SRF_READ_FLAG_*) to skip reads, and *flags presumably receives the
 * flags of the returned read -- confirm.
 * NOTE(review): NULL presumably signals end of data or an error -- confirm.
 */
mFILE *srf_next_trace(srf_t *srf, char *name);
ztr_t *srf_next_ztr_flags(srf_t *srf, char *name, int filter_mask, int *flags);
ztr_t *srf_next_ztr(srf_t *srf, char *name, int filter_mask);

/*
 * ZTR helpers.  Both return a ztr_t*; their exact semantics and the
 * ownership of the returned object are not visible in this header.
 * Unlike the rest of this header, these two names carry no srf_ prefix.
 */
ztr_t *partial_decode_ztr(srf_t *srf, mFILE *mf, ztr_t *z);
ztr_t *ztr_dup(ztr_t *src);

/*
 * Block inspection.
 * NOTE(review): srf_next_block_type presumably returns one of the SRFB_*
 * codes without consuming the block; srf_next_block_details presumably
 * also reports the block position via *pos and a name via `name` (buffer
 * size not passed) -- confirm.
 */
int srf_next_block_type(srf_t *srf); /* peek ahead */
int srf_next_block_details(srf_t *srf, uint64_t *pos, char *name);

/*
 * Looks up a trace by name.  Takes three uint64_t output pointers.
 * NOTE(review): cpos/hpos/dpos are presumably the file positions of the
 * container header, trace header and data (trace body) blocks for that
 * trace; the return convention is not visible here -- confirm.
 */
int srf_find_trace(srf_t *srf, char *trace,
		   uint64_t *cpos, uint64_t *hpos, uint64_t *dpos);

/*
 * Builds a trace name.  Takes a format string (fmt), a suffix with an
 * explicit length (suffix, suffix_len) and an output buffer with its size
 * (name, name_len).
 * NOTE(review): how fmt and suffix are combined, whether name is always
 * NUL-terminated, and the meaning of the return value are not visible in
 * this header -- confirm.
 */
int construct_trace_name(char *fmt,
			 unsigned char *suffix, int suffix_len,
			 char *name, int name_len);

#endif /* _SRF_H_ */