File: slow5.h

package info (click to toggle)
libslow5lib 0.7.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 25,092 kB
  • sloc: ansic: 11,825; python: 1,179; sh: 547; makefile: 91; cpp: 40
file content (809 lines) | stat: -rwxr-xr-x 31,763 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
/**
 * @file slow5.h
 * @brief SLOW5 API
 * @author Sasha Jenner (jenner.sasha@gmail.com), Hasindu Gamaarachchi (hasindu@garvan.org.au), Hiruna Samarakoon
 * @date 27/02/2021
 */

/* IMPORTANT: The comments in this header file are NOT the API documentation.
The API documentation is available at https://hasindu2008.github.io/slow5tools/
The comments here are possibly not up to date.
*/

/*
MIT License

Copyright (c) 2020 Hasindu Gamaarachchi
Copyright (c) 2020 Sasha Jenner
Copyright (c) 2020 Hiruna Samarakoon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

// Header with slow5 file definitions
// TODO structure pack to min size
// TODO fix and add function descriptions
// TODO remove unnecessary header inclusions

#ifndef SLOW5_H
#define SLOW5_H

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include "klib/khash.h"
#include "klib/kvec.h"
#include "slow5_press.h"
#include "slow5_defs.h"
#include "slow5_error.h"

#ifdef __cplusplus
extern "C" {
#endif

/**************************************************************************************************
 ***  struct definitions **************************************************************************
 **************************************************************************************************/

/**
* @enum slow5_fmt
* @brief File formats we are dealing with
*
* No explicit values are assigned, so the enumerators take the values 0, 1 and 2
* in declaration order.
*/
enum slow5_fmt {
    SLOW5_FORMAT_UNKNOWN, ///< the format is unknown, usually the case before parsing the file extension
    SLOW5_FORMAT_ASCII,   ///< the format is ASCII SLOW5
    SLOW5_FORMAT_BINARY   ///< the format is binary SLOW5 (that is, BLOW5)
};

/**
* @struct slow5_fmt_meta
* @brief Pairs a name string with a SLOW5 file format.
*
* Instances of this struct make up the SLOW5_FORMAT_META table.
*/
struct slow5_fmt_meta {
    const char *name;       ///< name string of the format (SLOW5_FORMAT_META fills this with SLOW5_ASCII_NAME / SLOW5_BINARY_NAME). NOTE(review): previously described as "path of the SLOW5 file" - confirm
    enum slow5_fmt format;  ///< the format of the SLOW5 file
};

// Table pairing each known format name with its slow5_fmt value.
// SLOW5_FORMAT_UNKNOWN has no entry.
// Declared static in a header, so every translation unit including this file gets its own copy.
static const struct slow5_fmt_meta SLOW5_FORMAT_META[] = {
    { SLOW5_ASCII_NAME,   SLOW5_FORMAT_ASCII    },
    { SLOW5_BINARY_NAME,  SLOW5_FORMAT_BINARY   }
};

/*** SLOW5 Header *********************************************************************************/

/**
* @struct slow5_version
* @brief SLOW5 file version
*
* Each component is stored in a uint8_t, so it is limited to the range 0-255.
*/
struct slow5_version {
    uint8_t major;  ///< major version
    uint8_t minor;  ///< minor version
    uint8_t patch;  ///< patch version
};

// Version constant initialised from the SLOW5_VERSION_ARRAY macro
// (not defined in this header; presumably it comes from slow5_defs.h - TODO confirm).
static const struct slow5_version SLOW5_VERSION_STRUCT = SLOW5_VERSION_ARRAY;

// SLOW5 auxiliary types
// The first half lists the primitive types, the second half lists the matching array types in the same order.
// DO NOT rearrange! See subtracting SLOW5_INT8_T_ARRAY in SLOW5_TO_PRIM_TYPE:
// an array enumerator minus SLOW5_INT8_T_ARRAY must give its primitive counterpart
// (e.g. SLOW5_STRING maps to SLOW5_CHAR, SLOW5_ENUM_ARRAY maps to SLOW5_ENUM).
// if adding more in future, primitive types must be added after SLOW5_CHAR and arrays after SLOW5_STRING
// both the primitive type and the array type must be simultaneously added
// SLOW5_AUX_TYPE_META must be kept in the same order as this enum.
enum slow5_aux_type {
    SLOW5_INT8_T = 0,
    SLOW5_INT16_T,
    SLOW5_INT32_T,
    SLOW5_INT64_T,
    SLOW5_UINT8_T,
    SLOW5_UINT16_T,
    SLOW5_UINT32_T,
    SLOW5_UINT64_T,
    SLOW5_FLOAT,
    SLOW5_DOUBLE,
    SLOW5_CHAR,
    SLOW5_ENUM,

    SLOW5_INT8_T_ARRAY,     // first array type; everything from here on satisfies SLOW5_IS_PTR
    SLOW5_INT16_T_ARRAY,
    SLOW5_INT32_T_ARRAY,
    SLOW5_INT64_T_ARRAY,
    SLOW5_UINT8_T_ARRAY,
    SLOW5_UINT16_T_ARRAY,
    SLOW5_UINT32_T_ARRAY,
    SLOW5_UINT64_T_ARRAY,
    SLOW5_FLOAT_ARRAY,
    SLOW5_DOUBLE_ARRAY,
    SLOW5_STRING,           // array counterpart of SLOW5_CHAR
    SLOW5_ENUM_ARRAY,
};

// Non-zero when type is one of the array types, i.e. it is declared at or after SLOW5_INT8_T_ARRAY in enum slow5_aux_type.
#define SLOW5_IS_PTR(type)              ((type) >= SLOW5_INT8_T_ARRAY)
// Converts an array type to its primitive (element) type by subtracting the value of the first array enumerator.
// The argument is not checked: only meaningful when SLOW5_IS_PTR(ptr_type) holds.
#define SLOW5_TO_PRIM_TYPE(ptr_type)    ((enum slow5_aux_type) ((ptr_type) - SLOW5_INT8_T_ARRAY))

/* NULL (missing value) representation
 * Integer types use the maximum value of the type as the missing-value marker.
 * Floating point types use NaN: a NaN never compares equal to anything (including itself),
 * so test for a missing float/double with isnan() rather than ==.
 */
#define SLOW5_INT8_T_NULL     INT8_MAX
#define SLOW5_INT16_T_NULL    INT16_MAX
#define SLOW5_INT32_T_NULL    INT32_MAX
#define SLOW5_INT64_T_NULL    INT64_MAX
#define SLOW5_UINT8_T_NULL    UINT8_MAX
#define SLOW5_UINT16_T_NULL   UINT16_MAX
#define SLOW5_UINT32_T_NULL   UINT32_MAX
#define SLOW5_UINT64_T_NULL   UINT64_MAX
#define SLOW5_FLOAT_NULL      nanf("")
#define SLOW5_DOUBLE_NULL     nan("")
#define SLOW5_CHAR_NULL       0
#define SLOW5_ENUM_NULL       SLOW5_UINT8_T_NULL   /* same marker as uint8_t */


// Type with corresponding size
// One entry of the SLOW5_AUX_TYPE_META table.
struct slow5_aux_type_meta {
    enum slow5_aux_type type;   // the auxiliary type being described
    uint8_t size;               // size in bytes; the table stores the element size for array types
    const char *type_str;       // textual name of the type, e.g. "int8_t" or "int8_t*"
};

// Turns the argument into a string literal exactly as written (a macro argument is not expanded first).
#define SLOW5_TO_STR(x) #x
// Initialiser for a primitive type entry: { type, size of the raw C type, "raw_type" }
#define SLOW5_AUX_TYPE_META_PRIM(aux_type, raw_type) { aux_type, sizeof (raw_type), SLOW5_TO_STR(raw_type) }
// Initialiser for an array type entry: the size is that of ONE element and the name becomes "raw_type*"
// (adjacent string literals are concatenated by the compiler).
#define SLOW5_AUX_TYPE_META_ARRAY(aux_type, raw_type) { aux_type, sizeof (raw_type), SLOW5_TO_STR(raw_type) "*" }
// Table of { type, size, type string } with one entry per enumerator of slow5_aux_type.
// Any modification to slow5_aux_type must be followed by the appropriate modification here.
// The order must be identical to that in slow5_aux_type.
// The two enum entries are written by hand because their type string ("enum") differs from the C type holding them (uint8_t).
static const struct slow5_aux_type_meta SLOW5_AUX_TYPE_META[] = {
    // Needs to be the same order as the enum definition
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_INT8_T,           int8_t      ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_INT16_T,          int16_t     ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_INT32_T,          int32_t     ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_INT64_T,          int64_t     ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_UINT8_T,          uint8_t     ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_UINT16_T,         uint16_t    ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_UINT32_T,         uint32_t    ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_UINT64_T,         uint64_t    ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_FLOAT,            float       ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_DOUBLE,           double      ),
    SLOW5_AUX_TYPE_META_PRIM(   SLOW5_CHAR,             char        ),
    { SLOW5_ENUM,       sizeof (uint8_t),   "enum"  },

    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_INT8_T_ARRAY,     int8_t      ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_INT16_T_ARRAY,    int16_t     ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_INT32_T_ARRAY,    int32_t     ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_INT64_T_ARRAY,    int64_t     ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_UINT8_T_ARRAY,    uint8_t     ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_UINT16_T_ARRAY,   uint16_t    ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_UINT32_T_ARRAY,   uint32_t    ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_UINT64_T_ARRAY,   uint64_t    ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_FLOAT_ARRAY,      float       ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_DOUBLE_ARRAY,     double      ),
    SLOW5_AUX_TYPE_META_ARRAY(  SLOW5_STRING,           char        ),
    { SLOW5_ENUM_ARRAY, sizeof (uint8_t),   "enum*" },
};

// Auxiliary attribute to position map: attribute string -> index position
// Instantiates the khash type khash_t(slow5_s2ui32) with string keys and uint32_t values.
KHASH_MAP_INIT_STR(slow5_s2ui32, uint32_t)

/**
* @struct slow5_aux_meta
* @brief SLOW5 auxiliary field metadata (information available in the SLOW5 header)
*
* attrs, types and sizes are parallel arrays indexed by the position stored in attr_to_pos.
*/
typedef struct slow5_aux_meta {
    uint32_t num;                       ///< number of auxiliary fields
    size_t cap;                         ///< capacity of the arrays: attrs, types and sizes

    khash_t(slow5_s2ui32) *attr_to_pos; ///< hash table that maps field name string -> index position in the following arrays.
    char **attrs;                       ///< field names
    enum slow5_aux_type *types;         ///< field datatypes
    uint8_t *sizes;                     ///< field datatype sizes, for arrays this stores the size (in bytes) of the corresponding primitive type (TODO: this is probably redundant)

    /* the following are NULL if no auxiliary datatype is an enum type */
    char ***enum_labels;                ///< array of enum labels stored as strings
    uint8_t *enum_num_labels;           ///< array of number of enum labels
} slow5_aux_meta_t;

// Header data map: attribute string -> data string
// Instantiates khash_t(slow5_s2s) with string keys and char * values.
KHASH_MAP_INIT_STR(slow5_s2s, char *)
// Header data attributes set
// Instantiates khash_t(slow5_s), a set of strings (keys only, no values).
KHASH_SET_INIT_STR(slow5_s)

/**
* @struct slow5_hdr_data
* @brief SLOW5 header data (constant attributes in FAST5 files)
*/
struct slow5_hdr_data {
    uint32_t num_attrs;                   ///< Number of data attributes
    khash_t(slow5_s) *attrs;              ///< Set of the data attribute keys for internal access (in case of multiple read groups, the union of keys from all read groups)
    kvec_t(khash_t(slow5_s2s) *) maps;    ///< Dynamic vector of hash maps (attribute key string -> attribute value string). Length of the vector is equal to num_read_groups. Index in the vector corresponds to the read group number. The keys that are not relevant to a particular read group are not stored in this hash map.
};
typedef struct slow5_hdr_data slow5_hdr_data_t;

/**
* @struct slow5_hdr
* @brief SLOW5 header
*
* The header data is embedded by value, while the auxiliary metadata is held through a pointer.
*/
struct slow5_hdr {
	struct slow5_version version;       ///< SLOW5 file version
    uint32_t num_read_groups;           ///< Number of read groups
    slow5_hdr_data_t data;              ///< Header data (constant fields in FAST5 files). Not to be directly accessed, use provided functions instead.
    slow5_aux_meta_t *aux_meta;         ///< Auxiliary field metadata. Not to be directly accessed, use provided functions instead.
};
typedef struct slow5_hdr slow5_hdr_t;

/*** SLOW5 record *********************************************************************************/

// SLOW5 primary record columns stored as an enum to keep the order of the columns.
// The enumerators are generated by SLOW5_COLS_FOREACH(SLOW5_GENERATE_ENUM); both macros are defined outside this header.
// SLOW5_COLS_NUM comes last; presumably it equals the number of primary columns - TODO confirm against the macro definition.
// TODO: make sure the first one is set to zero
enum slow5_cols {
    SLOW5_COLS_FOREACH(SLOW5_GENERATE_ENUM)
    SLOW5_COLS_NUM
};

/**
* @struct slow5_rec_aux_data
* @brief SLOW5 auxiliary field data (represents a single SLOW5 auxiliary field of a particular read record)
*/
struct slow5_rec_aux_data {
    uint64_t len;       ///< number of elements in an array (if a primitive type this is always 1)
    uint64_t bytes;     ///< total number of bytes in data (currently, the allocated size which is equal to the amount of data in it)
    enum slow5_aux_type type; ///< data type of the auxiliary attribute
    uint8_t *data;      ///< raw data, stored as bytes whatever the value of type
};

// Record auxiliary data map: auxiliary attribute string -> auxiliary data
// (used as the aux_map of a record through the slow5_aux_data_t typedef)
// Instantiates khash_t(slow5_s2a) with string keys and struct slow5_rec_aux_data values stored by value.
KHASH_MAP_INIT_STR(slow5_s2a, struct slow5_rec_aux_data)

typedef uint64_t slow5_rec_size_t; // size of the whole record (in bytes)
typedef uint16_t slow5_rid_len_t;  // length of the read ID string (does not include null character); a 16-bit type, so at most 65535
typedef khash_t(slow5_s2a) slow5_aux_data_t;  // auxiliary field name string -> auxiliary field data value

/**
* @struct slow5_rec
* @brief SLOW5 record data struct (represents a single SLOW5 record)
*
* The primary fields are not written out here: they are generated by the
* SLOW5_COLS_FOREACH(SLOW5_GENERATE_STRUCT) macro, which is defined outside this header.
*/
struct slow5_rec {
    slow5_rid_len_t read_id_len;        ///< length of the read ID string (does not include null character)
    SLOW5_COLS_FOREACH(SLOW5_GENERATE_STRUCT) ///< macro magic that generates the primary fields (see below)
                                        ///< char* read_id;
                                        ///< uint32_t read_group;
                                        ///< double digitisation;
                                        ///< double offset;
                                        ///< double range;
                                        ///< double sampling_rate;
                                        ///< uint64_t len_raw_signal;
                                        ///< int16_t* raw_signal;
    slow5_aux_data_t *aux_map;               ///< Auxiliary field name string -> auxiliary field data value. Not to be directly accessed, use provided functions instead.
};
typedef struct slow5_rec slow5_rec_t;

/*** SLOW5 file handler ***************************************************************************/

/**
* @struct slow5_file_meta
* @brief SLOW5 file meta data
*/
struct slow5_file_meta {
    const char *pathname;       ///< file path
    int fd;                     ///< file descriptor
    uint64_t start_rec_offset;  ///< offset (in bytes) of the first SLOW5 record (skipping the SLOW5 header; used for indexing)
    char *fread_buffer;         ///< buffer for fread
    const char *mode;           ///< file mode
};
typedef struct slow5_file_meta slow5_file_meta_t;

// Forward declaration: struct slow5_idx is not defined in this header, so it is only usable through pointers here.
typedef struct slow5_idx slow5_idx_t;

/**
* @struct slow5_file
* @brief SLOW5 file structure
*
* This is the handle type returned by slow5_open() and passed to slow5_close().
*/
struct slow5_file {
    FILE *fp;                   ///< file pointer
    enum slow5_fmt format;      ///< whether SLOW5, BLOW5 etc...
    slow5_press_t *compress;    ///< compression related metadata
    slow5_hdr_t *header;        ///< SLOW5 header
    slow5_idx_t *index;         ///< SLOW5 index (NULL if not applicable)
    slow5_file_meta_t meta;     ///< file metadata
};
typedef struct slow5_file slow5_file_t;


/**************************************************************************************************
 ***  High-level API ******************************************************************************
 **************************************************************************************************/

/*
IMPORTANT: The high-level API is stable.
Existing function prototypes must NOT be changed, as such changes affect backward compatibility.
Newer functions can be added while keeping the existing ones intact.
*/

/**
 * Open a slow5 file with a specific mode given its pathname.
 *
 * Attempt to guess the file's slow5 format from the pathname's extension.
 * Return NULL on error.
 *
 * If successful, return a slow5 file structure with the header parsed.
 * slow5_close() should be called when finished with the structure.
 *
 *
 * @param   pathname    relative or absolute path to slow5 file
 * @param   mode        "r" for reading, "w" for writing a new file, "a" for appending to an existing file
 * @return              slow5 file structure, or NULL on error
 */
slow5_file_t *slow5_open(const char *pathname, const char *mode);

/**
 * Close a slow5 file and free its memory.
 * If the file had been opened for writing or appending, an EOF marker will be written.
 *
 * @param   s5p slow5 file structure
 * @return      same as fclose()
 */
int slow5_close(slow5_file_t *s5p);

/**
 * Create the index file for a slow5 file.
 * Overwrites the index file if it already exists.
 *
 * @param   s5p slow5 file structure
 * @return  0 if successful,  <-1> on error
 */
int slow5_idx_create(slow5_file_t *s5p);

/**
 * Loads the index file for a slow5 file.
 * Creates the index if not found.
 * An index loaded this way is released with slow5_idx_unload().
 *
 * Return -1 on error,
 * 0 on success.
 *
 * @param   s5p slow5 file structure
 * @return  0 on success, -1 on error
 */
int slow5_idx_load(slow5_file_t *s5p);

/**
 * Unloads an index associated with a slow5_file_t by slow5_idx_load() and frees its memory.
 *
 * @param   s5p slow5 file structure
 */
void slow5_idx_unload(slow5_file_t *s5p);

/**
 * Get a header data attribute for a particular read_group.
 *
 * Returns NULL if the attribute name doesn't exist
 * or the read group is out of range
 * or an input parameter is NULL.
 *
 * Note the parameter order: the header comes last.
 *
 * @param   attr        attribute name
 * @param   read_group  the read group
 * @param   header      slow5 header
 * @return  the attribute's value, or NULL on error
 */
char *slow5_hdr_get(const char *attr, uint32_t read_group, const slow5_hdr_t *header);

/**
 * Get a read entry from a slow5 file corresponding to a read_id.
 *
 * Allocates memory for *read if it is NULL.
 * Otherwise, the data in *read is freed and overwritten.
 * slow5_rec_free() should always be called when finished with the structure.
 *
 * Requires the slow5 index to be loaded using slow5_idx_load().
 *
 * Return:
 *  >=0  the read was successfully found and stored
 *  <0   error code
 *
 * Errors:
 * SLOW5_ERR_NOTFOUND   read_id was not found in the index
 * SLOW5_ERR_ARG        read_id, read or s5p is NULL
 * SLOW5_ERR_IO         other error when reading the slow5 file
 * SLOW5_ERR_RECPARSE   parsing error
 * SLOW5_ERR_NOIDX      the index has not been loaded
 *
 * @param   read_id the read identifier
 * @param   read    address of a slow5_rec pointer
 * @param   s5p     slow5 file
 * @return  >=0 on success, otherwise one of the error codes described above
 */
int slow5_get(const char *read_id, slow5_rec_t **read, slow5_file_t *s5p);

/**
 * Get the read entry under the current file pointer of a slow5 file.
 *
 * Allocates memory for *read if it is NULL.
 * Otherwise, the data in *read is freed and overwritten.
 * slow5_rec_free() should be called when finished with the structure.
 *
 * Return value:
 * >=0  the read was successfully found and stored
 * <0   error code
 *
 * Errors:
 * SLOW5_ERR_EOF        EOF reached
 * SLOW5_ERR_ARG        read or s5p is NULL
 * SLOW5_ERR_IO         other error when reading the slow5 file
 * SLOW5_ERR_RECPARSE   record parsing error
 *
 * @param   read    address of a slow5_rec_t pointer
 * @param   s5p     slow5 file
 * @return  >=0 on success, otherwise one of the error codes described above
 */
int slow5_get_next(slow5_rec_t **read, slow5_file_t *s5p);


/**
 * Free a slow5 record.
 *
 * @param   read    pointer to the slow5 record to free (the record itself, not the address of a pointer)
 */
void slow5_rec_free(slow5_rec_t *read);


/**
 * Get an auxiliary field in a SLOW5 record as a primitive value.
 *
 * The description below is written for slow5_aux_get_int8() (8-bit signed integer).
 * The functions that follow share the same parameter list and differ in return type;
 * presumably each returns the SLOW5_*_NULL value matching its type on failure - TODO confirm.
 *
 * @param   read    pointer to the slow5 record
 * @param   field   auxiliary field name
 * @param   err     error code, 0 on success, <0 on failure and slow5_errno is set
 *                  SLOW5_ERR_ARG   if read or field is NULL
 *                  SLOW5_ERR_NOAUX if no auxiliary hash map for the record
 *                  SLOW5_ERR_NOFLD if the field was not found
 *                  SLOW5_ERR_TYPE if the desired return type does not match the field's type
 * @return  field data value or SLOW5_INT8_T_NULL on failure
 */
int8_t slow5_aux_get_int8(const slow5_rec_t *read, const char *field, int *err);
int16_t slow5_aux_get_int16(const slow5_rec_t *read, const char *field, int *err);
int32_t slow5_aux_get_int32(const slow5_rec_t *read, const char *field, int *err);
int64_t slow5_aux_get_int64(const slow5_rec_t *read, const char *field, int *err);
uint8_t slow5_aux_get_uint8(const slow5_rec_t *read, const char *field, int *err);
uint16_t slow5_aux_get_uint16(const slow5_rec_t *read, const char *field, int *err);
uint32_t slow5_aux_get_uint32(const slow5_rec_t *read, const char *field, int *err);
uint64_t slow5_aux_get_uint64(const slow5_rec_t *read, const char *field, int *err);
float slow5_aux_get_float(const slow5_rec_t *read, const char *field, int *err);
double slow5_aux_get_double(const slow5_rec_t *read, const char *field, int *err);
char slow5_aux_get_char(const slow5_rec_t *read, const char *field, int *err);
uint8_t slow5_aux_get_enum(const slow5_rec_t *read, const char *field, int *err);

/**
 * Get an auxiliary field in a SLOW5 record as an array.
 *
 * The description below is written for slow5_aux_get_int8_array() (8-bit signed integer array).
 * The functions that follow share the same parameter list and differ in the element type of the returned pointer.
 * NOTE(review): ownership of the returned array is not stated here - confirm before freeing or modifying it.
 *
 * @param   read    pointer to the slow5 record
 * @param   field   auxiliary field name
 * @param   len     number of data values in the returned array
 * @param   err     error code, 0 on success, <0 on failure and slow5_errno is set
 *                  SLOW5_ERR_ARG   if read or field is NULL
 *                  SLOW5_ERR_NOAUX if no auxiliary hash map for the record
 *                  SLOW5_ERR_NOFLD if the field was not found
 *                  SLOW5_ERR_TYPE if the desired return type does not match the field's type
 * @return  pointer to the array of data values or NULL on error
 */
int8_t *slow5_aux_get_int8_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
int16_t *slow5_aux_get_int16_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
int32_t *slow5_aux_get_int32_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
int64_t *slow5_aux_get_int64_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
uint8_t *slow5_aux_get_uint8_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
uint16_t *slow5_aux_get_uint16_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
uint32_t *slow5_aux_get_uint32_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
uint64_t *slow5_aux_get_uint64_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
float *slow5_aux_get_float_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
double *slow5_aux_get_double_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
char *slow5_aux_get_string(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);
uint8_t *slow5_aux_get_enum_array(const slow5_rec_t *read, const char *field, uint64_t *len, int *err);

/****** Writing SLOW5 files ******.
 * This is just around the corner.
 * However, this is being procrastinated until someone requests. If anyone is interested please open a GitHub issue.
***/

/**
 * Adds a new header data attribute.
 *
 * Returns -1 if an input parameter is NULL.
 * Returns -2 if the attribute already exists.
 * Returns -3 on an internal error.
 * Returns 0 otherwise.
 *
 * @param   attr        attribute name
 * @param   header      pointer to the header
 * @return  0 on success, <0 on error as described above
 */
int slow5_hdr_add(const char *attr, slow5_hdr_t *header);

/**
 * Sets a header data attribute for a particular read_group.
 *
 * Doesn't take memory ownership of the value given.
 *
 * Returns -1 if the attribute name doesn't exist
 * or the read group is out of range
 * or an input parameter is NULL.
 * Returns 0 otherwise.
 *
 * @param   attr        attribute name
 * @param   value       new attribute value
 * @param   read_group  the read group
 * @param   header      slow5 header
 * @return  0 on success, -1 on error
 */
int slow5_hdr_set(const char *attr, const char *value, uint32_t read_group, slow5_hdr_t *header);

/**
 * Adds an auxiliary field to a SLOW5 header.
 *
 * Return values:
 * 0    success
 * -1   null input
 * -2   other failure
 * -3   use slow5_aux_meta_add_enum instead if type is SLOW5_ENUM or SLOW5_ENUM_ARRAY
 * TODO this error checking is bad, reorder parameters
 *
 * @param   field       field name
 * @param   type        slow5 data type
 * @param   header      pointer to the header
 * @return  0 on success, <0 on error
 */
int slow5_aux_add(const char *field, enum slow5_aux_type type, slow5_hdr_t *header);

/**
 * Writes the SLOW5 header associated with a SLOW5 file structure to that file.
 *
 * On success, the number of bytes written is returned.
 * On error, -1 is returned.
 *
 * @param   s5p              slow5 file structure
 * @return  number of bytes written, -1 on error
 */
int slow5_hdr_write(slow5_file_t *s5p);

/**
 * Allocates a new SLOW5 record with every byte set to zero.
 * The record is to be freed with slow5_rec_free().
 *
 * @return  ptr to the zero-filled record, or NULL if calloc() fails
 */
static inline slow5_rec_t *slow5_rec_init(void) {
    /* the cast keeps this header usable from C++, where void * does not convert implicitly */
    return (slow5_rec_t *) calloc(1, sizeof (slow5_rec_t));
}

// Sets an auxiliary field (a primitive datatype) of a SLOW5 record.
// For non-array types only.
// Return values:
// -1   input invalid
// -2   field not found
// -3   type is an array type
// -4   data is invalid (eg enum out of range)
// NOTE: these two functions require the string passed as "field" to stay valid beyond the call (lifetime scope).
// Can be fixed by pointing to the header aux attribute list.
// https://github.com/hasindu2008/slow5lib/commit/63c81a25689b608275277b66300dde824d371bf7
int slow5_aux_set(slow5_rec_t *read, const char *field, const void *data, slow5_hdr_t *header);
// Sets an auxiliary field (string datatype) of a SLOW5 record.
// NOTE(review): return values are not documented here; presumably similar to slow5_aux_set - confirm.
int slow5_aux_set_string(slow5_rec_t *read, const char *field, const char *data, slow5_hdr_t *header);

/**
 * Writes a SLOW5 record to a SLOW5 file.
 *
 * On success, the number of bytes written is returned.
 * On error, -1 is returned.
 *
 * @param   read             slow5_rec pointer
 * @param   s5p              slow5 file structure
 * @return  number of bytes written, -1 on error
 */
int slow5_write(slow5_rec_t *read, slow5_file_t *s5p);

// Set the compression methods used for writing.
// Should be done immediately after opening a blow5 file for writing (mode 'w').
// rec_press and sig_press are presumably the record and signal compression methods - TODO confirm.
// NOTE(review): return values are not documented here.
int slow5_set_press(slow5_file_t *s5p, enum slow5_press_method rec_press, enum slow5_press_method sig_press);


/**************************************************************************************************
 ***  Low-level API *******************************************************************************
 **************************************************************************************************/

// Set the log verbosity level. The log is printed to the standard error.
// Sets a global variable, so not thread safe.
void slow5_set_log_level(enum slow5_log_level_opt log_level);

// Set the exit condition for slow5lib.
// Sets a global variable, so not thread safe.
void slow5_set_exit_condition(enum slow5_exit_condition_opt exit_condition);


// Get the list of hdr data keys in sorted order.
// Only the returned pointer must be freed, not the strings inside it (subject to change).
// *len is set to the number of elements.
// Returns NULL if there are no attributes.
const char **slow5_get_hdr_keys(const slow5_hdr_t *header,uint64_t *len);

// Gets the list of read ids from the SLOW5 index.
// The returned list of read ids is a pointer that must not be freed by the user.
// *len will have the number of read ids.
// NULL will be returned in case of error.
char **slow5_get_rids(const slow5_file_t *s5p, uint64_t *len);

// Get the pointer to the auxiliary field names.
// NOTE(review): *len presumably receives the number of auxiliary fields - confirm.
char **slow5_get_aux_names(const slow5_hdr_t *header,uint64_t *len);
// Get the pointer to the auxiliary field types.
// NOTE(review): *len presumably receives the number of auxiliary fields - confirm.
enum slow5_aux_type *slow5_get_aux_types(const slow5_hdr_t *header,uint64_t *len);
/**
 * Get the enum labels for a specific auxiliary field and set the number of labels in *n.
 *
 * Returns NULL on error, with slow5_errno set to
 * SLOW5_ERR_ARG    if header or field is NULL (n is allowed to be NULL)
 * SLOW5_ERR_NOAUX  if auxiliary header is NULL
 * SLOW5_ERR_TYPE   if the enum labels or num_labels array is NULL, or the field type is not an enum type
 * SLOW5_ERR_NOFLD  if the auxiliary field was not found
 * SLOW5_ERR_MEM    memory allocation error
 */
char **slow5_get_aux_enum_labels(const slow5_hdr_t *header, const char *field, uint8_t *n);

// NOTE(review): the four functions below are not documented in this header.
// The notes on each are inferred from names and signatures only - confirm before relying on them.

// Presumably fetches the raw bytes of the next record into *mem and their count into *bytes - TODO confirm.
int slow5_get_next_bytes(char **mem, size_t *bytes, slow5_file_t *s5p);

// Presumably parses the raw bytes in *mem into *read - TODO confirm.
int slow5_decode(char **mem, size_t *bytes, slow5_rec_t **read, slow5_file_t *s5p);

// Presumably serialises read into *mem and stores the byte count in *bytes - TODO confirm.
int slow5_encode(char **mem, size_t *bytes, slow5_rec_t *read, slow5_file_t *s5p);

// Presumably writes bytes bytes from mem to the file - TODO confirm.
int slow5_write_bytes(char *mem, size_t bytes, slow5_file_t *s5p);

/*
IMPORTANT: The following low-level API functions are not yet finalised or documented, and will not be until someone requests it.
If anyone is interested, please open a GitHub issue, rather than trying to figure it out from the code.
Function prototypes can be changed without notice or completely removed. So do NOT use these functions in your code.
These functions are used by slow5tools and pyslow5 - so any change to a function here means slow5tools and pyslow5 must be fixed.
*/

/**
 * Open a slow5 file of a specific format with a mode given its pathname.
 *
 * Return NULL if pathname or mode is NULL, or if the format specified doesn't match the file.
 * slow5_open_with(pathname, mode, SLOW5_FORMAT_UNKNOWN) is equivalent to slow5_open(pathname, mode).
 *
 * Otherwise, return a slow5 file structure with the header parsed.
 * slow5_close() should be called when finished with the structure.
 *
 * TODO: same issues as in slow5_open are applicable to this
 *
 * @param   pathname    relative or absolute path to slow5 file
 * @param   mode        same mode as in fopen()
 * @param   format      format of the slow5 file
 * @return              slow5 file structure, or NULL on error
 */
slow5_file_t *slow5_open_with(const char *pathname, const char *mode, enum slow5_fmt format);

/**
 * Add a new header data attribute.
 *
 * Returns -1 if an input parameter is NULL.
 * Returns -2 if the attribute already exists.
 * Returns 0 otherwise.
 *
 * @param   attr    attribute name
 * @param   header  slow5 header
 * @return  0 on success, <0 on error as described above
 */
int slow5_hdr_add_attr(const char *attr, slow5_hdr_t *header);

/**
 * Add a new header read group.
 *
 * All values are set to NULL for the new read group.
 *
 * Returns -1 if an input parameter is NULL.
 * Returns the new read group number otherwise.
 *
 * @param   header  slow5 header
 * @return  the new read group number on success, < 0 on error as described above
 */
// TODO check return type but should be large enough to return -1 and the largest read group
int64_t slow5_hdr_add_rg(slow5_hdr_t *header);


/**
 * Get the header in the specified format.
 *
 * Returns NULL if s5p is NULL
 * (NOTE(review): there is no s5p parameter; this presumably refers to header - confirm)
 * or format is SLOW5_FORMAT_UNKNOWN
 * or an internal error occurs.
 *
 * @param   header          slow5 header
 * @param   format          slow5 format to write the entry in
 * @param   comp            compression method
 * @param   written         number of bytes written to the returned buffer
 * @return  malloced memory storing the slow5 header representation,
 *          to use free() on afterwards
 */
void *slow5_hdr_to_mem(slow5_hdr_t *header, enum slow5_fmt format, slow5_press_method_t comp, size_t *written);

/**
 * Print the header in the specified format to a file pointer.
 * slow5_hdr_print() is the same operation with stdout as the file pointer.
 *
 * On success, the number of bytes written is returned.
 * On error, -1 is returned.
 *
 * @param   fp              output file pointer
 * @param   header          slow5 header
 * @param   format          slow5 format to write the entry in
 * @param   comp            compression method
 * @return  number of bytes written, -1 on error
 */
int slow5_hdr_fwrite(FILE *fp, slow5_hdr_t *header, enum slow5_fmt format, slow5_press_method_t comp);
/* Convenience wrapper: prints the header to stdout through slow5_hdr_fwrite() and passes its result back unchanged. */
static inline int slow5_hdr_print(slow5_hdr_t *header, enum slow5_fmt format, slow5_press_method_t comp) {
    FILE *const out = stdout;
    const int ret = slow5_hdr_fwrite(out, header, format, comp);

    return ret;
}



/**
 * Get the read entry in the specified format.
 *
 * Returns NULL if read is NULL,
 * or format is SLOW5_FORMAT_UNKNOWN,
 * or the read attribute values are invalid
 *
 * @param   read        slow5_rec pointer
 * @param   aux_meta    auxiliary field metadata (NOTE(review): previously undocumented; presumably the aux_meta of the file's header - confirm)
 * @param   format      slow5 format to write the entry in
 * @param   compress    compress structure
 * @param   n           number of bytes written to the returned buffer (previously documented under the name "written")
 * @return  malloced string to use free() on, NULL on error
 */
void *slow5_rec_to_mem(slow5_rec_t *read, slow5_aux_meta_t *aux_meta, enum slow5_fmt format, slow5_press_t *compress, size_t *n);

/**
 * Print a read entry in the specified format to a file pointer.
 * slow5_rec_print() is the same operation with stdout as the file pointer.
 *
 * On success, the number of bytes written is returned.
 * On error, -1 is returned.
 *
 * @param   fp          output file pointer
 * @param   read        slow5_rec pointer
 * @param   aux_meta    auxiliary field metadata (NOTE(review): previously undocumented; presumably the aux_meta of the file's header - confirm)
 * @param   format      slow5 format to write entry in
 * @param   compress    compress structure
 * @return  number of bytes written, -1 on error
 */
int slow5_rec_fwrite(FILE *fp, slow5_rec_t *read, slow5_aux_meta_t *aux_meta, enum slow5_fmt format, slow5_press_t *compress);
/* Convenience wrapper: prints a read entry to stdout through slow5_rec_fwrite() and passes its result back unchanged. */
static inline int slow5_rec_print(slow5_rec_t *read, slow5_aux_meta_t *aux_meta, enum slow5_fmt format, slow5_press_t *compress) {
    FILE *const out = stdout;
    const int ret = slow5_rec_fwrite(out, read, aux_meta, format, compress);

    return ret;
}

/**
 * Print the binary end of file to a file pointer.
 * slow5_eof_print() is the same operation with stdout as the file pointer.
 *
 * On success, the number of bytes written is returned.
 * On error, -1 is returned.
 *
 * @param   fp      output file pointer
 * @return  number of bytes written, -1 on error
 */
ssize_t slow5_eof_fwrite(FILE *fp);
/* Convenience wrapper: prints the binary end of file to stdout through slow5_eof_fwrite() and passes its result back unchanged. */
static inline ssize_t slow5_eof_print(void) {
    FILE *const out = stdout;
    const ssize_t ret = slow5_eof_fwrite(out);

    return ret;
}

// Converts the slow5 file `from`, writing the result to to_fp in the format to_format with compression to_compress.
// NOTE(review): the summary above is inferred from the name and parameters only - confirm.
// Return values:
// 0    success
// -1   input invalid
// -2   failure
int slow5_convert(slow5_file_t *from, FILE *to_fp, enum slow5_fmt to_format, slow5_press_method_t to_compress);



#ifdef __cplusplus
}
#endif


#endif