1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
|
// Copyright (c) 2016-2024 The Regents of the University of Michigan
// Part of GSD, released under the BSD 2-Clause License.
#ifndef GSD_H
#define GSD_H
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C"
{
#endif
/*! \file gsd.h
\brief Declare GSD data types and C API
*/
/** Element type identifiers for GSD data chunks.
    Each identifier is given its numeric value explicitly. gsd_index_entry::type holds one of
    these values, and gsd_sizeof_type() reports the size in bytes of one element.
*/
enum gsd_type
{
    GSD_TYPE_UINT8 = 1,     ///< Unsigned 8-bit integer.
    GSD_TYPE_UINT16 = 2,    ///< Unsigned 16-bit integer.
    GSD_TYPE_UINT32 = 3,    ///< Unsigned 32-bit integer.
    GSD_TYPE_UINT64 = 4,    ///< Unsigned 64-bit integer.
    GSD_TYPE_INT8 = 5,      ///< Signed 8-bit integer.
    GSD_TYPE_INT16 = 6,     ///< Signed 16-bit integer.
    GSD_TYPE_INT32 = 7,     ///< Signed 32-bit integer.
    GSD_TYPE_INT64 = 8,     ///< Signed 64-bit integer.
    GSD_TYPE_FLOAT = 9,     ///< 32-bit floating point number.
    GSD_TYPE_DOUBLE = 10,   ///< 64-bit floating point number.
    GSD_TYPE_CHARACTER = 11 ///< 8-bit character.
};
/** Access mode requested when opening a GSD file.
    Passed to gsd_open() and gsd_create_and_open(); the value used is recorded in
    gsd_handle::open_flags.
*/
enum gsd_open_flag
{
    GSD_OPEN_READWRITE = 1, ///< Open for both reading and writing.
    GSD_OPEN_READONLY = 2,  ///< Open only for reading.
    GSD_OPEN_APPEND = 3     ///< Open only for writing.
};
/** Error return values.
The API functions that return int report GSD_SUCCESS (0) on success and one of the negative
values below on failure.
*/
enum gsd_error
{
/// Success.
GSD_SUCCESS = 0,
/// IO error. Check ``errno`` for details.
GSD_ERROR_IO = -1,
/// Invalid argument passed to function.
GSD_ERROR_INVALID_ARGUMENT = -2,
/// The file is not a GSD file.
GSD_ERROR_NOT_A_GSD_FILE = -3,
/// The GSD file version cannot be read.
GSD_ERROR_INVALID_GSD_FILE_VERSION = -4,
/// The GSD file is corrupt.
GSD_ERROR_FILE_CORRUPT = -5,
/// GSD failed to allocate memory.
GSD_ERROR_MEMORY_ALLOCATION_FAILED = -6,
/// The GSD file cannot store any additional unique data chunk names.
GSD_ERROR_NAMELIST_FULL = -7,
/** This API call requires that the GSD file be opened with the mode GSD_OPEN_APPEND or
GSD_OPEN_READWRITE.
*/
GSD_ERROR_FILE_MUST_BE_WRITABLE = -8,
/** This API call requires that the GSD file be opened with the mode GSD_OPEN_READONLY or
GSD_OPEN_READWRITE.
*/
GSD_ERROR_FILE_MUST_BE_READABLE = -9,
};
/// Name size constant (an enum rather than a macro, so it is a typed integer constant).
enum
{
/** v1 file: Size of a GSD name in memory. v2 file: The name buffer size is a multiple of
GSD_NAME_SIZE.
Also the size in bytes of the gsd_header::application and gsd_header::schema arrays.
*/
GSD_NAME_SIZE = 64
};
/// Header padding constant.
enum
{
/// Reserved bytes in the header structure (the size of gsd_header::reserved).
GSD_RESERVED_BYTES = 80
};
/** GSD file header
The in-memory and on-disk storage of the GSD file header. Stored in the first 256 bytes of
the file.
The member sizes sum to exactly 256 bytes (5 * 8 + 2 * 4 + 2 * GSD_NAME_SIZE +
GSD_RESERVED_BYTES). Because this struct is also the on-disk format, the order and types of
the members must not be changed.
@warning All members are **read-only** to the caller.
*/
struct gsd_header
{
/// Magic number marking that this is a GSD file.
uint64_t magic;
/// Location of the chunk index in the file.
uint64_t index_location;
/// Number of index entries that will fit in the space allocated.
uint64_t index_allocated_entries;
/// Location of the name list in the file.
uint64_t namelist_location;
/// Number of bytes in the namelist divided by GSD_NAME_SIZE.
uint64_t namelist_allocated_entries;
/// Schema version: from gsd_make_version().
uint32_t schema_version;
/// GSD file format version: from gsd_make_version().
uint32_t gsd_version;
/// Name of the application that generated this file (GSD_NAME_SIZE bytes).
char application[GSD_NAME_SIZE];
/// Name of the data schema (GSD_NAME_SIZE bytes).
char schema[GSD_NAME_SIZE];
/// Reserved for future use (GSD_RESERVED_BYTES bytes).
char reserved[GSD_RESERVED_BYTES];
};
/** Index entry
An index entry for a single chunk of data. Pointers to entries are returned by
gsd_find_chunk() and consumed by gsd_read_chunk().
The member sizes sum to 32 bytes (3 * 8 + 4 + 2 + 1 + 1).
@warning All members are **read-only** to the caller.
*/
struct gsd_index_entry
{
/// Frame index of the chunk.
uint64_t frame;
/// Number of rows in the chunk.
uint64_t N;
/// Location of the chunk in the file (note: signed, unlike the other 64-bit members).
int64_t location;
/// Number of columns in the chunk.
uint32_t M;
/// Index of the chunk name in the name list.
uint16_t id;
/// Data type of the chunk: one of gsd_type.
uint8_t type;
/// Flags (for internal use).
uint8_t flags;
};
/** Name/id mapping
A string name paired with an ID. Used for storing sorted name/id mappings in a hash map
(see gsd_name_id_map). Pairs whose names share a hash are linked through *next*.
*/
struct gsd_name_id_pair
{
/// Pointer to the name. The pair does not own the string: name storage is allocated in
/// gsd_handle.
char* name;
/// Next name/id pair with the same hash.
struct gsd_name_id_pair* next;
/// Entry id.
uint16_t id;
};
/** Name/id hash map
A hash map of string names to integer identifiers, built from gsd_name_id_pair elements.
*/
struct gsd_name_id_map
{
/// Name/id mappings.
struct gsd_name_id_pair* v;
/// Number of entries in the mapping.
size_t size;
};
/** Array of index entries
May point to a mapped location of index entries in the file or an in-memory buffer.
*mapped_data* tells the two cases apart: it is NULL when the entries are not mapped.
*/
struct gsd_index_buffer
{
/// Indices in the buffer.
struct gsd_index_entry* data;
/// Number of entries in the buffer.
size_t size;
/// Number of entries available in the buffer.
size_t reserved;
/// Pointer to mapped data (NULL if not mapped).
void* mapped_data;
/// Number of bytes mapped.
size_t mapped_len;
};
/** Byte buffer
Used to buffer small data chunks that are held for a buffered write at the end of a frame.
Also used to hold the names (see gsd_name_buffer).
*/
struct gsd_byte_buffer
{
/// Data.
char* data;
/// Number of bytes in the buffer.
size_t size;
/// Number of bytes available in the buffer.
size_t reserved;
};
/** Name buffer
Holds a list of string names, in order, separated by NULL terminators. In v1 files, each
name is 64 bytes. In v2 files, only one NULL terminator is placed between each name.
*/
struct gsd_name_buffer
{
/// Byte storage that holds the names.
struct gsd_byte_buffer data;
/// Number of names in the list.
size_t n_names;
};
/** File handle
A handle to an open GSD file.
This handle is obtained when opening a GSD file (gsd_open() or gsd_create_and_open()) and is
passed into every function that operates on the file.
@warning All members are **read-only** to the caller.
*/
struct gsd_handle
{
/// File descriptor.
int fd;
/// The file header.
struct gsd_header header;
/// Mapped data chunk index.
struct gsd_index_buffer file_index;
/// Index entries to append to the current frame.
struct gsd_index_buffer frame_index;
/// Buffered index entries to append to the current frame.
struct gsd_index_buffer buffer_index;
/// Buffered write data.
struct gsd_byte_buffer write_buffer;
/// List of names stored in the file.
struct gsd_name_buffer file_names;
/// List of names added in the current frame.
struct gsd_name_buffer frame_names;
/// The index of the last frame in the file.
uint64_t cur_frame;
/// Size of the file (in bytes).
int64_t file_size;
/// Flags passed to gsd_open() when opening this handle.
enum gsd_open_flag open_flags;
/// Access the names in the namelist.
struct gsd_name_id_map name_map;
/// Number of index entries pending in the current frame.
uint64_t pending_index_entries;
/// Maximum write buffer size (bytes). See gsd_set_maximum_write_buffer_size().
uint64_t maximum_write_buffer_size;
/// Number of index entries to buffer before flushing. See gsd_set_index_entries_to_buffer().
uint64_t index_entries_to_buffer;
};
/** Specify a version.
Packs a major/minor pair into the single 32-bit value stored in gsd_header::schema_version
and gsd_header::gsd_version.
@param major major version
@param minor minor version
@return a packed version number aaaa.bbbb suitable for storing in a gsd file version entry.
*/
uint32_t gsd_make_version(unsigned int major, unsigned int minor);
/** Create a GSD file.
@param fname File name (UTF-8 encoded).
@param application Generating application name (truncated to 63 chars).
@param schema Schema name for data to be written in this GSD file (truncated to 63 chars).
@param schema_version Version of the schema data to be written (make with
gsd_make_version()).
@post Create an empty gsd file in a file of the given name. Overwrite any existing file at
that location.
The generated gsd file is not opened. Call gsd_open() to open it for writing, or use
gsd_create_and_open() to do both in one call.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
*/
int gsd_create(const char* fname,
const char* application,
const char* schema,
uint32_t schema_version);
/** Create and open a GSD file.
@param handle Handle to open.
@param fname File name (UTF-8 encoded).
@param application Generating application name (truncated to 63 chars).
@param schema Schema name for data to be written in this GSD file (truncated to 63 chars).
@param schema_version Version of the schema data to be written (make with
gsd_make_version()).
@param flags Either GSD_OPEN_READWRITE or GSD_OPEN_APPEND.
@param exclusive_create Set to non-zero to force exclusive creation of the file.
@post Create an empty gsd file with the given name. Overwrite any existing file at that
location.
Open the generated gsd file in *handle*.
The file descriptor is closed if there is an error opening the file.
@note NOTE(review): how a non-zero *exclusive_create* interacts with the "overwrite any
existing file" postcondition is not stated here; confirm against the implementation.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file.
- GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version.
- GSD_ERROR_FILE_CORRUPT: Corrupt file.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory.
*/
int gsd_create_and_open(struct gsd_handle* handle,
const char* fname,
const char* application,
const char* schema,
uint32_t schema_version,
enum gsd_open_flag flags,
int exclusive_create);
/** Open a GSD file.
@param handle Handle to open.
@param fname File name to open (UTF-8 encoded).
@param flags Either GSD_OPEN_READWRITE, GSD_OPEN_READONLY, or GSD_OPEN_APPEND.
@pre The file named *fname* is a GSD file.
@post Opens the GSD file and populates the handle for use by API calls.
The file descriptor is closed if there is an error opening the file.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file.
- GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version.
- GSD_ERROR_FILE_CORRUPT: Corrupt file.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory.
*/
int gsd_open(struct gsd_handle* handle, const char* fname, enum gsd_open_flag flags);
/** Truncate a GSD file.
@param handle Open GSD file to truncate.
After truncating, a file will have no frames and no data chunks. The file size will be that
of a newly created gsd file. The application, schema, and schema version metadata will be
kept. Truncate does not close and reopen the file, so it is suitable for writing restart
files on Lustre file systems without any metadata access.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_NOT_A_GSD_FILE: Not a GSD file.
- GSD_ERROR_INVALID_GSD_FILE_VERSION: Invalid GSD file version.
- GSD_ERROR_FILE_CORRUPT: Corrupt file.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory.
*/
int gsd_truncate(struct gsd_handle* handle);
/** Close a GSD file.
@param handle GSD file to close.
@pre *handle* was opened by gsd_open().
@post Writable files: All data and index entries buffered before the previous call to
gsd_end_frame() are written to the file (see gsd_flush()).
@post The file is closed.
@post *handle* is freed and can no longer be used.
@warning Ensure that all gsd_write_chunk() calls are completed with gsd_end_frame() before
closing the file.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
*/
int gsd_close(struct gsd_handle* handle);
/** Complete the current frame.
Call this after the gsd_write_chunk() calls that make up one frame.
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open().
@post The current frame counter is increased by 1.
@post Flush the write buffer if it has overflowed. See gsd_flush().
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
- GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory.
*/
int gsd_end_frame(struct gsd_handle* handle);
/** Flush the write buffer.
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open().
@post All data buffered by gsd_write_chunk() are present in the file.
@post All index entries buffered by gsd_write_chunk() prior to the last call to
gsd_end_frame() are present in the file. Index entries of a frame that has not yet been
completed with gsd_end_frame() are not covered by this guarantee.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
- GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Unable to allocate memory.
*/
int gsd_flush(struct gsd_handle* handle);
/** Add a data chunk to the current frame.
@param handle Handle to an open GSD file.
@param name Name of the data chunk.
@param type Type ID that identifies the type of data in *data*.
@param N Number of rows in the data.
@param M Number of columns in the data.
@param flags Set to 0; non-zero values are reserved for future use.
@param data Data buffer.
@pre *handle* was opened by gsd_open().
@pre *name* is a unique name for data chunks in the given frame.
@pre *data* is allocated and contains at least `N * M * gsd_sizeof_type(type)` bytes.
@post When there is space in the buffer: The given data is present in the write buffer.
Otherwise, the data is present at the end of the file.
@post The index is present in the buffer.
@note If the GSD file is version 1.0, the chunk name is truncated to 63 bytes. GSD version
2.0 files support arbitrarily long names.
@note *N* == 0 is allowed. When *N* is 0, *data* may be NULL.
@note NOTE(review): the error list below names *N* == 0 as an invalid argument, which
contradicts the note above; confirm against the implementation and correct whichever
statement is stale.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL, *N* == 0, *M* == 0, *type* is invalid, or
*flags* != 0.
- GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened read-only.
- GSD_ERROR_NAMELIST_FULL: The file cannot store any additional unique chunk names.
- GSD_ERROR_MEMORY_ALLOCATION_FAILED: Failed to allocate memory.
*/
int gsd_write_chunk(struct gsd_handle* handle,
const char* name,
enum gsd_type type,
uint64_t N,
uint32_t M,
uint8_t flags,
const void* data);
/** Find a chunk in the GSD file.
@param handle Handle to an open GSD file.
@param frame Frame to look for the chunk in.
@param name Name of the chunk to find.
@pre *handle* was opened by gsd_open() in read or readwrite mode.
The found entry contains size and type metadata (gsd_index_entry::N, gsd_index_entry::M,
gsd_index_entry::type) and can be passed to gsd_read_chunk() to read the data.
@return A pointer to the found chunk, or NULL if not found.
@note gsd_find_chunk() calls gsd_flush() when the file is writable.
*/
const struct gsd_index_entry*
gsd_find_chunk(struct gsd_handle* handle, uint64_t frame, const char* name);
/** Read a chunk from the GSD file.
@param handle Handle to an open GSD file.
@param data Data buffer to read into.
@param chunk Chunk to read.
@pre *handle* was opened in read or readwrite mode.
@pre *chunk* was found by gsd_find_chunk().
@pre *data* points to an allocated buffer with at least `N * M * gsd_sizeof_type(type)`
bytes, where *N*, *M*, and *type* are the members of *chunk*.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL, *data* is NULL, or *chunk* is NULL.
- GSD_ERROR_FILE_MUST_BE_READABLE: The file was opened in append mode.
- GSD_ERROR_FILE_CORRUPT: The GSD file is corrupt.
@note gsd_read_chunk() calls gsd_flush() when the file is writable.
*/
int gsd_read_chunk(struct gsd_handle* handle, void* data, const struct gsd_index_entry* chunk);
/** Get the number of frames in the GSD file.
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open().
@return The number of frames in the file, or 0 on error.
@note A return value of 0 does not by itself distinguish an error from a file that has no
frames (for example, a file just processed by gsd_truncate()).
*/
uint64_t gsd_get_nframes(struct gsd_handle* handle);
/** Query size of a GSD type ID.
Use the result to size the data buffers passed to gsd_write_chunk() and gsd_read_chunk()
(`N * M * gsd_sizeof_type(type)` bytes).
@param type Type ID to query.
@return Size of the given type in bytes, or 0 for an unknown type ID.
*/
size_t gsd_sizeof_type(enum gsd_type type);
/** Search for chunk names in a gsd file.
@param handle Handle to an open GSD file.
@param match String to match.
@param prev Search starting point.
@pre *handle* was opened by gsd_open().
@pre *prev* is NULL or was returned by a previous call to gsd_find_matching_chunk_name().
To find the first matching chunk name, pass NULL for *prev*. Pass in the previously found
string to find the next one after that, and so on. Chunk names match if they begin with the
string in *match*. Chunk names returned by this function may be present in at least one frame.
@return Pointer to a string, NULL if no more matching chunks are found, or NULL if
*prev* is invalid.
@note gsd_find_matching_chunk_name() calls gsd_flush() when the file is writable.
*/
const char*
gsd_find_matching_chunk_name(struct gsd_handle* handle, const char* match, const char* prev);
/** Upgrade a GSD file to the latest specification.
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open() with a writable mode.
@pre There are no pending data to write to the file in gsd_end_frame().
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_IO: IO error (check errno).
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
- GSD_ERROR_FILE_MUST_BE_WRITABLE: The file was opened in read-only mode.
*/
int gsd_upgrade(struct gsd_handle* handle);
/** Get the maximum write buffer size.
Counterpart of gsd_set_maximum_write_buffer_size().
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open().
@return The maximum write buffer size in bytes, or 0 on error.
*/
uint64_t gsd_get_maximum_write_buffer_size(struct gsd_handle* handle);
/** Set the maximum write buffer size.
@param handle Handle to an open GSD file.
@param size Maximum number of bytes to allocate in the write buffer (must be greater than
0).
@pre *handle* was opened by gsd_open().
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
- GSD_ERROR_INVALID_ARGUMENT: *size* == 0.
*/
int gsd_set_maximum_write_buffer_size(struct gsd_handle* handle, uint64_t size);
/** Get the number of index entries to buffer.
Counterpart of gsd_set_index_entries_to_buffer().
@param handle Handle to an open GSD file.
@pre *handle* was opened by gsd_open().
@return The number of index entries to buffer, or 0 on error.
*/
uint64_t gsd_get_index_entries_to_buffer(struct gsd_handle* handle);
/** Set the number of index entries to buffer.
@param handle Handle to an open GSD file.
@param number Number of index entries to buffer before automatically flushing in
`gsd_end_frame()` (must be greater than 0).
@pre *handle* was opened by gsd_open().
@note GSD may allocate more than this number of entries in the buffer, as needed to store
all index entries for the already buffered frames and the current frame.
@return
- GSD_SUCCESS (0) on success. Negative value on failure:
- GSD_ERROR_INVALID_ARGUMENT: *handle* is NULL.
- GSD_ERROR_INVALID_ARGUMENT: *number* == 0.
*/
int gsd_set_index_entries_to_buffer(struct gsd_handle* handle, uint64_t number);
#ifdef __cplusplus
}
#endif
#endif // #ifndef GSD_H
|