File: parse_by_chunk.c

package info (click to toggle)
zsv 1.3.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 49,168 kB
  • sloc: ansic: 175,811; cpp: 56,301; sh: 3,635; makefile: 3,049; javascript: 577; cs: 90; awk: 70; python: 41; sql: 15
file content (100 lines) | stat: -rw-r--r-- 2,511 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <zsv.h>

/** zsv example parsing in chunks
 *
 * This sample code shows how to use `zsv_parse_bytes()`
 * to parse a chunk of bytes, instead of using `zsv_parse_more()`
 * to pull data from a stream
 *
 * Note that, when given a choice, using `zsv_parse_more()`
 * may be slightly more efficient / performant as it requires
 * less memory copying
 *
 * In this example, we just count rows, but you could substitute in any
 * row handler you want
 **/

/**
 * State handed to the row handler while parsing.
 * The only thing tracked in this example is how many rows were seen
 */
struct chunk_parse_data {
  unsigned int count; /* incremented once per parsed row */
};

/**
 * Row handler: receives the context pointer as an untyped pointer,
 * treats it as our `struct chunk_parse_data` and bumps the row count
 */
static void chunk_parse_row(void *dat) {
  ((struct chunk_parse_data *)dat)->count += 1;
}

/**
 * Main routine will output a help message if called with -h or --help,
 * otherwise will read from stdin and run through the CSV parser
 *
 * Returns 0 on success (or after printing help), and EXIT_FAILURE if
 * the parser or the read buffer could not be allocated, or if reading
 * from stdin failed
 */
int main(int argc, const char *argv[]) {
  if (argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) {
    printf("Usage: parse_by_chunk < file.csv\n\n");
    printf("Reads stdin in chunks, parses each chunk using `zsv_parse_bytes()`,\n");
    printf("and outputs the number of rows parsed.\n");
    return 0;
  }

  FILE *f = stdin; /* read from stdin */

  /**
   * create a vanilla parser
   */
  zsv_parser p = zsv_new(NULL);
  if (!p) {
    fprintf(stderr, "Out of memory!\n");
    return EXIT_FAILURE;
  }

  /**
   * Configure the parser to use our row handler, and to pass
   * it our data when it's called
   */
  struct chunk_parse_data d = {0};
  zsv_set_row_handler(p, chunk_parse_row);
  zsv_set_context(p, &d);

  /**
   * Allocate a buffer that we will fetch data from and pass to the parser.
   * In this example we use a heap buffer, but we could just as well
   * have allocated it on the stack
   */
  const size_t chunk_size = 4096;
  unsigned char *buff = malloc(chunk_size);
  if (!buff) {
    /* the parser was already created, so release it before bailing out */
    fprintf(stderr, "Out of memory!\n");
    zsv_delete(p);
    return EXIT_FAILURE;
  }

  /**
   * Read and parse each chunk until fread() returns no more data
   */
  size_t bytes_read;
  while ((bytes_read = fread(buff, 1, chunk_size, f)) > 0) {
    zsv_parse_bytes(p, buff, bytes_read);
  }

  /**
   * fread() returns 0 both at end-of-file and on error; remember which
   * one it was so that a truncated read is not reported as success
   */
  int read_failed = ferror(f);

  /**
   * Finish any remaining parsing
   */
  zsv_finish(p);

  /**
   * Clean up
   */
  zsv_delete(p);
  free(buff);

  if (read_failed) {
    fprintf(stderr, "Error reading input\n");
    return EXIT_FAILURE;
  }

  /**
   * Print result
   */
  printf("Count: %u\n", d.count);
  return 0;
}