File: apop_text_to_db.c

package info (click to toggle)
apophenia 1.0%2Bds-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,152 kB
  • sloc: ansic: 19,483; makefile: 378; awk: 124; sh: 105; javascript: 35; sed: 32
file content (96 lines) | stat: -rw-r--r-- 3,839 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/** \file 
 A command line script to read a text file into a database.

Copyright (c) 2006--2007, 2013 by Ben Klemens.  Licensed under the GPLv2; see COPYING.  */

#include "apop_internal.h"
#include <unistd.h>

/** Parse a comma-separated list of integers, such as the argument to the -f
  switch ("3,8,12,17"), into a newly allocated array.

  \param in  The list to parse. strtok() writes NUL bytes into it, so it must be
             a modifiable string. May be NULL.
  \return An array with one int per non-empty token, in input order, which the
          caller owns and should free(); NULL if \c in is NULL, holds no tokens,
          or memory could not be allocated. Tokens are converted with atoi(), so
          a token with no leading digits becomes 0. The array carries no length
          or terminating element.
*/
int *break_down(char *in){
    // strtok(NULL, ...) would resume whatever string was tokenized last.
    if (in == NULL) return NULL;

    int *out = NULL;
    int ctr = 0;
    for (char *cp = strtok(in, ","); cp != NULL; cp = strtok(NULL, ",")) {
        // Grow through a temporary: on failure realloc leaves the old array
        // intact, and assigning its NULL straight to out would leak it and
        // then write through a null pointer.
        int *bigger = realloc(out, sizeof *out * (ctr+1));
        if (bigger == NULL) {
            free(out);
            return NULL;
        }
        out = bigger;
        out[ctr++] = atoi(cp);
    }
    return out;
}

/** Entry point: read the switches listed in the usage message, open the
  database, and load the text file into the named table inside one
  begin/commit pair.

  After the switches, the positional arguments are text_file, table_name and
  dbname, in that order.

  \return 0 after a normal run or after printing the help text; 1 if the
          arguments are unusable or the database could not be opened.
*/
int main(int argc, char **argv){
    int c;
    char *msg;
    int colnames = 'y',
        rownames = 0,
        tab_exists_check = 0;
    char **field_names = NULL;

    Asprintf(&msg, "Usage: %s [-d delimiters] text_file table_name dbname\n"
"\n"
"If the input text file name is a single dash, -, then read from STDIN.\n"
"Input must be plain ASCII or UTF-8.\n"
" -d\t\tthe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n"
  " \t\t\twill almost certainly have to write as -d \"\\\\t\") (default: \"|,\\t\", meaning \n"
  " \t\t\tthat any of a pipe, comma, or tab will delimit separate entries)\n"
" -nc\t\tdata does not include column names\n"
" -n regex\t\tcase-insensitive regular expression indicating Null values (default: NaN)\n"
" -m\t\tuse a MySQL database (default: SQLite)\n"
" -f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n"
" -u\t\tmysql username\n"
" -p\t\tmysql password\n"
" -r\t\tdata includes row names\n"
" -v\t\tverbosity\n"
" -N\t\ta comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n"
" -en\t\tif table exists, do nothing and exit\n"
" -ed\t\tif table exists, retain the table, delete all data, refill with the new data (i.e., call 'delete * from your_table')\n"
" -eo\t\tif table exists, overwrite the table from scratch (deleting the previous table entirely)\n"
" -ea\t\tif table exists, append new data to the existing table\n"
" -h\t\tdisplay this help and exit\n"
"\n"
, argv[0]);
    int * field_list = NULL;
    char if_exists = 'n';

    if (argc < 3) {
        printf("%s", msg);
        return 0;
    }

    while ((c = getopt(argc, argv, "n:d:e:f:hmp:ru:vN:O")) != -1) {
        switch (c) {
        case 'n':
            // "-nc" arrives as option n with the argument "c". Compare the
            // whole argument, so that a null-value regex which merely begins
            // with 'c' is not mistaken for the no-column-names switch.
            if (strcmp(optarg, "c") == 0) colnames = 'n';
            else                          apop_opts.nan_string = optarg;
            break;
        case 'N': {
            // Initialized so the check below is meaningful even if
            // apop_regex leaves the pointer untouched on failure.
            apop_data *field_name_data = NULL;
            apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data);
            Apop_stopif(!field_name_data, return 1, 0, "'%s' should be a "
                    "comma-delimited list of field names, but I had trouble "
                    "parsing it as such.", optarg);
            apop_data_transpose(field_name_data);
            field_names = field_name_data->text[0];
            break;
        }
        // The three copies below land in members of apop_opts.
        // NOTE(review): sizeof assumes those members are fixed-size char
        // arrays, not pointers -- confirm against the apop_opts declaration.
        case 'd':
            Apop_stopif(strlen(optarg) >= sizeof apop_opts.input_delimiters,
                    return 1, 0, "The delimiter list given to -d is too long.");
            strcpy(apop_opts.input_delimiters, optarg);
            break;
        case 'u':
            Apop_stopif(strlen(optarg) >= sizeof apop_opts.db_user,
                    return 1, 0, "The user name given to -u is too long.");
            strcpy(apop_opts.db_user, optarg);
            break;
        case 'p':
            Apop_stopif(strlen(optarg) >= sizeof apop_opts.db_pass,
                    return 1, 0, "The password given to -p is too long.");
            strcpy(apop_opts.db_pass, optarg);
            break;
        case 'f': field_list = break_down(optarg); break;
        case 'h': printf("%s", msg); return 0;
        case 'm': apop_opts.db_engine = 'm'; break;
        case 'r': rownames++; break;
        case 'v': apop_opts.verbose = 2; break;
        case 'O': tab_exists_check++; break; //deprecated as of December 2013.
        case 'e':
            if (optarg[0]=='n')      if_exists = 'n'; //the default anyway.
            else if (optarg[0]=='d') if_exists = 'd';
            else if (optarg[0]=='a') if_exists = 'a';
            else if (optarg[0]=='o') {
                if_exists = 'o';
                tab_exists_check++;
            }
            break;
        default:
            // Unrecognized switches are skipped, as before.
            break;
        }
    }

    // The argc test above ran before the switches were consumed, so it cannot
    // tell how many positional arguments are left. Without a file and a table
    // name, the argv reads below would yield NULL or run past the array.
    if (argc - optind < 2) {
        fprintf(stderr, "%s", msg);
        return 1;
    }
    char *text_file  = argv[optind];
    char *table_name = argv[optind+1];
    // If dbname was left off, this is argv[argc], which is always NULL.
    char *dbname     = argv[optind+2];

    // NOTE(review): treats a nonzero return from apop_db_open as failure --
    // confirm against its documentation.
    Apop_stopif(apop_db_open(dbname), return 1, 0,
            "Could not open the database '%s'.", dbname ? dbname : "(none given)");
    if (tab_exists_check) apop_table_exists(table_name,1);
    apop_query("begin");
    apop_text_to_db(text_file, table_name, rownames, colnames, field_names, .field_ends=field_list, .if_table_exists=if_exists);
    apop_query("commit");
    return 0;
}