File: dawk.c

package info (click to toggle)
allegro5 2%3A5.0.10-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 14,856 kB
  • ctags: 15,948
  • sloc: ansic: 87,540; cpp: 9,693; objc: 3,491; python: 2,057; sh: 829; makefile: 93; perl: 37; pascal: 24
file content (322 lines) | stat: -rw-r--r-- 6,456 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
/* Support for awk-style processing in C. */

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* I'd prefer to use the POSIX regexp interface, but for portability we need a
 * fallback, and supporting two regexp implementations is asking for trouble.
 * T-Rex is small and quick enough so we use it everywhere.  However it
 * doesn't seem to behave properly on some expressions (which we can work
 * around when the regexps are fixed) so I couldn't recommend it in general.
 */
#include "dawk.h"
#include "trex.h"

/* One slot of the compiled regular expression cache.
 * A slot whose regex member is NULL has not been used yet.
 */
typedef struct {
   const char     *regex;  /* pattern pointer; compile_regex() compares it by address */
   TRex           *reg;    /* compiled expression returned by trex_compile() */
} re_cache_t;

/* Optional hook; d_doabort() calls it (when non-NULL) just before exiting. */
void              (*d_cleanup)(void);

/* Argument list saved by d_init(); d_getline() opens d_argv[1..d_argc-1]. */
static int        d_argc;
static char       **d_argv;
/* Index into d_argv of the file last opened by d_getline().  d_open_input()
 * sets it to -1, which stops d_getline() from moving on to further files.
 */
static int        d_file_num;

/* Current input stream, or NULL when no input file is open. */
static FILE       *d_file;
/* Name of the input file most recently opened; d_close_input() empties it. */
dstr              d_filename;
/* Reset to 0 whenever an input file is opened; d_getline() increments it
 * for each newline-terminated line it reads.
 */
int               d_line_num;

/* Output stream set by d_open_output(); NULL means "use stdout". */
FILE              *d_stdout;
#define D_STDOUT  (d_stdout ? d_stdout : stdout)

/* Maximum number of distinct patterns compile_regex() can cache. */
#define MAX_RE_CACHE 16
static re_cache_t d_regex_cache[MAX_RE_CACHE];
/* Subexpression texts stored by the last successful d_match(). */
static dstr       d_submatches[MAX_MATCH];
/* Text before and after the overall match of the last successful d_match(). */
dstr              d_before_match;
dstr              d_after_match;


/* Report a fatal error and terminate the program.
 *
 * Writes "filename:line: msg1msg2" to stderr, runs the d_cleanup hook if
 * one has been installed, then exits with EXIT_FAILURE.  Never returns.
 */
void d_doabort(const char *filename, int line, const char *msg1,
   const char *msg2)
{
   fprintf(stderr, "%s:%d: %s%s\n", filename, line, msg1, msg2);

   if (d_cleanup != NULL)
      d_cleanup();

   exit(EXIT_FAILURE);
}


/* Remember the argument list so that d_getline() can read the files named
 * in it, and close any input file that is currently open.
 *
 * The file counter starts at 0; d_getline() increments it before opening,
 * so argv[0] itself is never opened.
 */
void d_init(int argc, char *argv[])
{
   d_argv = argv;
   d_argc = argc;
   d_file_num = 0;

   d_close_input();
}


/* Make `filename` the one and only input file.
 *
 * Any input file that is already open is closed first.  If the new file
 * cannot be opened the program is aborted through d_abort().  On success
 * the file name is recorded, the line counter is reset, and the file
 * counter is set to -1 so that d_getline() stops at the end of this file
 * instead of continuing with the argument list.
 */
void d_open_input(const char *filename)
{
   if (d_file != NULL)
      fclose(d_file);

   d_file = fopen(filename, "r");
   if (d_file == NULL)
      d_abort("could not open file for reading: ", filename);

   d_assign(d_filename, filename);
   d_file_num = -1;
   d_line_num = 0;
}


/* Close the current input file, if there is one, and clear the recorded
 * file name.  Safe to call when no file is open.
 */
void d_close_input(void)
{
   FILE *old = d_file;

   d_file = NULL;
   if (old != NULL) {
      fclose(old);
   }
   d_assign(d_filename, "");
}


/* Read the next line from the current input file(s) into `var`.
 *
 * When no file is open, the next file from the argument list given to
 * d_init() is opened (aborting through d_abort() if that fails).  Files
 * that are exhausted are closed and skipped until a line is found.
 *
 * The terminating newline is not stored, and a CR directly before it is
 * removed.  A line that would not fit in MAX_STRING - 1 characters is cut
 * at that length with a warning on stderr; the remainder is returned by
 * the following call(s).
 *
 * d_line_num is incremented for every line returned because a newline or
 * the end of the file was reached, so a final line that lacks a trailing
 * newline is counted like any other.
 *
 * Returns true if a line was stored in `var`, false if there is no more
 * input (in which case `var` is left empty if anything was attempted).
 */
bool d_getline(dstr var)
{
   char *p;
   int c;

   for (;;) {
      /* Open the next file if necessary. */
      if (!d_file) {
         if (d_file_num == -1 || d_file_num + 1 >= d_argc) {
            return false;
         }
         d_file_num++;
         d_file = fopen(d_argv[d_file_num], "r");
         if (!d_file) {
            d_abort("could not open file for reading: ", d_argv[d_file_num]);
         }
         d_assign(d_filename, d_argv[d_file_num]);
         d_line_num = 0;
      }

      p = var;
      for (;;) {
         c = fgetc(d_file);
         if (c == EOF || c == '\n') {
            break;
         }
         *p++ = c;
         if (p - var >= MAX_STRING - 1) {
            fprintf(stderr, "dawk: string length limit reached\n");
            break;
         }
      }
      *p = '\0';

      if (c == '\n') {
         d_line_num++;
         /* Remove trailing CR if present. */
         if (p > var && p[-1] == '\r') {
            p[-1] = '\0';
         }
         return true;
      }

      if (c != EOF) {
         /* Length limit hit in the middle of a line; the line number is
          * bumped once the rest of the line has been read.
          */
         return true;
      }

      /* End of this file. */
      fclose(d_file);
      d_file = NULL;
      if (p != var) {
         /* Last line had no trailing newline: it still counts as a line. */
         d_line_num++;
         return true;
      }
      /* Nothing was read; loop round to try the next file. */
   }
}


/* Redirect d_print()/d_printf() output to `filename`.
 *
 * The previous output file (if any) is closed first, then `filename` is
 * opened for writing.  Aborts through d_abort() if it cannot be opened.
 */
void d_open_output(const char *filename)
{
   d_close_output();

   /* d_close_output() left d_stdout NULL, so a failed fopen() leaves it
    * NULL as well.
    */
   d_stdout = fopen(filename, "w");
   if (d_stdout == NULL) {
      d_abort("error opening file for writing: ", filename);
   }
}


/* Stop writing to the current output file and go back to standard output.
 * The stream is closed unless it is stdout itself.
 */
void d_close_output(void)
{
   FILE *old = d_stdout;

   d_stdout = NULL;
   if (old != NULL && old != stdout) {
      fclose(old);
   }
}


/* Write the string `s` followed by a newline to the current output
 * (the file chosen with d_open_output(), otherwise stdout).
 */
void d_print(const char *s)
{
   FILE *out = D_STDOUT;

   fprintf(out, "%s\n", s);
}


/* printf-style output to the current output (the file chosen with
 * d_open_output(), otherwise stdout).  No newline is added.
 */
void d_printf(const char *format, ...)
{
   FILE *out = D_STDOUT;
   va_list args;

   va_start(args, format);
   vfprintf(out, format, args);
   va_end(args);
}


/* Copy the NUL-terminated string `from` into `to`.
 *
 * The two may overlap.  A dstr is taken to hold MAX_STRING bytes (the same
 * limit d_getline() enforces), so a longer source is cut to
 * MAX_STRING - 1 characters with a warning on stderr instead of being
 * written past the end of the destination.
 */
void d_assign(dstr to, const dstr from)
{
   size_t len = strlen(from);

   if (len > (size_t)(MAX_STRING - 1)) {
      fprintf(stderr, "dawk: string length limit reached\n");
      len = MAX_STRING - 1;
   }

   /* Might be overlapping. */
   memmove(to, from, len);
   to[len] = '\0';
}


/* Copy the first `n` bytes of `from` into `to` and NUL-terminate the result.
 *
 * The two may overlap.  A dstr is taken to hold MAX_STRING bytes (the same
 * limit d_getline() enforces), so `n` is cut to MAX_STRING - 1 with a
 * warning on stderr instead of writing past the end of the destination.
 * The caller must make sure `from` really has `n` readable bytes.
 */
void d_assignn(dstr to, const dstr from, size_t n)
{
   if (n > (size_t)(MAX_STRING - 1)) {
      fprintf(stderr, "dawk: string length limit reached\n");
      n = MAX_STRING - 1;
   }

   /* Might be overlapping. */
   memmove(to, from, n);
   to[n] = '\0';
}


/* Return the cache slot holding the compiled form of `regex`, compiling
 * the pattern on first use.
 *
 * Slots are filled in order and never released.  A pattern is recognised
 * by the address of its string, not by its contents.  Aborts through
 * d_abort() if the pattern does not compile or if all MAX_RE_CACHE slots
 * are already taken by other patterns.
 */
static re_cache_t *compile_regex(const char *regex)
{
   re_cache_t *slot = d_regex_cache;
   re_cache_t *const end = d_regex_cache + MAX_RE_CACHE;

   while (slot != end) {
      if (slot->regex == NULL) {
         /* First free slot: claim it for this pattern. */
         slot->regex = regex;
         slot->reg = trex_compile(regex, NULL);
         if (slot->reg == NULL) {
            d_abort("error compiling regular expression: ", regex);
         }
      }
      if (slot->regex == regex) {
         return slot;
      }
      slot++;
   }

   d_abort("too many regular expressions", "");
   return NULL;
}


/* Search `line` for the regular expression `regex`.
 *
 * Returns false, leaving the results of any earlier match untouched, when
 * the expression is not found.  Otherwise returns true after storing:
 *   - d_before_match: the text of `line` before the overall match,
 *   - d_after_match:  the text of `line` after the overall match,
 *   - every subexpression reported by trex_getsubexp() (index 0 being the
 *     overall match), retrievable with d_submatch(); indices that
 *     trex_getsubexp() rejects are set to the empty string.
 */
bool d_match(dstr line, const char *regex)
{
   re_cache_t *entry = compile_regex(regex);
   TRexMatch whole;
   TRexMatch sub;
   int idx;

   if (!trex_search(entry->reg, line, NULL, NULL))
      return false;

   trex_getsubexp(entry->reg, 0, &whole);
   d_assignn(d_before_match, line, whole.begin - line);
   d_assign(d_after_match, whole.begin + whole.len);

   for (idx = 0; idx < MAX_MATCH; idx++) {
      if (!trex_getsubexp(entry->reg, idx, &sub)) {
         d_submatches[idx][0] = '\0';
         continue;
      }
      d_assignn(d_submatches[idx], sub.begin, sub.len);
   }

   return true;
}


/* Return a submatch from the previous d_match call. */
const char *d_submatch(int i)
{
   return d_submatches[i];
}


/* Like strrchr(), but looks for the last occurrence of either `c1` or `c2`.
 *
 * The terminating NUL takes part in the search, as with strrchr().
 * Returns a pointer to the character found, or NULL if neither occurs.
 */
static const char *strrchr2(const char *s, char c1, char c2)
{
   const char *p = s + strlen(s);

   for (;;) {
      if (*p == c1 || *p == c2)
         return p;
      /* Stop at the first character: stepping to s - 1 would form a
       * pointer before the start of the string.
       */
      if (p == s)
         return NULL;
      p--;
   }
}


/* Store the final path component of `filename` in `output`.
 *
 * Everything up to and including the last '/' or '\\' is dropped.  When
 * `newext` is not NULL it replaces whatever starts at the last '.' of the
 * result, or is appended if the result contains no '.'; `newext` is copied
 * as given, so it should include its own leading dot if one is wanted.
 */
void d_basename(const char *filename, const char *newext, dstr output)
{
   const char *sep = strrchr2(filename, '/', '\\');
   char *ext;

   strcpy(output, sep ? sep + 1 : filename);

   if (!newext)
      return;

   /* Overwrite from the last dot, or from the end if there is none. */
   ext = strrchr(output, '.');
   if (!ext)
      ext = output + strlen(output);
   strcpy(ext, newext);
}


/* Copy `src` to `dest`, converting each character with tolower().
 * `dest` is always NUL-terminated.
 */
void d_tolower(const dstr src, dstr dest)
{
   const char *s = src;
   char *d = dest;
   for (; *s; s++, d++) {
      /* tolower() is only defined for unsigned char values and EOF; a
       * plain char may be negative for bytes >= 0x80.
       */
      *d = (char)tolower((unsigned char)*s);
   }
   *d = '\0';
}


/* Remove every occurrence of the character `c` from `str`, in place. */
void d_delchr(dstr str, char c)
{
   char *out = str;
   char *in;

   for (in = str; *in != '\0'; in++) {
      if (*in == c) {
         continue;
      }
      /* Keep this character: move it down over any removed ones. */
      *out++ = *in;
   }
   *out = '\0';
}

/* vim: set sts=3 sw=3 et: */