File: hxcite.c

package info (click to toggle)
html-xml-utils 6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,620 kB
  • sloc: ansic: 10,027; sh: 2,135; lex: 189; yacc: 125; perl: 123; makefile: 122
file content (327 lines) | stat: -rw-r--r-- 9,715 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
/*
 * cite - adds hyperlinks to bibliographic references in HTML
 *
 * The program looks for strings of the form [[name]] (i.e., a
 * bibliographic label inside a double pair of square brackets), e.g.,
 * [[Knuth84]] or [[LieBos97]]. The label will be looked up in a
 * bibliography database and if it is found, the string will be
 * replaced by a pattern which is typically of the form <a
 * href="...">[name]</a>, but the pattern can be changed
 * with a command line option.
 *
 * If the string is of the form {{name}}, the name will be looked up,
 * but the string will be copied unchanged.
 *
 * If the label is not found, a warning is printed and the string is
 * left unchanged.
 *
 * All labels that are found are also stored, one label per line, in a
 * separate file with extension .aux. This file can be used by mkbib
 * to create the bibliography by extracting the corresponding
 * bibliographic entries from the database.
 *
 * The bibliography database must be a refer-style database. Though
 * for the purposes of this program all lines that don't start with
 * "%L" are ignored. Lines with "%L" are assumed to contain a label.
 *
 * Options:
 *
 * -b base
 *     Give the value for %b in the pattern.
 *
 * -p pattern
 *     The replacement for the string [[label]]. The default is
 *
 *     <a href=\"%b#%L\" rel=\"biblioentry\">[%L]<!--{{%m%L}}--></a>
 *
 *     %L will be replaced by the label, %b by the value of the -b
 *     option and %m by the marker (-m option).
 *
 * -a auxfile
 *     The name of the file in which the list of labels will be stored.
 *     Default is the name of the file given as argument, minus its
 *     extension, plus ".aux". If no file is given (input comes from
 *     stdin), the default name is "aux.aux".
 *
 * -m marker
 *     By default, the program looks for "[[name]]", but it can be
 *     made to look for "[[Xname]]" where X is some string, usually a
 *     symbol such as '!' or '='. This allows references to be
 *     classified, e.g., "[[!name]]" for normative references and
 *     "[[name]]" for non-normative references.
 *
 * -c
 *     Assume that every pair "<!--" and "-->" delimit a comment and
 *     do not process any [[label]] that occurs between them. Any
 *     "{{label}}" is processed as normal. This does not actually
 *     parse the input as HTML or XML and thus the program will
 *     mistake occurrences of these two strings inside CDATA sections
 *     or attribute values for comment delimiters.
 *
 * Copyright  1994-2010 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software
 *
 * Author: Bert Bos <bert@w3.org>
 * Created: 18 March 2000
 * Version: $Id: hxcite.c,v 1.4 2010/11/18 12:41:37 bbos Exp $
 **/

#include <config.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#if STDC_HEADERS
# include <string.h>
#else
# ifndef HAVE_STRCHR
#  define strchr index
#  define strrchr rindex
# endif
# ifndef HAVE_STRSTR
#  include "strstr.e"
# endif
#endif

#ifdef HAVE_ERRNO_H
#  include <errno.h>
#endif
#ifdef HAVE_SEARCH_H
#  include <search.h>
#else
#  include "hash.e"
#endif

#include <ctype.h>
#include "export.h"
#include "heap.e"
#include "types.e"
#include "errexit.e"


/* Warning: arbitrary limits! */
/* LINESIZE is the size of the buffers that parse_db() and main() hand to
 * fgets(), so a longer input line is read in several pieces.
 * HASHSIZE is the table size passed to hcreate() in parse_db(). */
#define LINESIZE 32768
#define HASHSIZE 4096				/* Size of hash table */

/* Settings that main() fills in from the command line options */
static string base = "";			/* URL of bibliography (-b), replaces %b */
static string mark = "";			/* Marker expected after "[[" or "{{" (-m) */
static size_t marklen = 0;			/* Length of mark */
static string prog;				/* = argv[0] */
/* Replacement text for a citation (-p); expand_ref() substitutes
 * %b, %L, %m and %% in it */
static string pattern =
  "<a href=\"%b#%L\" rel=\"biblioentry\">[%L]<!--{{%m%L}}--></a>";
static FILE *aux;				/* Output file for the labels that were found */
static Boolean skip_comments = False; /* Whether to skip [[ inside <!----> */


/*
 * label_exists -- tell whether a label is present in the hash table
 *
 * The table is the one managed by hcreate()/hsearch(); parse_db()
 * enters one key per label read from the database. Returns True if
 * the label is found as a key, False otherwise.
 */
static Boolean label_exists(const string label)
{
  ENTRY wanted = {NULL, NULL};
  ENTRY *hit;

  wanted.key = label;				/* Only the key matters for FIND */
  hit = hsearch(wanted, FIND);
  return hit != NULL;
}


/*
 * valid_label -- check if the label is well-formed
 *
 * A label is well-formed if it consists only of alphanumeric
 * characters (as judged by isalnum() in the current locale), '-',
 * '_' and '.'. Note that the empty string passes this test.
 *
 * Returns True if every character is acceptable, False otherwise.
 */
static Boolean valid_label(const string label)
{
  const char *s;

  for (s = label; *s; s++) {
    /* The <ctype.h> functions require a value representable as unsigned
       char (or EOF); a plain char may be negative for non-ASCII bytes */
    if (isalnum((unsigned char)*s)) continue;
    if (*s == '-' || *s == '_' || *s == '.') continue;
    return False;
  }
  return True;
}


/*
 * expand_ref -- print the reformatted reference
 *
 * Writes the global pattern to stdout, replacing the escapes
 *
 *   %%  by a literal '%'
 *   %b  by the base URL (global base)
 *   %L  by the label passed as argument
 *   %m  by the marker (global mark)
 *
 * Any other character after '%' is an error in the pattern; both
 * characters are silently dropped. A single '%' at the very end of
 * the pattern is dropped as well.
 */
static void expand_ref(const string label)
{
  int i;

  /* ToDo: somehow allow sequence numbers for references [1], [2], etc. */
  for (i = 0; pattern[i]; i++) {
    if (pattern[i] != '%') {
      putchar(pattern[i]);
      continue;
    }
    switch (pattern[++i]) {
      case '%': putchar('%'); break;		/* Literal '%' */
      case 'b': fputs(base, stdout); break;	/* Base URL */
      case 'L': fputs(label, stdout); break;	/* Label */
      case 'm': fputs(mark, stdout); break;	/* Mark (-m option) */
      case '\0': return;			/* '%' was the last character:
						   stop here, otherwise the
						   loop's i++ would step past
						   the end of the string */
      default: break;				/* Error in pattern */
    }
  }
}


/*
 * process_line -- look for citations in a line
 *
 * Copies text to stdout, handling every occurrence of
 * "[[" + mark + label + "]]" and "{{" + mark + label + "}}":
 *
 *  - label not well-formed, or mark not present: copied unchanged;
 *  - label not in the database: copied unchanged, warning on stderr;
 *  - "[[...]]" with a known label: replaced by the expanded pattern
 *    and the label is written to the aux file;
 *  - "{{...}}" with a known label: copied unchanged and the label is
 *    written to the aux file.
 *
 * text        the line to process (NUL-terminated)
 * fname       name of the input file for warnings, NULL means stdin
 * lineno      line number for warnings
 * in_comment  in/out: whether the scan is currently between "<!--" and
 *             "-->". It is only ever set to True if skip_comments is
 *             on. While it is True, "[[" is not looked for, but "{{"
 *             still is.
 *
 * An opening "[[" or "{{" that has no closing counterpart on the
 * same line is copied as ordinary text; scanning continues after it,
 * so that comment delimiters and citations later on the line are
 * still seen.
 */
EXPORT void process_line(const string text, const string fname, int lineno,
			 Boolean *in_comment)
{
  string h = text, p, q, label;
  Boolean no_square = False;			/* No "]]" in the rest of the line */
  Boolean no_curly = False;			/* No "}}" in the rest of the line */
  Boolean *no_closer;

  /* Loop over occurrences of "[[" + mark + label + "]]"
   and "{{" + mark + label + "}}" */

  while ((p = strpbrk(h, *in_comment ? "-{" : "[{<"))) {

    while (h != p) putchar(*(h++));		/* Print text up to here */

    if (strncmp(p, "-->", 3) == 0) {		/* End of comment */
      putchar(*(h++));
      *in_comment = False;
      continue;
    }
    if (strncmp(p, "<!--", 4) == 0) {		/* Begin of comment */
      putchar(*(h++));
      *in_comment = skip_comments;
      continue;
    }
    if (strncmp(p, "{{", 2) != 0 && strncmp(p, "[[", 2) != 0) {
      putchar(*(h++));				/* Not {{ or [[ */
      continue;
    }

    /* Is there a corresponding closing bracket? A failed search also
       holds for every later opener of the same kind on this line, so
       remember it instead of searching again. */
    no_closer = *p == '[' ? &no_square : &no_curly;
    q = *no_closer ? NULL : strstr(p + 2, *p == '[' ? "]]" : "}}");
    if (! q) {
      *no_closer = True;
      putchar(*(h++));				/* Treat as ordinary text */
      continue;
    }

    /* Here h == p and q >= p + 2. The mark must match and must end
       before the closing brackets start, otherwise the label length
       below would be negative. */
    if ((marklen == 0 || strncmp(p + 2, mark, marklen) == 0)
	&& (size_t)(q - (p + 2)) >= marklen) {

      /* Extract the label between the mark and the closing brackets */
      label = newnstring(p + 2 + marklen, q - (p + 2 + marklen));

      if (! valid_label(label)) {		/* Cannot be a label */
	while (h != q + 2) putchar(*(h++));	/* Copy unchanged */
      } else if (! label_exists(label)) {	/* No citation found: warn */
	while (h != q + 2) putchar(*(h++));	/* Copy unchanged */
	fprintf(stderr, "%s:%d: warning: no bib entry found for %s\n",
		fname ? fname : (string)"<stdin>", lineno, label);
      } else if (*p == '[') {			/* Label found: expand */
	expand_ref(label);			/* Insert full reference */
	fprintf(aux, "%s\n", label);		/* Store label */
      } else {					/* "{{" so don't expand */
	while (h != q + 2) putchar(*(h++));	/* Copy unchanged */
	fprintf(aux, "%s\n", label);		/* Store label */
      }
      dispose(label);

    } else {					/* No valid mark */

      while (h != q + 2) putchar(*(h++));	/* Copy unchanged */
    }
    h = q + 2;					/* Continue after "]]"/"}}" */
  }

  fputs(h, stdout);				/* Print rest of text */
}


/*
 * parse_db -- extract all labels from the refer-style database
 *
 * Opens the file db, creates the hash table (HASHSIZE entries) and
 * enters a copy of the label of every line that starts with "%L ".
 * Trailing whitespace is removed from the label; a "%L " line with
 * nothing but whitespace after it is ignored. All other lines are
 * skipped.
 *
 * Any failure (open, hash table creation or insertion, read, close)
 * ends the program via errexit(). The copies of the labels are not
 * released by this function; they serve as keys in the hash table.
 */
static void parse_db(const string db)
{
  char line[LINESIZE];
  FILE *f;
  size_t len;
  ENTRY entry = {NULL, NULL};			/* Only the key is ever set */

  if (!(f = fopen(db,"r"))) errexit("%s: %s: %s\n", prog, db, strerror(errno));

  /* Initialize the hash table */
  if (! hcreate(HASHSIZE)) errexit("%s: %s\n", prog, strerror(errno));

  /* Search for %L lines */
  clearerr(f);
  while (fgets(line, sizeof(line), f)) {
    if (strncmp(line, "%L ", 3) != 0) continue;	/* Not a label line */

    /* Find the end of the field, not counting trailing whitespace. The
       cast is needed because isspace() is undefined for negative values */
    for (len = strlen(line);
	 len > 0 && isspace((unsigned char)line[len-1]);
	 len--) ;
    if (len <= 3) continue;			/* Ignore empty field */

    line[len] = '\0';
    entry.key = newstring(line + 3);
    if (!hsearch(entry, ENTER)) errexit("%s: %s\n", prog, strerror(errno));
  }
  /* ferror() only returns a flag, the cause of the error is in errno */
  if (ferror(f)) errexit("%s: %s: %s\n", prog, db, strerror(errno));

  if (fclose(f) != 0) errexit("%s: %s: %s\n", prog, db, strerror(errno));
}


/*
 * usage -- print usage message and exit
 *
 * Passes the version and a summary of all command line options
 * (including -m) to errexit().
 */
static void usage(void)
{
  errexit("Version %s\n"
	  "Usage: %s [-b base] [-p pattern] [-a auxfile] [-m marker] [-c] "
	  "bib-file [HTML-file]\n",
	  VERSION, prog);
}


/*
 * main -- parse the options, load the database and filter the input
 *
 * Command line: [-b base] [-p pattern] [-a auxfile] [-m marker] [-c]
 * [--] bib-file [HTML-file]
 *
 * The labels of bib-file are read with parse_db(), the aux file is
 * opened for writing and then the HTML file (or stdin if none is
 * given) is passed line by line through process_line(). Errors are
 * reported with errexit(); on success the return value is 0.
 */
int main(int argc, char *argv[])
{
  char line[LINESIZE];
  string h, auxfile = NULL, dbfile = NULL, infile = NULL;
  Boolean in_comment = False;
  int i, lineno;
  FILE *f;

  /* Parse command line arguments. An option that needs a value must
     not be the last argument, otherwise i would run past argc. */
  prog = argv[0];
  for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) {
    switch (argv[i][1]) {
    case 'b':					/* Set base of URL */
      if (++i == argc) usage();
      base = argv[i];
      break;
    case 'p':					/* Form of expanded ref */
      if (++i == argc) usage();
      pattern = argv[i];
      break;
    case 'a':					/* Name of auxfile */
      if (++i == argc) usage();
      auxfile = argv[i];
      break;
    case 'm':					/* After "[[" */
      if (++i == argc) usage();
      mark = argv[i];
      marklen = strlen(mark);
      break;
    case 'c':					/* Skip [[ in comments */
      skip_comments = True;
      break;
    default:
      usage();
    }
  }
  if (i < argc && eq(argv[i], "--")) i++;

  if (i == argc || argc > i + 2) usage();

  dbfile = argv[i++];
  if (i != argc) infile = argv[i++];

  /* Read the labels from the bibliography database */
  parse_db(dbfile);

  /* Construct auxfile */
  if (! auxfile) {
    if (infile) {
      newarray(auxfile, strlen(infile) + 5);
      strcpy(auxfile, infile);
      /* Remove the extension, but only if the last dot is in the file
	 name itself and not in a directory part such as "./" or "../" */
      if ((h = strrchr(auxfile, '.')) && !strchr(h, '/')) *h = '\0';
      strcat(auxfile, ".aux");
    } else {
      auxfile = "aux.aux";
    }
  }
  if (! (aux = fopen(auxfile, "w")))
    errexit("%s: %s: %s\n", prog, auxfile, strerror(errno));

  /* Open input file or use stdin */
  f = infile ? fopen(infile, "r") : stdin;
  if (!f) errexit("%s: %s: %s\n", prog, infile, strerror(errno));

  /* Read input line by line */
  clearerr(f);
  lineno = 1;
  while (fgets(line, sizeof(line), f))
    process_line(line, infile, lineno++, &in_comment);
  /* ferror() only returns a flag, the cause of the error is in errno */
  if (ferror(f))
    errexit("%s: %s: %s\n", prog, infile ? infile : (string)"<stdin>",
	    strerror(errno));

  fclose(f);
  /* Buffered labels are written at the latest here, so check the result */
  if (fclose(aux) != 0)
    errexit("%s: %s: %s\n", prog, auxfile, strerror(errno));
  return 0;
}