File: genid.c

package info (click to toggle)
html-xml-utils 6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 1,620 kB
  • sloc: ansic: 10,027; sh: 2,135; lex: 189; yacc: 125; perl: 123; makefile: 122
file content (121 lines) | stat: -rw-r--r-- 2,957 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
 * Generate unique IDs.
 *
 * Copyright © 2000 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software
 *
 * Author: Bert Bos <bert@w3.org>
 * Created: 4 August 2000
 * Version: $Id: genid.c,v 1.11 2009/01/19 19:06:22 bbos Exp $
 **/

#include <config.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_ERRNO_H
#  include <errno.h>
#endif
#include <ctype.h>

#ifdef HAVE_ERRNO_H
#  include <errno.h>
#endif
#ifdef HAVE_SEARCH_H
#  include <search.h>
#else
#  include "search-freebsd.h"
#endif

#if STDC_HEADERS
# include <string.h>
#else
# ifndef HAVE_STRCHR
#  define strchr index
#  define strrchr rindex
# endif
# ifndef HAVE_STRSTR
#  include "strstr.e"
# endif
#endif
#include <export.h>
#include "heap.e"
#include "types.e"
#include "tree.e"
#include "errexit.e"


/*
 * Max len of a generated ID. gen_id() allocates MAXIDLEN + 1 bytes and
 * lets gen_id_r() fill at most MAXIDLEN - 4 of them (terminator included),
 * which leaves room for a numeric suffix of up to four digits.
 */
#define MAXIDLEN 45

/*
 * Comparison-function type expected by tsearch()/tfind(); used in this
 * file to cast strcasecmp to the required signature.
 */
typedef int(*compar_fn_t)(const void *, const void *);

/*
 * Sorted tree of IDs: the root handle passed to tsearch()/tfind().
 * NULL means the tree is empty.
 */
static void *idtree = NULL;


/* storeID -- remember the existence of an ID (allocates a copy of the ID)
 *
 * id: NUL-terminated ID to record.
 *
 * The tree stores a pointer to a private copy made with newstring(), so
 * the caller keeps ownership of id. IDs are compared case-insensitively.
 * If an equal ID is already present nothing is allocated: tsearch() would
 * keep the existing node and the fresh copy would simply be leaked.
 * Exits via errexit() if the tree node cannot be allocated.
 */
EXPORT void storeID(conststring id)
{
  /* Case-insensitive: necessary for HTML, only a little wasteful for XML */
  if (tfind(id, &idtree, (compar_fn_t)strcasecmp))
    return;					/* Already known, no copy needed */

  /* tsearch() returns NULL only when it cannot allocate a new node */
  if (! tsearch(newstring(id), &idtree, (compar_fn_t)strcasecmp))
    errexit("Out of memory\n");
}


/* gen_id_r -- find some text suitable for an ID recursively
 *
 * t:      node whose children are scanned, in sibling order.
 * s:      output buffer of at least maxlen bytes.
 * len:    in/out, number of characters already stored in s.
 * maxlen: capacity of s, including the terminating '\0'.
 *
 * Text children contribute characters: letters are lower-cased, digits,
 * '-' and '.' are copied, and a run of white space becomes a single '-'.
 * Nothing is stored until the first letter has been seen, so a non-empty
 * result always starts with a letter. Element children are scanned
 * recursively; other node types are ignored. s is always '\0'-terminated
 * on return.
 */
static void gen_id_r(Tree t, string s, int *len, int maxlen)
{
  Tree h;
  int i;
  unsigned char c;

  assert(s);					/* s at least maxlen long */

  /* Loop over children looking for useful text */
  for (h = t->children; h && *len < maxlen - 1; h = h->sister) {
    switch (h->tp) {
      case Text:
        for (i = 0; *len < maxlen - 1 && h->text[i]; i++) {
          /* The <ctype.h> functions require a value representable as
             unsigned char (or EOF); a negative plain char is undefined. */
          c = (unsigned char) h->text[i];
          if (isalpha(c)) s[(*len)++] = (char) tolower(c);
          else if (*len == 0) ;			/* Wait for a letter first */
          else if (c == '-' || c == '.') s[(*len)++] = (char) c;
          else if (isdigit(c)) s[(*len)++] = (char) c;
          /* *len > 0 here, so s[*len-1] is a valid index */
          else if (isspace(c) && s[*len-1] != '-') s[(*len)++] = '-';
        }
        break;
      case Element:				/* Recursive */
        gen_id_r(h, s, len, maxlen);
        break;
      default:
        break;
    }
  }
  s[*len] = '\0';
}

/* gen_id -- try some heuristics to generate an ID for element t
 *
 * t: element for which an ID is wanted.
 *
 * Builds a candidate from the text found under t (see gen_id_r), or "x"
 * if no usable text is found. If the candidate is already in the ID tree,
 * the decimal suffixes 0, 1, ... 9999 are tried in turn until an unused
 * ID is found.
 *
 * Returns a malloc'ed string that has also been stored in the ID tree
 * (the tree keeps the same pointer, so the caller must not free it while
 * the tree is in use), or NULL if all 10000 suffixes are taken.
 * Exits via errexit() when out of memory.
 */
EXPORT string gen_id(Tree t)
{
  string s;
  int len = 0;
  int seqno;

  assert(MAXIDLEN > 4);
  if (! (s = malloc(MAXIDLEN + 1))) errexit("Out of memory\n");

  /* Reserve 4 bytes of the buffer for the numeric suffix */
  gen_id_r(t, s, &len, MAXIDLEN - 4);
  if (len == 0) {
    s[len++] = 'x';				/* At least one character */
    s[len] = '\0';
  }

  /*
   * While the current candidate is already used, replace it by the base
   * text plus the next sequence number. The candidate is tested *before*
   * the give-up check, so the last suffix (9999) is accepted if it is free.
   */
  for (seqno = 0; tfind(s, &idtree, (compar_fn_t)strcasecmp); seqno++) {
    if (seqno == 10000) {			/* 10000 tried, giving up... */
      free(s);
      return NULL;
    }
    /* len <= MAXIDLEN - 5, so at least 6 bytes remain: 4 digits + '\0' fit */
    snprintf(s + len, (size_t)(MAXIDLEN + 1 - len), "%d", seqno);
  }

  /* Store it; tsearch() returns NULL only if it cannot allocate a node */
  if (! tsearch(s, &idtree, (compar_fn_t)strcasecmp))
    errexit("Out of memory\n");
  return s;
}