File: regex.c

package info (click to toggle)
hexer 0.1.5-3
links: PTS, VCS
area: main
in suites: squeeze
size: 724 kB
ctags: 967
sloc: ansic: 12,189; makefile: 99
file content (1751 lines) | stat: -rw-r--r-- 50,764 bytes
/* regex.c	8/19/1995
 * Regular expressions, version 0.3
 *
 * This file provides a set of functions for dealing with binary regular
 * expressions, where binary means that some extesions have been made to
 * the standard UN*X-regular expressions making it possible to search for
 * an arbitrary byte pattern.
 *
 * NOTE:  In some esoteric cases the function `regex_match()' does not find
 *   the longest string that matches a given regex.  Example:
 *   the expression
 *     a*\(abcd\|bc\)
 *   doesn't not match the whole string `aaabcd' but only the substring
 *   `aaabc`.  I'm not sure if this is a bug, since `vi' and 'vim' do the
 *   same.
 */

/* Copyright (c) 1995,1996 Sascha Demetrio
 * Copyright (c) 2009 Peter Pentchev
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *    If you modify any part of HEXER and resitribute it, you must add
 *    a notice to the `README' file and the modified source files containing
 *    information about the  changes you made.  I do not want to take
 *    credit or be blamed for your modifications.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *    If you modify any part of HEXER and resitribute it in binary form,
 *    you must supply a `README' file containing information about the
 *    changes you made.
 * 3. The name of the developer may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * HEXER WAS DEVELOPED BY SASCHA DEMETRIO.
 * THIS SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * DISCLAIMER:
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "defs.h"

#undef EOF

#include "regex.h"

#define REGEX_MAXMATCH_DFL (1 << 10) /* default value for `rx_maxmatch'. */
#define REGEX_MAX_PARS (64) /* the maximum number of opened parentheses */
#define REGEX_MAX_BRANCH (64) /* the maximum number of branches per command */
#define REGEX_BLOCKSIZE_EXP 12

#define REGEX_BLOCKSIZE (1 << REGEX_BLOCKSIZE_EXP)
#define REGEX_BLOCKMASK (REGEX_BLOCKSIZE - 1)

/* The linear encoding of the RE generated by `regex_compile()' and
 * executed by `regex_match()' is:
 * (long ro)    `ro' is the offset of the replace code of the expression.
 * <search expression>
 *              the search expression may contain any of the commands below;
 *              each expression and subexpression is terminated with an
 *              `EOX' (end of expression) command.
 * (long 0)     a zero long is placed between the search expression and the
 *              replace expression.  there's no technical reason for that,
 *              it just makes the debugging a little bit easier.
 * <replace expression>
 *              the replace expression is made up of `STRING' and
 *              `BACKREF'-commands. it's terminated with an `EOX'-command.
 *              this part may be empty.
 * (long 0)     a trailing 0, just for fun.
 */

enum regex_e {  /* arguments
                 *   description. */

  ANY_CHAR = 1, /*   matches any character. */
  ANY_OF,       /* (char n) <list of `n' characters>
                 *   matches any of the characters in the list.  the list
		 *   has to be sorted. */
  ANY_BUT,      /* (char n) <list of `n' characters>
                 *   matches any but the listed characters.  the list has to
		 *   be sorted. */
  CHAR,         /* (char c)
                 *   matches the character `c'. */
  STRING,       /* (long n) <string of length `n'>
                 *   matches the given string exactly.  the null-character is
		 *   *not* interpreted as the end of the string. */
  PAR_OPEN,     /*   opening parenthesis. */
  PAR_CLOSE,    /* (long slot)
                 *   closing parenthesis.  the input matched since the last
		 *   unmatched opening parenthesis is stored in slot `n'. */
  BACKREF,      /* (long slot)
                 *   matches the string stored in the given slot exactly.
		 *   if the slot is undefined, it matches the empty string. */
  BRANCH,       /* (long n) (long m)
                 *   the following `n' bytes are two (`EOX'-terminated,
		 *   respectively) expressions, the first of which is `m'
		 *   bytes long.  these expressions are read as two alternate
		 *   branches. */
  REPEAT,       /* (long min) (long max) (long n)
                 *   the following `n' bytes are an `EOX'-terminated
		 *   expression E.  the `REPEAT'-expression R matches, if E
		 *   matches at least `min' succsessive strings.  R matches
		 *   at most `max' succsessive E strings.  if `max < 0',
		 *   `max' is set to `rx_maxmatch'. */
  FIXREPEAT,    /* (long min) (long max) (long len) (long n)
                 *   works like `REPEAT', but all matches found have to be
		 *   of the same (fixed) length `len'.  if `max < 0',
		 *   `max' is set to infinity, i.e. the number of matches
		 *   is not limited by `rx_maxmatch'. */
  BEGIN_WORD,   /*   matches the empty string, if it is preceded immediately
                 *   by the beginning of the buffer/file or a
		 *   non-alphanumerical character that is not an underscore. */
  END_WORD,     /*   matches the empty string, if it is followed immediately
                 *   by the end of the buffer/file or a non-alphanumerical
		 *   character that is not an underscore. */
  BEGIN_LINE,   /*   matches the empty string at the beginning of a line. */
  END_LINE,     /*   matches the empty string at the end of a line.
                 *   `\n' and `\r' are treated as end-of-line markers. */
  EOF,          /*   matches the empty string at the end of the file. */
  EOX           /*   end-of-expression.  */
};

#define SIZE_BRANCH 3
#define SIZE_REPEAT 4
#define SIZE_FIXREPEAT 5

enum error_e {
  E_no_error = 0,
  E_invalid_range,
  E_invalid_character,
  E_unmatched_opening_parenthesis,
  E_unmatched_closing_parenthesis,
  E_operator_without_operand,
  E_malformed_operator,
  E_operand_with_variable_length,
  E_unmatched_bracket,
  E_interrupt
};

char *rx_error_msg[] = {
  "", /* no error */ "invalid range", "invalid character", "unmatched `('",
  "unmatched `)'", "operator without operand", "malformed operator",
  "operand of `++' and `**' must have fixed length", "unmatched `['",
  "interrupted", 0
};

int rx_error;
static int interrupt = 0;
volatile int *rx_interrupt = &interrupt;

#undef isoct
#undef ishex
#define isoct(x) ((x) >= '0' && (x) < '8')
#define ishex(x) (isdigit(x) || (tolower(x) >= 'a' && tolower(x) <= 'f'))

#undef u_char
#undef u_short
#undef u_long
#define u_char unsigned char
#define u_short unsigned short
#define u_long unsigned long

static long (*rx_read)(char *target, long count);
  /* pointer to a function used for reading from the buffer/file.
   */
static long (*rx_seek)(long position);
  /* pointer to a function used for setting the pointer in the buffer/file.
   */
static long (*rx_tell)(void);
  /* pointer to a function returning the current position if the buffer/file.
   */
static long rx_size = 0;
  /* `rx_size == 0': the end of the buffer/file has not yet been hit.
   *   that means there are at least `REGEX_BLOCKSIZE' unread characters
   *   left.
   * `rx_size > 0': the end of the buffer/file has been hit.  the size of
   *   the buffer/file is `rx_size'.
   */
static int rx_start;
  /* character that a match has to start with.  if `rx_start == -1', matches
   * may start with different characters.
   */
static long rx_match_skip;
  /* if `regex_match()' (`regex_match_()' actually) matches an expression
   * (or subexpression), `rx_match_skip' is set to the position next
   * to the last character of that match.
   */

int rx_nomagic = 0;
  /* `rx_nomagic == 1': all special characters exept `*' and `[' have to be
   *   prefixed with `\'.
   */
int rx_allmagic = 0;
  /* `rx_allmagic == 1': special characters don't have to be prefixed
   *   with `\'.
   */
long rx_maxmatch = REGEX_MAXMATCH_DFL;
  /* `rx_maxmatch' is the maximum number of matches a repeat operator
   * can match, i.e. the search string `.*' matches at most `rx_maxmatch'
   * characters.
   */

int rx_special_nl = 0;
  /* if `rx_special_nl' is set, the newline character (0xa) is treated as a
   * special cahacter, i.e. the dot and `ANY_BUT'-ranges ("[^...]") do
   * *not* match a newline character.
   */

static char buffer_[3 * REGEX_BLOCKSIZE];
static char *buffer = buffer_ + REGEX_BLOCKSIZE;
static char *bp;
static long buffer_base;

#define REGEX_ADVANCE {                                                       \
  if (bp >= buffer + REGEX_BLOCKSIZE) {                                       \
    long ii;                                                                   \
    rx_memmove(buffer - REGEX_BLOCKSIZE, buffer, 2 * REGEX_BLOCKSIZE);        \
    buffer_base += REGEX_BLOCKSIZE;                                           \
    rx_seek(buffer_base + REGEX_BLOCKSIZE);                                   \
    ii = rx_read(buffer + REGEX_BLOCKSIZE, REGEX_BLOCKSIZE);                   \
    if (ii < REGEX_BLOCKSIZE) {                                                \
      rx_size = buffer_base + REGEX_BLOCKSIZE + ii;                            \
      memset(buffer + REGEX_BLOCKSIZE + ii, 0, REGEX_BLOCKSIZE - ii);           \
    }                                                                         \
    bp -= REGEX_BLOCKSIZE;                                                    \
  } else if (bp < buffer) {                                                   \
    rx_memmove(buffer + REGEX_BLOCKSIZE, buffer, REGEX_BLOCKSIZE);            \
    buffer_base -= REGEX_BLOCKSIZE;                                           \
    if (position >= REGEX_BLOCKSIZE) {                                        \
      rx_seek(buffer_base - REGEX_BLOCKSIZE);                                 \
      rx_read(buffer - REGEX_BLOCKSIZE, 2 * REGEX_BLOCKSIZE);                 \
    } else {                                                                  \
      rx_seek(buffer_base);                                                   \
      rx_read(buffer, REGEX_BLOCKSIZE);                                       \
    }                                                                         \
    bp += REGEX_BLOCKSIZE;                                                    \
  }                                                                           \
}

#if !HAVE_MEMMOVE
  static void
rx_memmove(char *t, const char *s, const long count)
{
  register long i;

  if (t > s)
    for (i = count - 1; i >= 0; --i) t[i] = s[i];
  else
    for (i = 0; i < count; ++i) t[i] = s[i];
}
/* rx_memmove */
#else
#define rx_memmove(a, b, c) (void)memmove(a, b, c)
#endif

  int
regex_init(long (*read)(char *, long), long (*seek)(long), long (*tell)(void))
{
  rx_read = read;
  rx_seek = seek;
  rx_tell = tell;
  return 0;
}
/* regex_init */

  static int
regex_set_position(long position)
  /* NOTE: `bp' and `buffer_base' *must* be initialized before calling
   *   `regex_set_position'.
   */
{
  long i;

  if (position - buffer_base >= 0) {
    if (position - buffer_base > REGEX_BLOCKSIZE) {
      if (position - buffer_base < 2 * REGEX_BLOCKSIZE) {
	bp = buffer + (position & REGEX_BLOCKMASK);
	rx_memmove(buffer - REGEX_BLOCKSIZE, buffer, 2 * REGEX_BLOCKSIZE);
	buffer_base += REGEX_BLOCKSIZE;
	rx_seek(buffer_base);
	i = rx_read(buffer + REGEX_BLOCKSIZE, REGEX_BLOCKSIZE);
	if (i < REGEX_BLOCKSIZE) {
	  rx_size = buffer_base + i;
	  memset(buffer + REGEX_BLOCKSIZE + i, 0, REGEX_BLOCKSIZE - i);
	}
	return 0;
      } else
        goto rebuild_buffer;
    }
    bp = buffer + (position & REGEX_BLOCKMASK);
    return 0;
  }

rebuild_buffer:
  bp = buffer + (position & REGEX_BLOCKMASK);
  buffer_base = position & ~REGEX_BLOCKMASK;
  rx_seek(buffer_base);
  i = rx_read(buffer, 2 * REGEX_BLOCKSIZE);
  if (i < 2 * REGEX_BLOCKSIZE) {
    rx_size = buffer_base + i;
    memset(buffer + i, 0, 2 * REGEX_BLOCKSIZE - i);
  }
  return 0;
}
/* regex_set_position */

#define RX_NSLOTS 256

static long rx_store_begin[RX_NSLOTS];
static long rx_store_end[RX_NSLOTS];
static char *rx_store_match[RX_NSLOTS];
static int rx_store_initialized = 0;

  static int
regex_store(long slot, long begin, long end)
  /* The text between the positions `begin' (incl.) and `end' (excl.) is
   * stored in slot `slot'.  (Actually only the `begin' and `end' positions
   * are stored.)
   */
{
  assert(slot >= 0 && slot < RX_NSLOTS);
  assert(end >= begin);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  if (rx_store_match[slot]) {
    free((char *)rx_store_match[slot]);
    rx_store_match[slot] = 0;
  }
  rx_store_begin[slot] = begin;
  rx_store_end[slot] = end;
  return 0;
}
/* regex_store */

  static char *
regex_ref(int slot)
  /* The contents of undefined slots default to an empty string, therefore
   * the return value is always a valid (non-zero) pointer.
   */
{
  long position, count;

  assert(slot >= 0 && slot < RX_NSLOTS);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  position = rx_store_begin[slot];
  count = rx_store_end[slot] - position;
  if (!rx_store_match[slot] && count) {
    rx_store_match[slot] = malloc(count);
    rx_seek(position);
    rx_read(rx_store_match[slot], count);
    return rx_store_match[slot];
  } else if (rx_store_match[slot])
    return rx_store_match[slot];
  else
    return "";
}
/* regex_ref */

  static long
regex_ref_len(int slot)
  /* The length of an undefined slot is 0.
   */
{
  long position, count;

  assert(slot >= 0 && slot < RX_NSLOTS);
  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  }
  position = rx_store_begin[slot];
  count = rx_store_end[slot] - position;
  return count;
}
/* regex_ref_len */

  static void
regex_clear(void)
  /* clear all slots.
   */
{
  int i;

  if (!rx_store_initialized) {
    memset(rx_store_begin, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_end, 0, RX_NSLOTS * sizeof(long));
    memset(rx_store_match, 0, RX_NSLOTS * sizeof(char *));
    rx_store_initialized = 1;
  } else {
    for (i = 0; i < RX_NSLOTS; ++i) {
      if (rx_store_match[i]) {
	free((char *)rx_store_match[i]);
	rx_store_match[i] = 0;
      }
      rx_store_begin[i] = rx_store_end[i] = 0;
    }
  }
}
/* regex_clear */

  int
regex_reset()
{
  rx_error = 0;

  return 0;
}
/* regex_reset */

  long
regex_search(regex, begin, end, start, direction,
             replace_str, replace_len, match_len)
  long *regex;
  long begin;
  long end;
  long start;
  int direction; /* >=0: forward; <0: reverse. */
  char **replace_str;
  long *replace_len;  /* the replace-string for the match is written to
		       * `*replace_str'/`replace_len'.  the memory for
		       * `*replace_str' is allocated by `regex_match()'
		       * via `malloc()'. */
  long *match_len;
{
  long position;
  int i;
  char *bp1;
  long base1;

  direction = direction < 0 ? -1 : 1;
  if (end < 0) end = (u_long)~0 >> 1;
  position = start;

  /* initialize the buffer */
  bp = buffer + (position & REGEX_BLOCKMASK);
  buffer_base = position & ~REGEX_BLOCKMASK;
  rx_seek(buffer_base);
  i = rx_read(buffer, 2 * REGEX_BLOCKSIZE);
  if (i < 2 * REGEX_BLOCKSIZE) {
    rx_size = buffer_base + i;
    memset(buffer + i, 0, 2 * REGEX_BLOCKSIZE - i);
  } else
    rx_size = 0;

  if (rx_size && end > rx_size) end = rx_size;
  if (rx_size && position >= rx_size) position = rx_size - 1;
  while (position < end && position >= begin) {
    if (rx_start >= 0)
      while (*bp != rx_start && position < end) {
	if (*rx_interrupt) {
	  rx_error = (int)E_interrupt;
          *rx_interrupt = 0;
	  regex_clear();
	  return -1;
	}
	bp += direction;
	position += direction;
	REGEX_ADVANCE;
	if (rx_size && end > rx_size) end = rx_size;
      }
    bp1 = bp;
    base1 = buffer_base;
    if (regex_match(regex, position, replace_str, replace_len, match_len))
      return position;
    if (rx_error) return -1;
    if (rx_size && end > rx_size) end = rx_size;
    position += direction;
    if (base1 == buffer_base) {
      bp = bp1 + direction;
      REGEX_ADVANCE;
    } else {
      regex_set_position(position);
      if (rx_size && end > rx_size) end = rx_size;
    }
  }
  return -1;
}
/* regex_search */

  static int
regex_match_(long *regex, long position, int *parstack, int *parsp)
{
  long *pp = regex;
  long p = position;

  enum regex_e opcode;
  long min, max;
  long operand;
  long operand2;
  char *bp1;
  long base1;
  int i;

  int local_parstack[REGEX_MAX_PARS];
  int local_parsp = *parsp;
    /* Stack of unmatched parentheses while running the regex-code.
     */

  for (i = local_parsp - 1; i >= 0; --i) local_parstack[i] = parstack[i];
  if (rx_error) return 0;
  regex_set_position(p);
  for (; !rx_error;) {
    if (*rx_interrupt) {
      rx_error = (int)E_interrupt;
      regex_clear();
      *rx_interrupt = 0;
      return 0;
    }
    if ((opcode = (enum regex_e)*pp++) == EOX) goto match;
    if (rx_size) {
      if (opcode == EOF && p >= rx_size) goto match;
      if (p > rx_size) goto fail;
      if (p == rx_size && opcode != END_WORD && opcode != END_LINE) goto fail;
    }
    assert(local_parsp >= 0);
    switch (opcode) {
      case ANY_CHAR:
        if (rx_special_nl && *bp == '\n') goto fail;
	++p, ++bp;
	break;
      case CHAR:
	if (*bp++ != *pp++) goto fail;
	++p;
	break;
      case STRING:
        operand = *pp++; /* length of the string. */
	if (rx_size && p + operand > rx_size) goto fail;
	/* if (memcmp(bp, pp, operand)) goto fail; */
        for (; operand; --operand) if (*pp++ != *bp++) goto fail;
        /*
	pp += operand;
	bp += operand;
        */
	p += operand;
	break;
      case ANY_OF:
        operand = *pp++; /* number of characters */
	for (i = 0; i < operand && *bp > *pp; ++pp, ++i);
	if (*bp != *pp) goto fail;
	pp += operand - i;
	++p, ++bp;
	break;
      case ANY_BUT:
        operand = *pp++; /* number of characters */
        for (i = 0; i < operand && *bp > *pp; ++pp, ++i);
	if (*bp == *pp) goto fail;
        if (rx_special_nl && *bp == '\n') goto fail;
	pp += operand - i;
	++p, ++bp;
	break;
      case PAR_OPEN:
        local_parstack[local_parsp++] = p;
	break;
      case PAR_CLOSE:
        operand = *pp++; /* number of the slot. */
	regex_store(operand, local_parstack[--local_parsp], p);
	break;
      case BACKREF:
        operand = *pp++; /* number of the slot. */
	if (memcmp(bp, regex_ref(operand), i = regex_ref_len(operand)))
          goto fail;
	bp += i;
	p += i;
	if (i >= REGEX_BLOCKSIZE) regex_set_position(p);
	break;
      case BRANCH:
        operand = *pp++;
        operand2 = *pp++;
	bp1 = bp;
	base1 = buffer_base;
	if (regex_match_(pp, p, local_parstack, &local_parsp)) {
	  /* see if the first branch does the job... */
	  if (regex_match_(pp + operand, rx_match_skip,
                           local_parstack, &local_parsp)) {
	    p = rx_match_skip;
	    goto match;
	      /* NOTE:  this is not really correct, since we might have
	       *   found a longer match if we had taken the other branch,
	       *   but... `vi' does the same. */
	  }
        }
	/* the first branch didn't work, so the second branch has to. */
	pp += operand2;
	if (base1 == buffer_base) bp = bp1; else regex_set_position(p);
	break;
      case FIXREPEAT:
	min = *pp++;
	max = *pp++;
	operand = *pp++;
	operand2 = *pp++;
	for (i = 0; i < min; ++i) {
	  if (!regex_match_(pp, p, local_parstack, &local_parsp)) goto fail;
	  assert(rx_match_skip - p == operand);
	  p = rx_match_skip;
	}
	if (max < 0) max = (long)((u_long)~0 >> 1);
	if (max > min) {
	  long count;
	  long p_rec;
	  for (count = 0, p_rec = p; i < max; ++i) {
	    if (!regex_match_(pp, p, local_parstack, &local_parsp)) break;
	    ++count;
	    assert(rx_match_skip - p == operand);
	    p = rx_match_skip;
	  }
	  pp += operand2;
	  while (count && !rx_error) {
	    if (regex_match_(pp, p, local_parstack, &local_parsp)) {
	      p = rx_match_skip;
	      goto match;
	    }
	    p = p_rec + --count * operand;
	  }
	  regex_set_position(p);
	} else
	  pp += operand2;
	break;
      case REPEAT:
	min = pp[0];
	max = pp[1];
	operand = pp[2];
        pp += 3;
	for (i = 0; i < min; ++i) {
	  if (!regex_match_(pp, p, local_parstack, &local_parsp)) goto fail;
	  p = rx_match_skip;
	  regex_set_position(p);
	}
	if (max < 0) max = rx_maxmatch;
	if (max > min) {
	  long *stack, sp;
	  stack = (long *)malloc((max - min + 1) * sizeof(long));
	  for (sp = 0; max < 0 ? 1 : i < max; ++i) {
	    if (!regex_match_(pp, p, local_parstack, &local_parsp)) break;
	    stack[sp++] = p;
	    p = rx_match_skip;
	  }
	  pp += operand;
	  while (sp && !rx_error) {
	    if (regex_match_(pp, p, local_parstack, &local_parsp)) {
	      p = rx_match_skip;
	      free((char *)stack);
	      goto match;
	    }
	    p = stack[--sp];
	  }
	  regex_set_position(p);
	  free((char *)stack);
	} else
	  pp += operand;
	break;
      case BEGIN_WORD:
        if (!p) break;
        regex_set_position(--p);
	++p, ++bp;
	if (isalnum(bp[-1]) || bp[-1] == '_') goto fail;
        break;
      case END_WORD:
        if (rx_size && p == rx_size) break;
	if (isalnum(bp[0]) || bp[0] == '_') goto fail;
        if (!isalnum(bp[-1]) && bp[-1] != '_') goto fail;
	break;
      case BEGIN_LINE:
        if (!p) break;
	regex_set_position(--p);
	++p, ++bp;
	if (bp[-1] != '\n') goto fail;
	break;
      case END_LINE:
        if (rx_size && p == rx_size) break;
	if (bp[0] != '\n' && bp[0] != '\r') goto fail;
	break;
      case EOF:
        goto fail;
      default:
        abort();
    }
  } /* for */

  goto fail;

match:
  rx_match_skip = p;
  if (parstack) {
    for (i = local_parsp - 1; i >= 0; --i) parstack[i] = local_parstack[i];
    *parsp = local_parsp;
  }
  return 1;

fail:
  return 0;
}
/* regex_match_ */

  int
regex_match(regex, position, replace_str, replace_len, match_len)
  long *regex;        /* pointer to the regex-code to be processed. */
  long position;      /* position in the input stream. */
  char **replace_str;
  long *replace_len;  /* if a match is found at `position',
                       * the replace-string for the match is written to
		       * `*replace_str'/`replace_len'.  the memory for
		       * `*replace_str' is allocated by `regex_match()'
		       * via `malloc()'. */
  long *match_len;    /* if a match is found at `position',
                       * the length of the match is written to
                       * `*match_len'. */
{
  long *pp = regex + *regex;
  int i;
  int zero = 0;

  if (regex_match_(regex + 1, position, 0, &zero)) {
    *match_len = rx_match_skip - position;
    if (!*pp) {
      *replace_str = (char *)malloc(1);
      **replace_str = 0;
      *replace_len = 0;
    } else {
      char *cp;
      long *pp_rec = pp;
      /* store the match to slot 0. */
      regex_store(0, position, rx_match_skip);
      /* check out how long the replace-string will be. */
      for (*replace_len = 0;;) {
	if (*pp == (long)EOX) {
	  ++pp;
	  break;
	}
	switch ((enum regex_e)*pp++) {
	  case STRING:
	    *replace_len += *pp;
	    pp += *pp + 1;
	    break;
	  case CHAR:
	    ++*replace_len;
	    ++pp;
	    break;
	  case BACKREF:
	    *replace_len += regex_ref_len(*pp++);
	    break;
	  default:
	    abort();
	}
      }
      assert(!*pp);
      cp = *replace_str =
        (char *)malloc(*replace_len + !*replace_len);
      for (pp = pp_rec;;) {
	long reflen;
	if (*pp == (long)EOX) break;
	switch ((enum regex_e)*pp++) {
	  case STRING:
            for (i = 0; i < *pp; ++i) cp[i] = pp[i + 1];
	    cp += *pp;
	    pp += *pp + 1;
	    break;
	  case CHAR:
	    *cp++ = *pp++;
	    break;
	  case BACKREF:
	    memcpy(cp, regex_ref(*pp), reflen = regex_ref_len(*pp));
	    cp += reflen;
	    ++pp;
	    break;
	  default:
	    abort();
	}
      }
    }
    return 1;
  }
  return 0;
}
/* regex_match */

  long *
regex_compile(str, replace)
  char *str;
  char *replace;
  /* Compile the regular expression `str' and return a pointer to the
   * compiled regex-code.  If `replace' is non-zero, the string `replace'
   * is interpreted as a replace-string, which may contain `\'-escape
   * sequences including references to parenthesized matches (`\0',
   * `\1', ... `\9'), where `\0' stands for the whole match.
   */
{
# define REGEX_EOF(position) (rx_size ? (position) >= rx_size : 0)
# define EAT_WHITESPACE { while (*cp == ' ' || *cp == '\t') ++cp; }
# define GET_OCT(o, n) {                                                      \
  for ((o) = 0, i = 0; i < (n) && isoct(*cp) && ((o) << 1) >= 0; ++i, ++cp)   \
    (o) *= 8, (o) += *cp - '0'; }
# define GET_HEX(h, n) {                                                      \
  for ((h) = 0, i = 0; i < (n) && ishex(*cp) && ((h) << 1) >= 0; ++i, ++cp)   \
    (h) *= 16, (h) += *cp > '9' ? tolower(*cp) - 'a' + 10 : *cp - '0'; }
# define GET_DEC(d, n) {                                                      \
  for ((d) = 0, i = 0; i < (n) && isdigit(*cp) && ((d) << 1) >= 0; ++i, ++cp) \
    (d) *= 10, (d) += *cp - '0'; }
# define GET_NUMBER(n) {                                                      \
  if (*cp == '0')                                                             \
    if (*++cp == 'x') { ++cp; GET_HEX((n), 32); } else { GET_OCT((n), 32); }  \
  else { GET_DEC((n), 32); } }

  char *cp, *s;

  int pc;
    /* Parentheses counter; number of unmatched parentheses.
     */
  long *par[1024];
    /* Stack of positions of opening parentheses.
     * For `pc > 0', `par[pc]' points to the first regex-command after the
     * `PAR_OPEN'-command that opened the current parentheses-level `pc'.
     * `par[0]' is equal to `regex'.
     */
  long *branch[1024];
    /* Stack of positions of branches.
     * The top element points to the first regex-command in the branch
     * currently generated (in the current parentheses-level `pc').
     */

  /* We need to know the positions of *all* successive `BRANCH'-commands
   * for *all* levels of parentheses.  This means we need a stack of
   * stacks.  The first stack pointer is `pc', the parentheses counter.
   */
  long *branch_n[REGEX_MAX_PARS][REGEX_MAX_BRANCH];
    /* On par-level `pc', `branch_n[pc][x]' points to the `x'th `BRANCH'-
     * command on that level.
     */
  int branch_len[REGEX_MAX_PARS][REGEX_MAX_BRANCH];
    /* On par-level `pc', `branch_len[pc][x]' holds the length of the `x'th
     * `BRANCH'-command on that level.  If `branch_len[pc][x] < 0' the]
     * length of that branch is variable.
     */
  int branch_n_c[REGEX_MAX_PARS];
    /* On par-level `pc' there are `branch_n_c[pc]' `BRANCH'-commands.
     */

# define BRANCH_LEN_INC(x) {                                                  \
    if (branch_len[pc][branch_n_c[pc]] >= 0)                                  \
      branch_len[pc][branch_n_c[pc]] += (x);                                  \
    exp_len = (x);                                                            \
  }

  long ref_len[256];
    /* If we want to know the length of all strings (if unique) matching
     * an expression which contains back-references, we need to store the
     * lengths of all references.
     */
  long *expression;
    /* Pointer to the beginning of the current expression.  This pointer
     * is relevant for processing operators (like `\?', `\*', `\+', ...).
     */
  int exp_len;
    /* length of the current expression.
     */
  int escape;
    /* Set to 1, if the last read character was a '\\', else 0.
     */
  int start;
  int slot;
    /* Number of slots used by pairs of parentheses.  The slot 0 is reserved
     * for the whole expression.
     */
  static long *regex = 0;
  int regex_size = 0;
  const int regex_blocksize = 1 << 16;
  long *pp;
  char escape_char[128];

  int i, j;

  escape_char['a'] = '\a'; escape_char['b'] = '\b'; escape_char['f'] = '\f';
  escape_char['n'] = '\n'; escape_char['r'] = '\r'; escape_char['t'] = '\t';
  escape_char['v'] = '\v';

  /* we don't know yet, how much memory will be needed to store the resulting
   * regex-code, so we allocate `regex_blocksize' bytes in the first place.
   * if it turns out to be insufficient, we simply cancel the compilation,
   * reallocate the double amount of memory and restart from the beginning.
   * the following macro will take care of watching the memory-consumption:
   */
# define CHECK_PP {                                                           \
    if (pp - regex > regex_size * regex_blocksize - 512) goto compile;        \
  }

  if (regex) free((char *)regex);

compile:
  if (regex_size) {
    regex_size <<= 1;
    regex =
      (long *)realloc(regex, regex_size * regex_blocksize * sizeof(long));
  } else
    ++regex_size, regex =
      (long *)malloc(regex_blocksize * sizeof(long));

  par[0] = regex + 1;
  branch[0] = regex + 1;
  branch_n_c[0] = 0;
  branch_len[0][0] = 0;
  pp = regex;
  cp = str;
  expression = 0;
  exp_len = 0;
  escape = 0;
  start = 1;
  pc = 0;
  slot = 1;

  rx_start = -1;
  branch[-1] = regex;

  ++pp; /* leave space for the replace offset */
  while (*cp) {
    switch (*cp++) {
      /* escape character */
      case '\\':
        if (escape) {
	  expression = pp;
	  *pp++ = CHAR;
	  *pp++ = '\\';
	  if (start) rx_start = '\\';
	  BRANCH_LEN_INC(1);
	} else {
	  ++escape;
	  continue;
	}
	break;

      /* simple escapes */
      case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v':
        if (escape) {
	  expression = pp;
	  *pp++ = CHAR;
	  *pp++ = escape_char[(int)cp[-1]];
	  if (start) rx_start = pp[-1];
	  BRANCH_LEN_INC(1);
	} else
	  goto Default;
	break;
	  
      /* octal, hex, decimal escapes */
      case 'o':
        /* The strings `\0', `\1', ... `\9' are interpreted as back
	 * references, therefore octal escape sequences have to be
	 * prefixed with `\o'.
	 */
        if (escape) {
	  int oct;
	  expression = pp;
	  if (*cp == 'o') { /* octal string */
	    long *length;
	    ++cp;
	    *pp++ = STRING;
	    *(length = pp++) = 1;
	    EAT_WHITESPACE;
	    for (;; ++*length) {
	      GET_OCT(oct, 3);
	      if (oct > 255) { rx_error = (int)E_invalid_character; break; }
	      *pp++ = oct;
	      CHECK_PP;
	      if (start) rx_start = pp[-1], start = 0;
	      EAT_WHITESPACE;
	      if (!isoct(*cp)) break;
	    }
	    BRANCH_LEN_INC(*length);
	  } else { /* octal character */
	    GET_OCT(oct, 3);
	    if (oct > 255) { rx_error = (int)E_invalid_character; break; }
	    *pp++ = CHAR;
	    *pp++ = oct;
	    if (start) rx_start = pp[-1];
	    BRANCH_LEN_INC(1);
	  }
	} else
	  goto Default;
	break;
      case 'x':
        if (escape) {
	  int hex;
	  expression = pp;
	  if (*cp == 'x') { /* hex string */
	    long *length;
	    ++cp;
	    *pp++ = STRING;
	    *(length = pp++) = 1;
	    EAT_WHITESPACE;
	    for (;; ++*length) {
	      GET_HEX(hex, 2);
	      *pp++ = hex;
	      CHECK_PP;
	      if (start) rx_start = pp[-1], start = 0;
	      EAT_WHITESPACE;
	      if (!ishex(*cp)) break;
	    }
	    BRANCH_LEN_INC(*length);
	  } else { /* hex character */
	    GET_HEX(hex, 2);
	    *pp++ = (long)CHAR;
	    *pp++ = hex;
	    if (start) rx_start = pp[-1];
	    BRANCH_LEN_INC(1);
	  }
	} else
	  goto Default;
	break;
      case 'd':  /* special: decimal */
        if (escape) {
	  int dec;
	  expression = pp;
	  if (*cp == 'd') { /* decimal string */
	    long *length;
	    ++cp;
	    *pp++ = STRING;
	    *(length = pp++) = 1;
	    EAT_WHITESPACE;
	    for (;; ++*length) {
	      GET_DEC(dec, 3);
	      if (dec > 255) { rx_error = (int)E_invalid_character; break; }
	      CHECK_PP;
	      *pp++ = dec;
	      if (start) rx_start = pp[-1], start = 0;
	      EAT_WHITESPACE;
	      if (!isdigit(*cp)) break;
	    }
	    BRANCH_LEN_INC(*length);
	  } else { /* decimal character */
	    GET_DEC(dec, 3);
	    if (dec > 255) { rx_error = (int)E_invalid_character; break; }
	    *pp++ = CHAR;
	    *pp++ = dec;
	    if (start) rx_start = pp[-1];
	    BRANCH_LEN_INC(1);
	  }
	} else
	  goto Default;
	break;

      /* ranges */
      case '[':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
	  u_char any[1024], table[256];
	  int nescape = 0;
	  i = 0;
	  expression = pp;
	  if (*cp == '^')
	    ++cp, *pp++ = ANY_BUT;
	  else
	    *pp++ = ANY_OF;
	  do {
	    switch (*cp++) {
	      case 0:
	        rx_error = (int)E_unmatched_bracket;
		break;
	      case '\\':
	        if (!nescape) { nescape = 1; continue; } else goto Default1;
		break;
	      case '-':
	        if (*cp == ']' || !i)
		  any[i++] = '-';
		else {
		  if (any[i - 1] > *cp) {
		    rx_error = (int)E_invalid_range;
		    break;
		  }
		  for (j = any[i - 1] + 1; j <= *cp; ++j) any[i++] = j;
		  ++cp;
		}
		break;
	      case 'a': case 'b': case 'f': case 'n': case 'r':
	      case 't': case 'v':
		if (nescape)
		  any[i++] = escape_char[(int)cp[-1]];
		else
		  goto Default1;
		break;
	      case '0': case '1': case '2': case '3': case '4':
	      case '5': case '6': case '7':
	        if (nescape) {
		  /* using back references in a range doesn't make sense, so
		   * we treat them as octal characters. */
		  --cp;
		} else
		  goto Default1;
		/* fall through */
	      case 'o':  /* octal character */
	        if (nescape) {
		  int oct;
		  GET_OCT(oct, 3);
		  if (oct > 255) {
                    rx_error = (int)E_invalid_character;
                    break;
                  }
		  any[i++] = oct;
		} else
		  goto Default1;
		break;
	      case 'd':  /* decimal character */
	        if (nescape) {
		  int dec;
		  GET_DEC(dec, 3);
		  if (dec > 255) {
                    rx_error = (int)E_invalid_character;
                    break;
                  }
		  any[i++] = dec;
		} else
		  goto Default1;
		break;
	      case 'x':  /* hex character */
	        if (nescape) {
		  int hex;
		  GET_HEX(hex, 2);
		  any[i++] = hex;
		} else
		  goto Default1;
		break;
	      default: Default1:
	        any[i++] = cp[-1];
	    }
	    if (rx_error) break;
	    nescape = 0;
	  } while (*cp != ']');
	  if (rx_error) break;
	  ++cp;
	  /* sort the list and remove duplicate entries. */
	  memset(table, 0, 256);
	  for (j = 0; j < i; ++j) table[any[j]] = 1;
	  for (i = 0, j = 0; j < 256; ++j) if (table[j]) any[i++] = j;
	  *pp++ = i;
	  for (j = 0; j < i; ++j) *pp++ = any[j];
	  BRANCH_LEN_INC(1);
	} else
	  goto Default;
	break;

      /* parentheses, branches */
      case '(':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  expression = 0;
	  *pp++ = PAR_OPEN;
	  ++pc;
	  branch[pc] = par[pc] = pp;
	  branch_len[pc][0] = 0;
	  branch_n_c[pc] = 0;
	} else
	  goto Default;
	break;
      case '|':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  expression = 0;
	  *pp++ = EOX; /* end of branch */
	  /* insert a `BRANCH'-command at `branch[pc]'. */
	  for (i = -1; pp + i >= branch[pc]; --i)
	    pp[i + SIZE_BRANCH] = pp[i];
	  branch[pc][0] = BRANCH;
	  branch[pc][2] = pp - branch[pc];
	  /* store the position of this `BRANCH' command. */
	  branch_n[pc][branch_n_c[pc]++] = branch[pc];
	  branch_len[pc][branch_n_c[pc]] = 0;
	  pp += SIZE_BRANCH;
	  branch[pc] = pp;
	  if (!pc) rx_start = -1;
	} else
	  goto Default;
	break;
      case ')':
        if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  /* NOTE: if the parenthesized expression is split up into
	   * multiple branches, the last branch is *not* terminated
	   * by an `EOX'. */
	  *pp++ = PAR_CLOSE;
	  *pp++ = slot++;
	  /* first we'll check if all branches have equal length. */
	  j = branch_len[pc][0];
	  if (branch_n_c[pc]) {
	    for (i = 1; i <= branch_n_c[pc]; ++i)
	      if (j != branch_len[pc][i]) { i = 0; break; }
	  } else
	    i = 1;
	  /* fill in the `n'-field (see description of the `BRANCH'-command)
	   * of all `BRANCH'-commands of the current par-level `pc'. */
	  while (branch_n_c[pc]--) {
	    branch_n[pc][branch_n_c[pc]][1] =
	      pp - (branch_n[pc][branch_n_c[pc]] + SIZE_BRANCH + 2);
	  }
	  expression = par[pc] - 1;
	  if (!pc--) {
	    rx_error = (int)E_unmatched_closing_parenthesis;
	    break;
	  }
	  /* if `i' is set, `j' holds the uniqe length of all branches. */
	  if (i) {
	    BRANCH_LEN_INC(j);
	  } else {
	    branch_len[pc][branch_n_c[pc]] = -1;
	    exp_len = -1;
	  }
	  ref_len[slot - 1] = i ? j : -1;
	} else
	  goto Default;
	break;

      /* back references */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        if (escape) {
	  expression = pp;
	  *pp++ = BACKREF;
	  *pp++ = cp[-1] - '0';
	  if (ref_len[cp[-1] - '0'] >= 0) {
	    BRANCH_LEN_INC(ref_len[cp[-1] - '0']);
	  } else {
	    branch_len[pc][branch_n_c[pc]] = -1;
	    exp_len = -1;
	  }
	} else
	  goto Default;
        break;
        
      /* repeat operators */
      case '=': /* ... to stay compatible with VIM */
      case '?':
	if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  if (!expression) {
	    rx_error = (int)E_operator_without_operand;
	    break;
	  }
	  *pp++ = EOX; /* end of expression */
	  /* insert a `REPEAT'-command at `expression'. */
	  for (i = -1; pp + i >= expression; --i)
	    pp[i + SIZE_REPEAT] = pp[i];
	  expression[0] = REPEAT;
	  expression[1] = 0;
	  expression[2] = 1;
	  expression[3] = pp - expression;
	  pp += SIZE_REPEAT;
	  branch_len[pc][branch_n_c[pc]] = -1;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '+':
	if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  int no_limit = 0;
	  if (!expression) {
	    rx_error = (int)E_operator_without_operand;
	    break;
	  }
	  if (*cp == '+') {
	    if (exp_len < 0) {
	      rx_error = (int)E_operand_with_variable_length;
	      break;
	    }
	    no_limit = 1;
	    ++cp;
	  }
	  *pp++ = EOX; /* end of expression */
	  if (exp_len < 0) {
	    /* insert a `REPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_REPEAT] = pp[i];
	    expression[0] = REPEAT;
	    expression[1] = 1;
	    expression[2] = -1;
	    expression[3] = pp - expression;
	    pp += SIZE_REPEAT;
	  } else {
	    /* insert a `FIXREPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_FIXREPEAT] = pp[i];
	    expression[0] = FIXREPEAT;
	    expression[1] = 1;
	    expression[2] = no_limit ? -1 : rx_maxmatch;
	    expression[3] = exp_len;
	    expression[4] = pp - expression;
	    pp += SIZE_FIXREPEAT;
	    exp_len = -1;
	  }
	  branch_len[pc][branch_n_c[pc]] = -1;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '*':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
	  int no_limit = 0;
	  if (!expression) {
	    rx_error = (int)E_operator_without_operand;
	    break;
	  }
	  if (*cp == '*') {
	    if (exp_len < 0) {
	      rx_error = (int)E_operand_with_variable_length;
	      break;
	    }
	    no_limit = 1;
	    ++cp;
	  }
	  *pp++ = EOX; /* end of expression */
	  if (exp_len < 0) {
	    /* insert a `REPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_REPEAT] = pp[i];
	    expression[0] = REPEAT;
	    expression[1] = 0;
	    expression[2] = -1;
	    expression[3] = pp - expression;
	    pp += SIZE_REPEAT;
	  } else {
	    /* insert a `FIXREPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_FIXREPEAT] = pp[i];
	    expression[0] = FIXREPEAT;
	    expression[1] = 0;
	    expression[2] = no_limit ? -1 : rx_maxmatch;
	    expression[3] = exp_len;
	    expression[4] = pp - expression;
	    pp += SIZE_FIXREPEAT;
	    exp_len = -1;
	  }
	  branch_len[pc][branch_n_c[pc]] = -1;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '{':
	if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  long min, max;
	  if (!expression) {
	    rx_error = (int)E_operator_without_operand;
	    break;
	  }
	  EAT_WHITESPACE;
	  GET_NUMBER(min);
	  EAT_WHITESPACE;
          if (*cp == '\\') ++cp;
	  if (*cp == '}')
	    max = min;
	  else {
	    if (*cp++ != ',') { rx_error = (int)E_malformed_operator; break; }
	    EAT_WHITESPACE;
	    GET_NUMBER(max);
	    EAT_WHITESPACE;
	  }
          if (*cp == '\\') ++cp;
	  if ((max && max < min) || *cp++ != '}') {
	    rx_error = (int)E_malformed_operator;
	    break;
	  }
	  if (!max) max = exp_len < 0 ? rx_maxmatch : -1;
	  *pp++ = EOX; /* end of expression */
	  if (exp_len < 0) {
	    /* insert a `REPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_REPEAT] = pp[i];
	    expression[0] = REPEAT;
	    expression[1] = min;
	    expression[2] = max;
	    expression[3] = pp - expression;
	    pp += SIZE_REPEAT;
	  } else {
	    /* insert a `FIXREPEAT'-command at `expression'. */
	    for (i = -1; pp + i >= expression; --i)
	      pp[i + SIZE_FIXREPEAT] = pp[i];
	    expression[0] = FIXREPEAT;
	    expression[1] = min;
	    expression[2] = max;
	    expression[3] = exp_len;
	    expression[4] = pp - expression;
	    pp += SIZE_FIXREPEAT;
	    exp_len = -1;
	  }
	  if (min != max)
	    branch_len[pc][branch_n_c[pc]] = -1;
	  else
	    BRANCH_LEN_INC(min);
	  expression = 0;
	} else
	  goto Default;
	break;

      /* context specifiers */
      case '<':
	if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  *pp++ = BEGIN_WORD;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '>':
	if ((escape && !rx_allmagic) || (!escape && rx_allmagic)) {
	  *pp++ = END_WORD;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '^':
        if (!escape) {
	  *pp++ = BEGIN_LINE;
	  expression = 0;
	} else
	  goto Default;
	break;
      case '$':
        if (!escape) {
	  *pp++ = END_LINE;
	  expression = 0;
	} else
	  goto Default;
	break;

      /* any character */
      case '.':
        if ((escape && rx_nomagic) || (!escape && !rx_nomagic)) {
	  expression = pp;
	  *pp++ = ANY_CHAR;
	  BRANCH_LEN_INC(1);
	} else
	  goto Default;
	break;
	
      /* single character */
      default: Default:
        expression = pp;
	if (start) rx_start = cp[-1];
        *pp++ = CHAR;
	*pp++ = cp[-1];
	BRANCH_LEN_INC(1);
	break;
    } /* switch */
    start = escape = 0;
    CHECK_PP;
    if (rx_error) break;
  } /* while */

  if (pc) {
    if (!rx_error) rx_error = (int)E_unmatched_opening_parenthesis;
    goto exit_regex_compile;
  }
    
  if (branch_n_c[0]) {
    /* NOTE: if the whole expression is split up into multiple branches, the
     * last branch is *not* terminated by an `EOX'.
     */
    /* fill in the `n'-field (see description of the `BRANCH'-command)
     * of all `BRANCH'-commands. */
    while (branch_n_c[0]--) {
      branch_n[pc][branch_n_c[0]][1] =
	pp - (branch_n[pc][branch_n_c[0]] + SIZE_BRANCH);
    }
  }
  *pp++ = EOX;
  *pp++ = 0;

  *regex = pp - regex; /* set the replace offset */
  if (!replace) {
    *pp++ = 0;
    goto exit_regex_compile;
  }
  /* compile the `replace'-string:  we only need two commands for the
   * replace code:  `STRING' and `BACKREF'.
   */
  cp = replace;
  escape = 0;
  s = (char *)malloc(strlen(replace) + 1);
  j = 0;
  while (*cp) {
    switch (*cp++) {
      /* escape character */
      case '\\':
        if (!escape) {
	  ++escape;
	  continue;
	} else
	  goto Default2;
	break;

      /* simple escapes */
      case 'a': case 'b': case 'f': case 'n': case 'r': case 't': case 'v':
        if (escape) s[j++] = escape_char[(int)cp[-1]]; else goto Default2;
	break;
	  
      /* octal, hex, decimal escapes */
      case 'o':
        /* The strings `\0', `\1', ... `\9' are interpreted as back
	 * references, therefore octal escape sequences have to be
	 * prefixed with `\o'.
	 */
        if (escape) {
	  int oct;
	  if (*cp == 'o') { /* octal string */
	    ++cp;
	    EAT_WHITESPACE;
	    for (;;) {
	      GET_OCT(oct, 3);
	      if (oct > 255) { rx_error = (int)E_invalid_character; break; }
	      s[j++] = (char)oct;
	      EAT_WHITESPACE;
	      if (!isoct(*cp)) { ++cp; break; }
	    }
	  } else { /* octal character */
	    GET_OCT(oct, 3);
	    if (oct > 255) { rx_error = (int)E_invalid_character; break; }
	    s[j++] = (char)oct;
	  }
	} else
	  goto Default2;
	break;
      case 'x':
        if (escape) {
	  int hex;
	  if (*cp == 'x') { /* hex string */
	    ++cp;
	    EAT_WHITESPACE;
	    for (;;) {
	      GET_HEX(hex, 2);
	      s[j++] = (char)hex;
	      EAT_WHITESPACE;
	      if (!ishex(*cp)) { ++cp; break; }
	    }
	  } else { /* hex character */
	    GET_HEX(hex, 2);
	    s[j++] = (char)hex;
	  }
	} else
	  goto Default2;
	break;
      case 'd':
        if (escape) {
	  int dec;
	  if (*cp == 'd') { /* decimal string */
	    ++cp;
	    EAT_WHITESPACE;
	    for (;;) {
	      GET_DEC(dec, 3);
	      if (dec > 255) { rx_error = (int)E_invalid_character; break; }
	      s[j++] = (char)dec;
	      EAT_WHITESPACE;
	      if (!isdigit(*cp)) { ++cp; break; }
	    }
	  } else { /* decimal character */
	    GET_DEC(dec, 3);
	    if (dec > 255) { rx_error = (int)E_invalid_character; break; }
	    s[j++] = (char)dec;
	  }
	} else
	  goto Default2;
	break;

      /* back references */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        if (escape) {
	  if (j) {
	    if (pp - regex + j > regex_size * regex_blocksize - 64)
	      goto compile;
	      /* it is sufficient lo leave 64 bytes of extra space here,
	       * because there won't be any `ANY_OF' of `ANY_BUT' commands
	       * in the replace-code. */
	    *pp++ = STRING;
	    *pp++ = j;
	    for (i = 0; i < j; ++i) *pp++ = s[i];
	    j = 0;
	  }
	  *pp++ = BACKREF;
	  *pp++ = cp[-1] - '0';
	} else
	  goto Default2;
	break;

      default: Default2:
        s[j++] = cp[-1];
	break;
    } /* switch */
    escape = 0;
    if (rx_error) break;
    CHECK_PP;
  } /* while */
  if (j) {
    if (pp - regex + j > regex_size * regex_blocksize - 64) goto compile;
    *pp++ = STRING;
    *pp++ = j;
    for (i = 0; i < j; ++i) *pp++ = s[i];
  }
  *pp++ = EOX;
  *pp = 0;

exit_regex_compile:
  return rx_error ? 0 : regex;
}
/* regex_compile */

#if 0
  static void
regex_list_(regex, count, indent, base)
  unsigned long *regex;
  int count;
  int indent;
  int base;
{
  unsigned long *pp = regex;
  int j;

  for (; pp - regex < count;) {
    printf("%3i : ", (pp - regex) + base);
    for (j = 0; j < indent; ++j) putchar(' ');
    switch (*pp++) {
    case ANY_CHAR:
      printf("ANY_CHAR\n");
      break;
    case ANY_OF: 
      printf("ANY_OF <%lu> [", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      printf("]\n");
      pp += *pp + 1;
      break;
    case ANY_BUT:
      printf("ANY_BUT <%lu> [", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      printf("]\n");
      pp += *pp + 1;
      break;
    case CHAR:
      printf("CHAR ");
      if (isprint(*pp))
        printf("'%lu'\n", *pp);
      else
        printf("'\\x%02lx'\n", *pp);
      ++pp;
      break;
    case STRING:
      printf("STRING <%lu> \"", *pp);
      for (j = 0; j < *pp; ++j)
        if (isprint(pp[j + 1]))
          printf("%lu", pp[j + 1]);
        else
          printf("\\x%02lx", pp[j + 1]);
      pp += *pp + 1;
      printf("\"\n");
      break;
    case PAR_OPEN:
      printf("PAR_OPEN\n");
      break;
    case PAR_CLOSE:
      printf("PAR_CLOSE <slot=%lu>\n", *pp);
      ++pp;
      break;
    case BACKREF:
      printf("BACKREF <slot=%lu>\n", *pp);
      ++pp;
      break;
    case BRANCH:
      printf("BRANCH <%lu> <%lu>\n", pp[0], pp[1]);
      regex_list_(pp + 2, pp[1], indent + 2, base + (pp - regex) + 2);
      regex_list_(pp + 2 + pp[1], pp[0] - pp[1], indent + 2,
                  base + (pp - regex) + 2 + pp[1]);
      pp += 2 + pp[0];
      break;
    case REPEAT:
      printf("REPEAT <min=%lu> <max=%lu> <%lu>\n", pp[0], pp[1], pp[2]);
      regex_list_(pp + 3, pp[2], indent + 2, base + (pp - regex) + 3);
      pp += 3 + pp[2];
      break;
    case FIXREPEAT:
      printf("FIXREPEAT <min=%lu> <max=%lu> <len=%lu> <%lu>\n",
             pp[0], pp[1], pp[2], pp[3]);
      regex_list_(pp + 4, pp[3], indent + 2, base + (pp - regex) + 4);
      pp += 4 + pp[3];
      break;
    case BEGIN_WORD:
      printf("BEGIN_WORD\n");
      break;
    case END_WORD:
      printf("END_WORD\n");
      break;
    case BEGIN_LINE:
      printf("BEGIN_LINE\n");
      break;
    case END_LINE:
      printf("END_LINE\n");
      break;
    case EOF:
      printf("EOF\n");
      break;
    case EOX:
      printf("EOX\n");
      break;
    case 0:
      printf("--\n");
      return;
    default:
      printf("illegal opcode 0x%lx\n", pp[-1]);
      return;
    }
  }
}
/* regex_list_ */
#endif

/* end of regex.c */


/* VIM configuration: (do not delete this line)
 *
 * vim:bk:nodg:efm=%f\:%l\:%m:hid:icon:
 * vim:sw=2:sm:textwidth=79:ul=1024:wrap:
 */