1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
/***************************************************************
*
* Fuzzy string searching subroutines
*
* Author: John Rex
* Date: August, 1988
* References: (1) Computer Algorithms, by Sara Baase
* Addison-Wesley, 1988, pp 242-4.
* (2) Hall PAV, Dowling GR: "Approximate string matching",
* ACM Computing Surveys, 12:381-402, 1980.
*
* Verified on:
* Datalite, DeSmet, Ecosoft, Lattice, MetaWare, MSC, Turbo, Watcom
*
* Compile time preprocessor switches:
* DEBUG - if defined, include test driver
*
* Usage:
*
* char *pattern, *text; - search for pattern in text
* int degree; - degree of allowed mismatch
* char *start, *end;
* int howclose;
*
* void App_init(pattern, text, degree); - setup routine
* void App_next(&start, &end, &howclose); - find next match
*
* - searching is done when App_next() returns start==NULL
*
**************************************************************/
#define DEBUG 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* local, static data */
static char *Text, *Pattern; /* pointers to search strings */
static int Textloc; /* current search position in Text */
static int Plen; /* length of Pattern */
static int Degree; /* max degree of allowed mismatch */
static int *Ldiff, *Rdiff; /* dynamic difference arrays */
static int *Loff, *Roff; /* used to calculate start of match */
void App_init(char *pattern, char *text, int degree)
{
int i;
/* save parameters */
Text = text;
Pattern = pattern;
Degree = degree;
/* initialize */
Plen = strlen(pattern);
Ldiff = (int *) malloc(sizeof(int) * (Plen + 1) * 4);
Rdiff = Ldiff + Plen + 1;
Loff = Rdiff + Plen + 1;
Roff = Loff + Plen + 1;
for (i = 0; i <= Plen; i++)
{
Rdiff[i] = i; /* initial values for right-hand column */
Roff[i] = 1;
}
Textloc = -1; /* current offset into Text */
}
void App_next(char **start, char **end, int *howclose)
{
int *temp, a, b, c, i;
*start = NULL;
while (*start == NULL) /* start computing columns */
{
if (Text[++Textloc] == '\0') /* out of text to search! */
break;
temp = Rdiff; /* move right-hand column to left ... */
Rdiff = Ldiff; /* ... so that we can compute new ... */
Ldiff = temp; /* ... right-hand column */
Rdiff[0] = 0; /* top (boundary) row */
temp = Roff; /* and swap offset arrays, too */
Roff = Loff;
Loff = temp;
Roff[1] = 0;
for (i = 0; i < Plen; i++) /* run through pattern */
{
/* compute a, b, & c as the three adjacent cells ... */
if (Pattern[i] == Text[Textloc])
a = Ldiff[i];
else a = Ldiff[i] + 1;
b = Ldiff[i+1] + 1;
c = Rdiff[i] + 1;
/* ... now pick minimum ... */
if (b < a)
a = b;
if (c < a)
a = c;
/* ... and store */
Rdiff[i+1] = a;
}
/* now update offset array */
/* the values in the offset arrays are added to the
current location to determine the beginning of the
mismatched substring. (see text for details) */
if (Plen > 1) for (i=2; i<=Plen; i++)
{
if (Ldiff[i-1] < Rdiff[i])
Roff[i] = Loff[i-1] - 1;
else if (Rdiff[i-1] < Rdiff[i])
Roff[i] = Roff[i-1];
else if (Ldiff[i] < Rdiff[i])
Roff[i] = Loff[i] - 1;
else /* Ldiff[i-1] == Rdiff[i] */
Roff[i] = Loff[i-1] - 1;
}
/* now, do we have an approximate match? */
if (Rdiff[Plen] <= Degree) /* indeed so! */
{
*end = Text + Textloc;
*start = *end + Roff[Plen];
*howclose = Rdiff[Plen];
}
}
if (start == NULL) /* all done - free dynamic arrays */
free(Ldiff);
}
#ifdef DEBUG
void main(int argc, char **argv)
{
char *begin, *end;
int howclose;
if (argc != 4)
{
puts("Usage is: approx pattern text degree\n");
exit(0);
}
App_init(argv[1], argv[2], atoi(argv[3]));
App_next(&begin, &end, &howclose);
while (begin != NULL)
{
printf("Degree %d: %.*s\n", howclose, end-begin+1, begin);
App_next(&begin, &end, &howclose);
}
}
#endif
|