1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
|
/*
+----------------------------------------------------------------------+
| PHP version 4.0 |
+----------------------------------------------------------------------+
| Copyright (c) 1997, 1998, 1999, 2000 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Hartmut Holzgraefe <hartmut@six.de> |
+----------------------------------------------------------------------+
*/
/* $Id: levenshtein.c,v 1.12 2000/08/21 09:50:52 sas Exp $ */
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* #include <unicode/ustring.h> */
/*
 * Levenshtein (edit) distance between two NUL-terminated strings where
 * every insertion, deletion and replacement costs 1.
 *
 * The common prefix and common suffix are stripped first, because they
 * never contribute to the distance; the remaining "middle" parts are then
 * compared with a single-row dynamic-programming table.
 *
 * Returns:
 *   - the distance, converted to unsigned short;
 *   - (unsigned short)-1 when the stripped strings differ in length by
 *     255 or more, or when the shorter stripped string is longer than
 *     256 characters (the capacity of the on-stack row buffer).
 *
 * If one string is a prefix of the other, the number of left-over
 * characters is returned directly without applying the limits above.
 */
unsigned short int fastest_levdist(const char *s1, const char *s2)
{
    enum { MAX_LEN_DIFF = 255, MAX_SHORT_LEN = 256 };
    int row[MAX_SHORT_LEN + 1];
    const char *tmp;
    size_t l1, l2, len_tmp;
    size_t i, j;

    /* skip equal start sequence, if any */
    while (*s1 != '\0' && *s1 == *s2) {
        s1++;
        s2++;
    }

    /* if we already used up one string, then
       the result is the length of the other */
    if (*s1 == '\0') {
        return (unsigned short)strlen(s2);
    }
    if (*s2 == '\0') {
        return (unsigned short)strlen(s1);
    }

    l1 = strlen(s1);
    l2 = strlen(s2);

    /* cut off equal tail sequence, if any; the length guards keep us from
       reading in front of either string when one is a suffix of the other */
    while (l1 > 0 && l2 > 0 && s1[l1 - 1] == s2[l2 - 1]) {
        l1--;
        l2--;
    }

    /* make s2 the shorter one: its length determines the row size.
       With unit costs the distance is symmetric, so swapping is safe. */
    if (l1 < l2) {
        tmp = s1;
        s1 = s2;
        s2 = tmp;
        len_tmp = l1;
        l1 = l2;
        l2 = len_tmp;
    }

    /* possible distance too great, or row buffer too small? */
    if (l1 - l2 >= MAX_LEN_DIFF || l2 > MAX_SHORT_LEN) {
        return (unsigned short)-1;
    }

    /* row[j] = distance between the empty string and the first j chars of s2 */
    for (j = 0; j <= l2; j++) {
        row[j] = (int)j;
    }

    /* calc. rowwise: after iteration i, row[j] is the distance between
       the first i+1 chars of s1 and the first j chars of s2 */
    for (i = 0; i < l1; i++) {
        int diag = row[0];          /* value of the cell up-left of (i+1, 1) */

        row[0] = (int)i + 1;        /* deleting i+1 chars yields the empty string */

        for (j = 0; j < l2; j++) {
            int above = row[j + 1]; /* previous row, same column */
            int best = diag + (s1[i] != s2[j]);   /* replace, or free match */

            if (above + 1 < best) {
                best = above + 1;   /* delete s1[i] */
            }
            if (row[j] + 1 < best) {
                best = row[j] + 1;  /* insert s2[j] */
            }

            row[j + 1] = best;
            diag = above;           /* becomes up-left for the next column */
        }
    }

    /* return result */
    return (unsigned short)row[l2];
}
/*
 * Weighted Levenshtein distance: the minimal total cost of turning s1
 * into s2, where
 *   cost_ins is charged for every character that has to be inserted
 *            (present in s2, missing in s1),
 *   cost_rep is charged for every character that has to be replaced,
 *   cost_del is charged for every character that has to be deleted
 *            (present in s1, missing in s2).
 *
 * The common prefix and common suffix are stripped first and the remaining
 * parts are compared with a single-row dynamic-programming table.
 * Stripping is only guaranteed not to change the result for non-negative
 * costs, which is what this function assumes.
 *
 * Returns:
 *   - the total cost, converted to unsigned short (callers passing large
 *     costs must expect truncation);
 *   - (unsigned short)-1 when the stripped strings differ in length by
 *     255 or more, or when the shorter stripped string is longer than
 *     256 characters (the capacity of the on-stack row buffer).
 *
 * If one string is a prefix of the other, the cost of inserting or
 * deleting the left-over characters is returned directly without
 * applying the limits above.
 */
unsigned short int weighted_levdist(const char *s1, const char *s2,
                                    const int cost_ins, const int cost_rep,
                                    const int cost_del)
{
    enum { MAX_LEN_DIFF = 255, MAX_SHORT_LEN = 256 };
    int row[MAX_SHORT_LEN + 1];
    int ins = cost_ins;
    int del = cost_del;
    int cost_tmp;
    const char *tmp;
    size_t l1, l2, len_tmp;
    size_t i, j;

    /* skip equal start sequence, if any */
    while (*s1 != '\0' && *s1 == *s2) {
        s1++;
        s2++;
    }

    /* if we already used up one string, the rest of the other one has to
       be inserted (s1 exhausted) or deleted (s2 exhausted) */
    if (*s1 == '\0') {
        return (unsigned short)(strlen(s2) * (size_t)cost_ins);
    }
    if (*s2 == '\0') {
        return (unsigned short)(strlen(s1) * (size_t)cost_del);
    }

    l1 = strlen(s1);
    l2 = strlen(s2);

    /* cut off equal tail sequence, if any; the length guards keep us from
       reading in front of either string when one is a suffix of the other */
    while (l1 > 0 && l2 > 0 && s1[l1 - 1] == s2[l2 - 1]) {
        l1--;
        l2--;
    }

    /* make s2 the shorter one: its length determines the row size.
       Turning s2 into s1 is the mirror image of turning s1 into s2, so the
       insert and delete costs have to trade places along with the strings. */
    if (l1 < l2) {
        tmp = s1;
        s1 = s2;
        s2 = tmp;
        len_tmp = l1;
        l1 = l2;
        l2 = len_tmp;
        cost_tmp = ins;
        ins = del;
        del = cost_tmp;
    }

    /* possible distance too great, or row buffer too small? */
    if (l1 - l2 >= MAX_LEN_DIFF || l2 > MAX_SHORT_LEN) {
        return (unsigned short)-1;
    }

    /* row[j] = cost of building the first j chars of s2 from nothing */
    for (j = 0; j <= l2; j++) {
        row[j] = (int)j * ins;
    }

    /* calc. rowwise: after iteration i, row[j] is the cost of turning the
       first i+1 chars of s1 into the first j chars of s2 */
    for (i = 0; i < l1; i++) {
        int diag = row[0];              /* value of the cell up-left of (i+1, 1) */

        row[0] = ((int)i + 1) * del;    /* delete everything seen so far */

        for (j = 0; j < l2; j++) {
            int above = row[j + 1];     /* previous row, same column */
            int best = diag;            /* free match ... */
            int c;

            if (s1[i] != s2[j]) {
                best += cost_rep;       /* ... or a replacement */
            }

            c = above + del;            /* delete s1[i] */
            if (c < best) {
                best = c;
            }

            c = row[j] + ins;           /* insert s2[j] */
            if (c < best) {
                best = c;
            }

            row[j + 1] = best;
            diag = above;               /* becomes up-left for the next column */
        }
    }

    /* return result */
    return (unsigned short)row[l2];
}
|