File: soundex.c

package info (click to toggle)
f-irc 1.36-1
  • links: PTS
  • area: main
  • in suites: buster, jessie, jessie-kfreebsd, stretch
  • size: 632 kB
  • ctags: 904
  • sloc: ansic: 12,538; makefile: 61
file content (111 lines) | stat: -rw-r--r-- 2,487 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/* GPLv2 applies
 * SVN revision: $Revision: 671 $
 * (C) 2006-2014 by folkert@vanheusden.com
 */
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "gen.h"
#include "utils.h"
#include "string_array.h"

/* Returns a 4 character soundex-style code for the string 'in'.
 * adapted from http://physics.nist.gov/cuu/Reference/soundex.html
 *
 * The result is a NUL-terminated string allocated with malloc(); ownership
 * passes to the caller. NULL is returned when the allocation fails.
 *
 * Layout of the code: the lower-cased first character of 'in', followed by
 * up to three digits for the characters after it, padded with '0'.
 * Characters that have no digit (vowels, h, w, y, anything that is not a
 * letter) are skipped and a digit is never emitted twice in a row.
 * Note that this differs from classic soundex: a skipped character does not
 * separate two equal digits and the digit class of the first letter itself
 * is not taken into account. Callers only compare codes produced by this
 * function with each other, so that behaviour is kept as is.
 *
 * An empty input gives an empty string.
 */
char *soundex(const char *in)
{
	int index_in = 1, index_out = 1;
	char prev_dig, cur_char, *out = malloc(5);

	if (!out)
		return NULL;

	memset(out, '0', 4);
	out[4] = 0x00;

	/* <ctype.h> functions need an unsigned char value: a plain char may be
	 * negative for bytes >= 0x80 which is undefined behaviour
	 */
	out[0] = tolower((unsigned char)in[0]);
	prev_dig = out[0];

	/* empty input: in[1] lies behind the terminator, do not look at it */
	if (in[0] == 0x00)
		return out;

	while(in[index_in] && index_out < 4)
	{
		switch(tolower((unsigned char)in[index_in++]))
		{
			case 'b' :
			case 'p' :
			case 'f' :
			case 'v' :
				cur_char = '1';
				break;

			case 'c' :
			case 's' :
			case 'k' :
			case 'g' :
			case 'j' :
			case 'q' :
			case 'x' :
			case 'z' :
				cur_char = '2';
				break;

			case 'd' :
			case 't' :
				cur_char = '3';
				break;

			case 'l' :
				cur_char = '4';
				break;

			case 'm' :
			case 'n' :
				cur_char = '5';
				break;

			case 'r' :
				cur_char = '6';
				break;

			default :
				/* no digit for this character */
				cur_char = '*';
		}

		/* only store a digit when it differs from the previous stored one */
		if (cur_char != prev_dig && cur_char != '*')
			out[index_out++] = prev_dig = cur_char;
	}

	return out;
}

/* Checks if any word in haystackIn has the same soundex code as needle.
 *
 * A copy of haystackIn is made in which every character that is not
 * alphabetic (isalpha) is replaced by a space; that copy is then split on
 * spaces and each word is compared, via soundex(), with the code of needle.
 *
 * bitmap may be NULL. In that case the search stops at the first match.
 * When it is not NULL, the bytes in bitmap at the positions of each matching
 * word are set to the character '1'; other bytes are not touched. Only
 * offsets within strlen(haystackIn) are written.
 *
 * Returns TRUE when at least one word matched, else FALSE. FALSE is also
 * returned when a working copy could not be allocated.
 */
BOOL fuzzy_match(const char *haystackIn, const char *needle, char *bitmap)
{
	const char *needleS = soundex(needle);
	char *haystack = NULL, *search_start = NULL;
	int loop = 0, len = (int)strlen(haystackIn);
	BOOL match = FALSE;
	string_array_t hsS;

	if (!needleS)
		return FALSE;

	haystack = strdup(haystackIn);
	if (!haystack)
	{
		myfree(needleS);
		return FALSE;
	}

	search_start = haystack;

	init_string_array(&hsS);

	for(loop=0; loop<len; loop++)
	{
		/* cast: isalpha() is undefined for negative char values (bytes >= 0x80) */
		if (!isalpha((unsigned char)haystack[loop]))
			haystack[loop] = ' ';
	}

	split_string(haystack, " ", TRUE, &hsS);

	for(loop=0; loop<string_array_get_n(&hsS); loop++)
	{
		const char *word_in = string_array_get(&hsS, loop);
		const char *cur = soundex(word_in);
		int word_len = (int)strlen(word_in);
		/* the position is only needed when there is a bitmap to fill in */
		char *found_at = bitmap ? strstr(search_start, word_in) : NULL;
		BOOL same = cur != NULL && strcmp(cur, needleS) == 0;

		if (cur)
			myfree(cur);

		/* move the search position past every word, not only past matching
		 * ones: otherwise a later matching word can be "found" inside an
		 * earlier word that did not match and the wrong bytes get marked
		 */
		if (found_at)
			search_start = found_at + word_len;

		if (!same)
			continue;

		match = TRUE;

		/* without a bitmap the first match is all the caller wants to know */
		if (!bitmap)
			break;

		if (found_at)
		{
			int pos = (int)(found_at - haystack);

			if (pos >= 0 && pos <= len - word_len)
				memset(&bitmap[pos], '1', word_len);
		}
	}

	free_splitted_string(&hsS);
	myfree(needleS);

	free(haystack);

	return match;
}