File: text_search.c

package info (click to toggle)
haskell-hoogle 5.0.17.3%2Bdfsg1-5
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 496 kB
  • sloc: haskell: 2,890; ansic: 110; sh: 33; xml: 20; makefile: 3
file content (133 lines) | stat: -rw-r--r-- 3,686 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <stdlib.h>
#include <string.h>


int min_int(int a, int b){return a > b ? b : a;}
int max_int(int a, int b){return a < b ? b : a;}

int maximum_int(int xs[], int n)
{
    int mx = 0;
    for (int i = 0; i < n; i++)
        mx = max_int(mx, xs[i]);
    return mx;
}


// Given a list of words, separated by 0 and ending with two 0
// Find the character that occurs in most words, and return the count of it
int text_search_bound(char* xs)
{
    int counts[256];         // number of words I've seen each char in
    int poss[256];           // position I saw last char at
    int pos = 0;             // the current position I am at

    for (int i = 0; i < 256; i++)
    {
        counts[i] = 0;
        poss[i] = -1;
    }

    for (char* cs = xs; ; cs++)
    {
        unsigned char c = *cs;
        if (c == 0)
        {
            if (cs[1] == 0) break;
            pos++;
        }
        else if (poss[c] != pos)
        {
            poss[c] = pos;
            counts[c]++;
        }
    }
    return maximum_int(counts, 256);
}


int count_pointers(char** xs)
{
    int i;
    for (i = 0; xs[i] != NULL; i++)
        ;
    return i;
}

int compare_int(const void *a, const void *b)
{
  const int *da = (const int*) a;
  const int *db = (const int*) b;

  return (*da > *db) - (*da < *db);
}


// Taken from http://stackoverflow.com/questions/23999797/implementing-strnstr/24000056#24000056
char* memmem_(char* haystack, size_t hlen, char* needle, size_t nlen)
{
    if (nlen == 0) return haystack; // degenerate edge case
    if (hlen < nlen) return 0; // another degenerate edge case
    char *hlimit = haystack + hlen - nlen + 1;
    while ((haystack = memchr(haystack, needle[0], hlimit-haystack)))
    {
        if (!memcmp(haystack, needle, nlen)) return haystack;
        haystack++;
    }
    return 0;
}


// haystack is \0 (space if leading upper) (lowercase string), ends with two \0
// needles are a NULL terminated list of lowercase strings, with a leading space if upper
int text_search(char* haystack, char** needles, int exact, int* results)
{
    // information about the needles
    int n = count_pointers(needles);
    int uppers[n];        // is this needle uppercase prefix
    int lengths[n];       // length of this needle
    char* strings[n];     // actual text of this needle

    for (int i = 0; i < n; i++)
    {
        uppers[i] = needles[i][0] == ' ';
        strings[i] = uppers[i] ? &needles[i][1] : needles[i];
        lengths[i] = strlen(strings[i]);
    }

    int index = 0;          // Number of \n's I've already seen in haystack
    int found = 0;          // Number I've written to results

    for (index = 0; ; index++)
    {
        int upper = haystack[0] == ' ';
        if (upper) haystack++;
        int length = strlen(haystack);
        if (length == 0) break;
        char* string = haystack;
        haystack = &haystack[length+1];

        int score = 4;
        for (int i = 0; i < n; i++)
        {
            char* pos = strstr(string, strings[i]); // length, strings[i], lengths[i]);
            if (pos == NULL)
            {
                score = -1;
                break;
            }
            else if (pos == string)
            {
                int complete = lengths[i] == length;
                int cased = uppers[i] == upper;
                score = min_int(score, (!complete ? 2 : 0) + (!cased ? 1 : 0));
            }
        }
        if (score >= 0 && (!exact || score == 0))
            results[found++] = (score << 24) | index;
    }
    qsort(results, found, sizeof(int), compare_int);
    for (int i = 0; i < found; i++)
        results[i] = results[i] & 0xffffff;
    return found;
}