File: pcre2.c

package info (click to toggle)
sqlite3-pcre2 0.1.1~git20220105-e3e02a4a-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 144 kB
  • sloc: sh: 193; ansic: 134; sql: 103; makefile: 58
file content (159 lines) | stat: -rw-r--r-- 5,451 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
/*
 * Initially written by Alexey Tourbin <at@altlinux.org>.
 *
 * The author has dedicated the code to the public domain.  Anyone is free
 * to copy, modify, publish, use, compile, sell, or distribute the original
 * code, either in source code form or as a compiled binary, for any purpose,
 * commercial or non-commercial, and by any means.
 */
#define PCRE2_CODE_UNIT_WIDTH 8
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <pcre2.h>
#include <sqlite3ext.h>

SQLITE_EXTENSION_INIT1

/* One slot of the compiled-pattern cache used by regexp(). */
typedef struct {
    /* malloc'ed copy of the pattern bytes (pattern_len bytes, no NUL
     * terminator is stored); NULL marks an unused slot. */
    char *pattern_str;
    /* Length of pattern_str in bytes. */
    int pattern_len;
    /* Result of pcre2_compile() for this pattern. */
    pcre2_code *pattern_code;
} cache_entry;

/* Number of slots in the compiled-pattern cache; can be overridden by
 * defining CACHE_SIZE before this point (e.g. on the compiler command line). */
#ifndef CACHE_SIZE
#define CACHE_SIZE 16
#endif

/*
 * SQL function REGEXP(pattern, subject).
 *
 * argv[0] is the pattern and argv[1] the subject.  If either argument is
 * SQL NULL no result is set.  Otherwise the result is the integer 1 when
 * the pattern matches the subject and 0 when it does not; an SQL error is
 * raised when the pattern cannot be compiled, when memory runs out, or
 * when pcre2_match() reports an error other than "no match".
 *
 * Compiled patterns are kept in the CACHE_SIZE-slot array obtained from
 * sqlite3_user_data().  Used slots are contiguous from index 0, the most
 * recently used pattern is always moved to index 0, and the last slot is
 * evicted when a new pattern has to be inserted.
 */
static
void regexp(sqlite3_context *ctx, int argc, sqlite3_value **argv) {
    const char *pattern_str, *subject_str;
    int pattern_len, subject_len;
    pcre2_code *pattern_code;

    assert(argc == 2);
    /* check null */
    if (sqlite3_value_type(argv[0]) == SQLITE_NULL || sqlite3_value_type(argv[1]) == SQLITE_NULL) {
        return;
    }

    pattern_str = (const char *) sqlite3_value_text(argv[0]);
    if (!pattern_str) {
        sqlite3_result_error(ctx, "no pattern", -1);
        return;
    }
    pattern_len = sqlite3_value_bytes(argv[0]);

    subject_str = (const char *) sqlite3_value_text(argv[1]);
    if (!subject_str) {
        sqlite3_result_error(ctx, "no subject", -1);
        return;
    }
    subject_len = sqlite3_value_bytes(argv[1]);

    /* simple LRU cache */
    {
        int i;
        int found = 0;
        cache_entry *cache = sqlite3_user_data(ctx);

        assert(cache);

        /* A NULL pattern_str marks the first unused slot. */
        for (i = 0; i < CACHE_SIZE && cache[i].pattern_str; i++) {
            if (
                pattern_len == cache[i].pattern_len
                && memcmp(pattern_str, cache[i].pattern_str, pattern_len) == 0
            ) {
                found = 1;
                break;
            }
        }
        if (found) {
            /* Move the hit to the front, shifting the entries before it. */
            if (i > 0) {
                cache_entry c = cache[i];
                memmove(cache + 1, cache, i * sizeof(cache_entry));
                cache[0] = c;
            }
        } else {
            cache_entry c;
            int error_code;
            PCRE2_SIZE error_position;

            c.pattern_code = pcre2_compile(
                (PCRE2_SPTR) pattern_str,   /* the pattern */
                (PCRE2_SIZE) pattern_len,   /* the length of the pattern */
                0,                          /* default options */
                &error_code,                /* for error number */
                &error_position,            /* for error offset */
                NULL);                      /* use default compile context */
            if (!c.pattern_code) {
                PCRE2_UCHAR error_buffer[256];
                char *e2;

                /* Keep the buffer a valid string even if no message is written. */
                error_buffer[0] = 0;
                pcre2_get_error_message(error_code, error_buffer,
                    sizeof(error_buffer) / sizeof(error_buffer[0]));
                e2 = sqlite3_mprintf(
                    "Cannot compile pattern \"%s\" at offset %d: %s",
                    pattern_str, (int)error_position, (const char *) error_buffer);
                if (e2) {
                    sqlite3_result_error(ctx, e2, -1);
                    sqlite3_free(e2);
                } else {
                    sqlite3_result_error_nomem(ctx);
                }
                return;
            }
            /* Never request 0 bytes: malloc(0) may return NULL, which would be
             * mistaken for an allocation failure and for an unused slot. */
            c.pattern_str = malloc(pattern_len > 0 ? (size_t) pattern_len : 1);
            if (!c.pattern_str) {
                sqlite3_result_error(ctx, "malloc: ENOMEM", -1);
                pcre2_code_free(c.pattern_code);
                return;
            }
            memcpy(c.pattern_str, pattern_str, pattern_len);
            c.pattern_len = pattern_len;

            /* Evict the last slot if it is occupied, then shift everything
             * one slot to the right and insert the new entry at the front. */
            i = CACHE_SIZE - 1;
            if (cache[i].pattern_str) {
                free(cache[i].pattern_str);
                assert(cache[i].pattern_code);
                pcre2_code_free(cache[i].pattern_code);
            }
            memmove(cache + 1, cache, i * sizeof(cache_entry));
            cache[0] = c;
        }
        pattern_code = cache[0].pattern_code;
    }

    {
        int rc;
        pcre2_match_data *match_data;
        assert(pattern_code);

        match_data = pcre2_match_data_create_from_pattern(pattern_code, NULL);
        if (!match_data) {
            sqlite3_result_error_nomem(ctx);
            return;
        }
        rc = pcre2_match(
          pattern_code,               /* the compiled pattern */
          (PCRE2_SPTR) subject_str,   /* the subject string */
          (PCRE2_SIZE) subject_len,   /* the length of the subject */
          0,                          /* start at offset 0 in the subject */
          0,                          /* default options */
          match_data,                 /* block for storing the result */
          NULL);                      /* use default match context */
        /* Only the return code is used, so release the match block right
         * away; this also covers the error path below. */
        pcre2_match_data_free(match_data);

        if (rc >= 0) {
            /* Any non-negative return value means the pattern matched. */
            sqlite3_result_int(ctx, 1);
        } else if (rc == PCRE2_ERROR_NOMATCH) {
            sqlite3_result_int(ctx, 0);
        } else { /* any other negative code is a matching error */
            PCRE2_UCHAR error_buffer[256];

            error_buffer[0] = 0;
            pcre2_get_error_message(rc, error_buffer,
                sizeof(error_buffer) / sizeof(error_buffer[0]));
            sqlite3_result_error(ctx, (const char *) error_buffer, -1);
        }
    }
}

int sqlite3_extension_init(sqlite3 *db, char **err, const sqlite3_api_routines *api) {
    SQLITE_EXTENSION_INIT2(api)
    cache_entry *cache = calloc(CACHE_SIZE, sizeof(cache_entry));
    if (!cache) {
        *err = "calloc: ENOMEM";
        return 1;
    }
    sqlite3_create_function(db, "REGEXP", 2, SQLITE_UTF8, cache, regexp, NULL, NULL);
    return 0;
}