File: regexp.c

package info (click to toggle)
uwsgi 2.0.29-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 6,684 kB
  • sloc: ansic: 87,027; python: 7,001; cpp: 1,131; java: 708; perl: 678; sh: 585; ruby: 555; makefile: 148; xml: 130; cs: 121; objc: 37; php: 28; erlang: 20; javascript: 11
file content (178 lines) | stat: -rw-r--r-- 4,386 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
#include "uwsgi.h"

extern struct uwsgi_server uwsgi;

/*
 * Store the PCRE JIT setting in uwsgi.pcre_jit.
 *
 * The three arguments are not used by the body. With the legacy PCRE API the
 * flag is set to PCRE_STUDY_JIT_COMPILE only when pcre_config() succeeds and
 * reports JIT as available; with PCRE2 the value reported by pcre2_config()
 * is stored as-is when the query succeeds. When neither set of macros is
 * defined the function does nothing.
 */
void uwsgi_opt_pcre_jit(char *opt, char *value, void *foobar) {
#if defined(PCRE_STUDY_JIT_COMPILE) && defined(PCRE_CONFIG_JIT)
	int jit_available = 0;

	// a failed query or a library without JIT leaves the setting untouched
	if (pcre_config(PCRE_CONFIG_JIT, &jit_available) == 0 && jit_available == 1) {
		uwsgi.pcre_jit = PCRE_STUDY_JIT_COMPILE;
	}
#elif defined(PCRE2_CONFIG_JIT)
	int jit_available = 0;

	// a failed query leaves the setting untouched
	if (pcre2_config(PCRE2_CONFIG_JIT, &jit_available) == 0) {
		uwsgi.pcre_jit = jit_available;
	}
#endif
}

/*
 * Compile the regular expression `re` and store the result in `*pattern`.
 *
 * With PCRE2 the compiled code is additionally JIT-compiled when
 * uwsgi.pcre_jit is set. With the legacy PCRE API a uwsgi_pcre wrapper is
 * allocated, the expression is compiled into its `p` member and the result
 * of pcre_study() (called with uwsgi.pcre_jit as options) goes into `extra`.
 *
 * Returns 0 on success. On any failure an error is logged, everything that
 * was allocated here is released, `*pattern` is set to NULL and -1 is
 * returned.
 */
int uwsgi_regexp_build(char *re, uwsgi_pcre ** pattern) {

#ifdef UWSGI_PCRE2
	int errnbr;
	size_t erroff;

	*pattern = pcre2_compile((const unsigned char *) re, PCRE2_ZERO_TERMINATED, 0, &errnbr, &erroff, NULL);
	if (!(*pattern)) {
		// erroff is a size_t: convert it explicitly to match the %d conversion
		uwsgi_log("pcre error: code %d at offset %d\n", errnbr, (int) erroff);
		return -1;
	}

	if (uwsgi.pcre_jit) {
		errnbr = pcre2_jit_compile(*pattern, PCRE2_JIT_COMPLETE);
		if (errnbr) {
			pcre2_code_free(*pattern);
			// do not hand a dangling pointer back to the caller
			*pattern = NULL;
			uwsgi_log("pcre JIT compile error code %d\n", errnbr);
			return -1;
		}
	}
#else
	const char *errstr;
	int erroff;

	*pattern = uwsgi_malloc(sizeof(uwsgi_pcre));
	(*pattern)->p = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL);
	if (!((*pattern)->p)) {
		uwsgi_log("pcre error: %s at offset %d\n", errstr, erroff);
		// the wrapper allocated above would otherwise be leaked
		free(*pattern);
		*pattern = NULL;
		return -1;
	}

	int opt = uwsgi.pcre_jit;

	(*pattern)->extra = (pcre_extra *) pcre_study((const pcre *) (*pattern)->p, opt, &errstr);
	// a NULL result is only treated as an error when an error string was set
	if ((*pattern)->extra == NULL && errstr != NULL) {
		pcre_free((*pattern)->p);
		free(*pattern);
		*pattern = NULL;
		uwsgi_log("pcre (study) error: %s\n", errstr);
		return -1;
	}
#endif

	return 0;

}

/*
 * Run `pattern` against the first `length` bytes of `subject` without
 * collecting capture offsets.
 *
 * With the legacy PCRE API the result of pcre_exec() is returned directly;
 * with PCRE2 the call is forwarded to uwsgi_regexp_match_ovec() with no
 * output vector.
 */
int uwsgi_regexp_match(uwsgi_pcre *pattern, const char *subject, int length) {
#ifndef UWSGI_PCRE2
	const pcre *code = (const pcre *) pattern->p;
	const pcre_extra *study = (const pcre_extra *) pattern->extra;

	return pcre_exec(code, study, subject, length, 0, 0, NULL, 0);
#else
	return uwsgi_regexp_match_ovec(pattern, subject, length, NULL, 0);
#endif
}

/*
 * Run `pattern` against the first `length` bytes of `subject` and, when
 * `n` > 0, report the match offsets through `ovec`.
 *
 * `n` is the number of capturing subpatterns (see uwsgi_regexp_ovector()).
 * With PCRE2, (n + 1) * 2 offsets are copied into `ovec`, so the caller must
 * provide at least that many ints. With the legacy PCRE API `ovec` is handed
 * to pcre_exec() with a size of PCRE_OVECTOR_BYTESIZE(n).
 *
 * Returns the result of pcre2_match() / pcre_exec() unchanged, or
 * PCRE2_ERROR_NOMEMORY when the PCRE2 match data block cannot be allocated.
 */
int uwsgi_regexp_match_ovec(uwsgi_pcre *pattern, const char *subject, int length, int *ovec, int n) {

#ifdef UWSGI_PCRE2
	int rc;
	int i;
	pcre2_match_data *match_data;
	size_t *pcre2_ovec;

	match_data = pcre2_match_data_create_from_pattern(pattern, NULL);
	if (!match_data) {
		// without a match data block there is no ovector to read from
		return PCRE2_ERROR_NOMEMORY;
	}

	rc = pcre2_match(pattern, (const unsigned char *) subject, length, 0, 0, match_data, NULL);

	/*
	 * Quoting PCRE{,2} spec, "The first pair of integers, ovector[0]
	 * and ovector[1], identify the portion of the subject string matched
	 * by the entire pattern. The next pair is used for the first capturing
	 * subpattern, and so on." Therefore, the ovector size is the number of
	 * capturing subpatterns (INFO_CAPTURECOUNT), from uwsgi_regexp_ovector(),
	 * as matching pairs, plus room for the first pair.
	 */
	if (n > 0) {
		// copy pcre2 output vector to uwsgi output vector
		pcre2_ovec = pcre2_get_ovector_pointer(match_data);
		for (i = 0; i < (n + 1) * 2; i++) {
			ovec[i] = pcre2_ovec[i];
		}
	}

	pcre2_match_data_free(match_data);

	return rc;
#else
	if (n > 0) {
		return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, ovec, PCRE_OVECTOR_BYTESIZE(n));
	}

	// no captures requested: plain match without an output vector
	return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0);
#endif
}

/*
 * Query the capture count (INFO_CAPTURECOUNT) of a compiled pattern.
 *
 * Returns the reported count, or 0 when the underlying info call returns a
 * non-zero status.
 */
int uwsgi_regexp_ovector(const uwsgi_pcre *pattern) {

	int captures;
	int status;

#ifdef UWSGI_PCRE2
	status = pcre2_pattern_info(pattern, PCRE2_INFO_CAPTURECOUNT, &captures);
#else
	status = pcre_fullinfo((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, PCRE_INFO_CAPTURECOUNT, &captures);
#endif

	// `captures` is only read when the query succeeded
	return status ? 0 : captures;
}

/*
 * Expand the template `dst` (length `dst_n`) using the capture offsets in
 * `ovector`, which refer to the subject `src` (length `src_n`).
 *
 * A '$' followed by a single digit d is replaced by capture group d when
 * d <= n; ovector[2*d] and ovector[2*d + 1] are the start and end offsets of
 * that group inside `src`. A digit greater than `n`, or a group whose offsets
 * are unset or fall outside `src`, expands to nothing. A '$' followed by any
 * other character, or a '$' at the very end of the template, is copied
 * literally.
 *
 * Returns a newly allocated, NUL-terminated string obtained from
 * uwsgi_malloc(); releasing it is left to the caller.
 */
char *uwsgi_regexp_apply_ovec(char *src, int src_n, char *dst, int dst_n, int *ovector, int n) {

	int i;
	int dollar = 0;

	// worst case: every '$' in the template expands to the whole subject
	size_t dollars = n;

	for (i = 0; i < dst_n; i++) {
		if (dst[i] == '$') {
			dollars++;
		}
	}

	char *res = uwsgi_malloc(dst_n + (src_n * dollars) + 1);
	char *ptr = res;

	for (i = 0; i < dst_n; i++) {
		if (dollar) {
			// <ctype.h> functions require a value representable as unsigned char
			if (isdigit((unsigned char) dst[i])) {
				int pos = dst[i] - '0';
				if (pos <= n) {
					int start = ovector[pos * 2];
					int end = ovector[pos * 2 + 1];
					// skip unset (-1) or inconsistent ranges instead of passing
					// a negative or oversized length to memcpy
					if (start >= 0 && end > start && end <= src_n) {
						memcpy(ptr, src + start, end - start);
						ptr += end - start;
					}
				}
			}
			else {
				*ptr++ = '$';
				*ptr++ = dst[i];
			}
			dollar = 0;
		}
		else {
			if (dst[i] == '$') {
				dollar = 1;
			}
			else {
				*ptr++ = dst[i];
			}
		}
	}

	// a '$' that ends the template has nothing to reference: keep it literally
	if (dollar) {
		*ptr++ = '$';
	}

	*ptr++ = 0;

	return res;
}

#endif