File: Regex.k

package info (click to toggle)
kaya 0.2.0-6
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 3,012 kB
  • ctags: 1,307
  • sloc: cpp: 6,691; haskell: 4,833; sh: 2,868; yacc: 768; makefile: 700; perl: 87
file content (237 lines) | stat: -rw-r--r-- 6,724 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/** -*-C-*-ish
    Kaya standard library
    Copyright (C) 2004, 2005 Edwin Brady

    This file is distributed under the terms of the GNU Lesser General
    Public Licence. See COPYING for licence.
*/

module Regex;

// note: pcre doesn't necessarily work with wide characters. Results are undefined outside ASCII.

//import Strings;
import Builtins;
import Prelude;

%include "regex_glue.h";
%imported "regex_glue";
%link "pcre";

"Regular expression data."
abstract data Regex = Regex(Ptr pcre); // pcre holds the Ptr returned by re_compile (set in compile(), read back in match())

"Match object.
A match operation returns <em>matches</em> if a match is found. The
<em>matches</em> field is an array of sub matches, where <em>matches[0]</em>
is the entire matched string. <em>before</em> and <em>after</em> are the
strings before and after the matched string."
public data Match = matches([String] matches, String before, String after) // built by match() from getmatches/getBefore/getAfter
                  | noMatch; // returned by match() when matched() is false for the match object

"Flags for regular expression compilation"
public data REFlags = IgnoreCase | Extended | Multiline | Ungreedy;
// _CASELESS, _EXTENDED, _MULTILINE, _UNGREEDY
// compile() ORs these into an Int as 1, 2, 4 and 8 (in the order listed above) before calling re_compile.

foreign "regex_glue.o" {
    // Bindings to the glue layer. No function for releasing the returned
    // pointers is bound here -- NOTE(review): confirm how the glue manages
    // their lifetime.

    // Called by compile() with the pattern and the OR-ed flag bits; the
    // returned Ptr is stored inside a Regex.
    Ptr re_compile(String pattern, Int flagcode) = re_compile;
    // Called by match() with the Ptr held in a Regex and the subject string;
    // the returned Ptr is the match object queried by the functions below.
    Ptr re_match(Ptr pcre, String str) = re_match;
    // match() uses this to decide between returning matches(...) and noMatch.
    Bool matched(Ptr mo) = matched;
    // Supplies the sub-match array of a Match (element 0 is documented on
    // Match as the entire matched string).
    [String] getmatches(Ptr mo) = getmatches;
    // Supplies the 'before' field of a Match.
    String getBefore(Ptr mo) = getBefore;
    // Supplies the 'after' field of a Match.
    String getAfter(Ptr mo) = getAfter;
}

"Compile a regular expression"
public Regex compile(String pattern, [REFlags] fl = createArray(1))
{
    // Fold the requested options into the bit mask handed to the glue
    // layer: 1 = IgnoreCase, 2 = Extended, 4 = Multiline, 8 = Ungreedy.
    bits = 0;
    for opt in fl {
        case opt of {
            IgnoreCase -> bits = bits | 1;
            | Extended -> bits = bits | 2;
            | Multiline -> bits = bits | 4;
            | Ungreedy -> bits = bits | 8;
        }
    }

    // Wrap the compiled pattern pointer so callers only ever see a Regex.
    return Regex(re_compile(pattern,bits));
}

"Attempt to match a regular expression.
Returns a match object."
public Match match(Regex x, String str)
{
    // Run the compiled pattern once against str; the glue returns a match
    // object that is unpacked into a Match value below.
    result = re_match(x.pcre, str);
    if (matched(result)) {
        subs = getmatches(result);
        pre = getBefore(result);
        post = getAfter(result);
        return matches(subs, pre, post);
    }
    // The pattern did not match anywhere in str.
    return noMatch;
}

"Match a string.
Returns simply whether <em>str</em> matches <em>patt</em>."
public Bool quickMatch(String patt, String str, [REFlags] fl = createArray(1))
{
    // Compile the pattern with the requested flags, then reuse the
    // compiled-pattern variant for the yes/no answer.
    compiled = compile(patt,fl);
    return quickMatchWith(compiled,str);
}

"DEPRECATED. See quickMatch."
public Bool quick_match(String patt, String str, [REFlags] fl = createArray(1))
{
    // Old name kept for existing callers; simply forwards to quickMatch.
    return quickMatch(patt,str,fl);
}

"Match a regular expression.
As quickMatch, but matches with a compiled regular expression."
public Bool quickMatchWith(Regex re, String str)
{
    // Only the success/failure of the match matters here; the sub-matches
    // and surrounding text are discarded.
    found = false;
    case match(re,str) of {
        matches(subs,pre,post) -> found = true;
        | noMatch -> found = false;
    }
    return found;
}

"DEPRECATED. See quickMatchWith."
public Bool quick_match_re(Regex re, String str)
{
    // Old name kept for existing callers; simply forwards to quickMatchWith.
    return quickMatchWith(re,str);
}

public data ReplaceFlags = Global; // when present in fl, replace() calls replaceAll instead of replacing only the first match

"Replace all instances.
Replaces all instances of key with new in the String str"
Void replaceAll(Regex keyre, String new, var String str, [ReplaceFlags] fl = createArray(1), [REFlags] mfl = createArray(1))
{
    // Rewrites str in place: every match of keyre is replaced by new, with
    // $1, $2, ... in new expanded from the sub-matches (see substVars).
    // fl and mfl are accepted for signature compatibility but are not used
    // by this overload (the pattern is already compiled).
    newstr = "";
    repeat case match(keyre,str) of {
        // The pattern rebinds str to the text after the match, which is what
        // the next iteration searches.
        matches(xs,before,str) ->
            // A zero-length match at the very start of the remaining text
            // consumes nothing, so the next iteration would find the same
            // match forever. Detect it before calling substVars, because
            // substVars may modify xs.
            stuck = (before=="" && xs[0]=="");
            newstr += before + (substVars(xs,new));
            if (stuck) {
                // Keep the one replacement made at this position, copy the
                // rest of the text unchanged and stop.
                newstr += str;
                break;
            }
        | noMatch() -> newstr += str;
                       break; // Stop matching
    }
    str = newstr;
}

"Replace all instances.
Replaces all instances of key with new in the String str"
Void replaceAll(String key, String new, var String str, [ReplaceFlags] fl = createArray(1), [REFlags] mfl = createArray(1))
{
    // Compile the key with the match flags, then let the compiled-pattern
    // overload do the actual replacement loop on str.
    compiled = compile(key,mfl);
    replaceAll(compiled,new,str,fl,mfl);
}

"Replace a pattern with a string.
Replaces first instance (or all instances if <em>fl</em> contains
<em>Global</em>)
of key with new in the String str. (keyre is a compiled regex)
<em>new</em> may contain back references into the key, $1, $2, etc,
which stand for sub-matches."
public Void replace(Regex keyre, String new, var String str,
                    [ReplaceFlags] fl = createArray(1), [REFlags] mfl = createArray(1))
{
    if (elem(Global,fl)) {
        // Global replacement is handled entirely by replaceAll.
        replaceAll(keyre,new,str,fl,mfl);
    } else {
        // Single replacement: splice the expanded replacement text between
        // the text before and after the first match.
        case match(keyre,str) of {
            matches(subs,pre,post) -> str = pre + (substVars(subs,new))
                                          + post;
            | noMatch() -> ; // Do nothing
        }
    }
}

"Replace a pattern with a string.
Replaces first instance (or all instances if <em>fl</em> contains
<em>Global</em>)
of key with new in the String str.
<em>new</em> may contain back references into the key, $1, $2, etc,
which stand for sub-matches."
public Void replace(String key, String new, var String str,
                    [ReplaceFlags] fl = createArray(1), [REFlags] mfl = createArray(1))
{
    // Compile the key with the match flags and hand over to the
    // compiled-pattern overload, which performs either the single or the
    // Global replacement on str.
    compiled = compile(key,mfl);
    replace(compiled,new,str,fl,mfl);
}

"Replace $1, $2, etc in new with elements of vars.
Helper function for <em>replace</em>; avoid using this, as it will be
made private when that is implemented!"
String substVars([String] vars, var String new)
{
    // Returns a copy of new in which $1, $2, ... are replaced by vars[1],
    // vars[2], ...; vars[0] (the whole match) is never substituted.
    // The vars array is only read, so the caller's sub-match list is left
    // intact.
    newstr=copy(new);

    // Work from the highest-numbered reference down: substituting "$1"
    // first would also rewrite the leading part of "$10", "$11", ...
    i = size(vars) - 1;
    while (i >= 1) {
	// FIXME: Make "\$" not do any substitution. Somehow.
	// NOTE: text inserted for a higher reference that itself contains
	// "$n" for a lower n is still expanded by a later pass.
	replaceAll("\\$"+String(i),vars[i],newstr);
	i = i - 1;
    }
    return newstr;
}

"Search for patterns.
Return the entries in <em>xs</em> which match the given pattern."
public [String] grep(String patt, [String] xs, Bool inverse=false) {
    // Compile once, then test every candidate against the same Regex.
    re = compile(patt);
    hits = [];
    for candidate in xs {
        // With inverse set, the entries that do NOT match are kept instead.
        isMatch = quickMatchWith(re,candidate);
        if (isMatch!=inverse) {
            push(hits,candidate);
        }
    }
    return hits;
}

"Split a string into substrings.
Using <em>patt</em> as a delimiter, split <em>str</em> into substrings."
public [String] split(String patt, String str) {
    // Compile the delimiter pattern and reuse the compiled-pattern overload,
    // which runs the same splitting loop.
    compiled = compile(patt);
    return split(compiled,str);
}

"Split a string into substrings.
Using the compiled Regex <em>pattre</em> as a delimiter, split
<em>str</em> into substrings."
public [String] split(Regex pattre, String str) {
    // Repeatedly match the delimiter; the text before each match becomes a
    // piece (empty pieces before a delimiter are skipped) and the text after
    // the last delimiter is always appended, even when it is empty.
    strs = [];
    repeat case match(pattre,str) of {
        // The pattern rebinds str to the text after the delimiter, which is
        // what the next iteration searches.
        matches(xs,before,str) ->
            if (before!="") {
                push(strs,before);
            } else {
                if (xs[0]=="") {
                    // Zero-length delimiter at the very start of the
                    // remaining text: nothing was consumed, so matching
                    // again would loop forever. Treat the remainder as the
                    // final piece and stop.
                    push(strs,str);
                    break;
                }
            }
        | noMatch() -> push(strs,str);
                       break; // Stop matching
    }
    return strs;
}