File: regex.m2

package info (click to toggle)
macaulay2 1.21%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 133,096 kB
  • sloc: cpp: 110,377; ansic: 16,306; javascript: 4,193; makefile: 3,821; sh: 3,580; lisp: 764; yacc: 590; xml: 177; python: 140; perl: 114; lex: 65; awk: 3
file content (119 lines) | stat: -rw-r--r-- 5,616 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
-* Copyright 2020 by Mahrud Sayrafi *-

-- See RegexFlags defined in Macaulay2/d/regex.dd for a list of available flags.
-- More flags can be added there.

needs "methods.m2"

-- Pattern consisting of one capture group around a bracket expression that
-- lists every regex metacharacter, each preceded by a backslash.
regexSpecialChars = concatenate("([",
    (for c in {"\\", "^", "$", ".", "|", "?", "*", "+", "(", ")", "[", "]", "{", "}"} list "\\" | c),
    "])")

-----------------------------------------------------------------------------
-- Local utilities
-----------------------------------------------------------------------------

-- Translate the options of a regex-based method into the integer of syntax
-- flags handed to the underlying engine.
--   opts: either an integer (returned unchanged, so already-computed flags
--         can be passed through) or a table with a Boolean POSIX entry.
-- Signals an error when POSIX is missing or is not true/false.
setRegexFlags = opts -> (
    if instance(opts, ZZ) then return opts;
    if not (opts.?POSIX and instance(opts.POSIX, Boolean))
    then error "regex: expected true or false for option POSIX => ...";
    if opts.POSIX
    then RegexFlags#"extended"  - (RegexFlags#"no_bk_refs" | RegexFlags#"no_escape_in_lists")
    else RegexFlags#"ECMAScript" | RegexFlags#"no_mod_s")

-- Translate the options of a regex-based method into the integer of
-- match/format flags handed to the underlying engine.
--   opts: either an integer (returned unchanged) or a table with a Boolean
--         POSIX entry.
-- Signals an error when POSIX is missing or is not true/false.
setMatchFlags = opts -> (
    if instance(opts, ZZ) then return opts;
    if not (opts.?POSIX and instance(opts.POSIX, Boolean))
    then error "regex: expected true or false for option POSIX => ...";
    if opts.POSIX
    then RegexFlags#"format_sed" | RegexFlags#"match_not_dot_newline"
    else RegexFlags#"format_perl")

-----------------------------------------------------------------------------
-- regex
-----------------------------------------------------------------------------

-- Keep the previous value of regex under a private name so the methods
-- installed below can delegate to it, then rebind regex as a method that
-- accepts the POSIX option (default false).
regex' = regex
regex = method(TypicalValue => List, Options => {POSIX => false})
-- The two- and three-argument forms forward to the four-argument form with
-- range = length str (and head = 0 when no starting position is given).
regex(String,         String) := opts -> (re,              str) -> regex(re, 0,    length str, str, opts)
regex(String, ZZ,     String) := opts -> (re, head,        str) -> regex(re, head, length str, str, opts)
-- regex(re, head, range, str): search str for the pattern re, letting the
-- match begin at head, head + 1, ..., head + range (for range >= 0) or at
-- head, head - 1, ..., head + range (for range < 0).
-- Returns the result of the first successful call to regex', or null when
-- every attempt fails.
regex(String, ZZ, ZZ, String) := opts -> (re, head, range, str) -> (
    tail := length str;
    (regexFlags, matchFlags) := (setRegexFlags opts, setMatchFlags opts);
    -- When the permitted starting positions reach the end of the string, a
    -- single unrestricted search from head is all that is needed.
    if head + range >= tail then return regex'(re, head, tail, str, regexFlags, matchFlags);
    -- From here on head + range < tail.  Each attempt below is tied to its own
    -- starting position via match_continuous; this is backwards compatible with
    -- GNU regex in Extended POSIX flavor, however, the lookbehind feature of
    -- Perl flavor doesn't work in this case.
    matchFlags = matchFlags | RegexFlags#"match_continuous";
    if range >= 0
    then for lead from 0 to range when head + lead <= tail do (
        ret := regex'(re, head + lead, tail, str, regexFlags, matchFlags);
        if ret =!= null then return ret)
    -- Negative range: walk backwards from head, never before the start of str.
    else for lead from 0 to -range when head - lead >= 0 do (
        ret := regex'(re, head - lead, tail, str, regexFlags, matchFlags);
        if ret =!= null then return ret))
protect symbol regex

-----------------------------------------------------------------------------
-- separate
-----------------------------------------------------------------------------

-- Keep the previous value of separate under a private name so the methods
-- installed below can delegate to it, then rebind separate as a method that
-- takes the same options as regex.
separate' = separate
separate = method(TypicalValue => List, Options => options regex)
-- With no pattern given, split on line endings: a newline optionally
-- preceded by a carriage return.
separate(            String) := opts -> (       str) -> separate("\r?\n", str, opts)
-- separate(re, str): split str at every match of the pattern re.
-- A pattern that is exactly one regex metacharacter is treated as a literal
-- character instead (with a warning on stderr).
separate(String,     String) := opts -> (re,    str) -> (
    isLiteral := length re == 1 and match(regexSpecialChars, re);
    if isLiteral then
    stderr << "warning: unescaped special character '" << re << "' found (and escaped) in call to 'separate'" << endl;
    separate'(re, str,
        if isLiteral then RegexFlags#"literal" else setRegexFlags opts,
        setMatchFlags opts))
-- separate(re, n, str): split str using entry n of each regex result as the
-- separator.  The string is scanned left to right; each piece runs from the
-- current position up to the start of the next separator.
separate(String, ZZ, String) := opts -> (re, n, str) -> (
    (cursor, strLen) := (0, length str);
    while cursor <= strLen list (
        found := regex(re, cursor, strLen, str, opts);
        if found#?n then (
            (sepStart, sepLen) := found#n;
            chunk := substring(str, cursor, sepStart - cursor);
            -- advance by at least one character so an empty separator cannot stall the loop
            cursor = sepStart + max(1, sepLen);
            chunk)
        else (
            -- no further separator: emit the remainder and end the loop
            remainder := substring(str, cursor);
            cursor = strLen + 1;
            remainder)))
protect symbol separate

-- Deprecated: separateRegexp is bound to the same method as separate.
separateRegexp = separate

-----------------------------------------------------------------------------
-- select
-----------------------------------------------------------------------------

-- String methods for select.  Both accept the options of regex and delegate
-- to select' (not defined in this file) with the computed regex and match flags.
-- The two-argument form passes the format string "$&"
-- (presumably "the whole match" in the engine's format syntax -- confirm).
select(String,         String) := List => options regex >> opts ->
    (re,       str) -> select'(re, "$&", str, setRegexFlags opts, setMatchFlags opts)
-- The three-argument form passes the caller's format string form.
select(String, String, String) := List => options regex >> opts ->
    (re, form, str) -> select'(re, form, str, setRegexFlags opts, setMatchFlags opts)
protect symbol select

-----------------------------------------------------------------------------
-- match
-----------------------------------------------------------------------------

-- Result of the most recent match(String, String) call; null until a match
-- has been attempted and whenever the latest attempt failed.
lastMatch = null
match = method(TypicalValue => Boolean, Options => options regex ++ {Strategy => any})
-- match(rs, str): test str against each pattern in the list rs, combined with
-- the quantifier given by Strategy (any or all).
match(List,   String) := opts -> (rs, str) -> (
    quantifier := opts.Strategy;
    if not member(quantifier, {any, all})
    then error concatenate("unknown quantifier for match: ", toString quantifier);
    quantifier(rs, re -> match(re, str, opts)))
-- match(re, str): true exactly when re matches somewhere in str; the raw
-- result of the search is recorded in lastMatch.
match(String, String) := opts -> (re, str) -> (
    lastMatch = regex'(re, 0, length str, str, setRegexFlags opts, setMatchFlags opts);
    lastMatch =!= null)

-----------------------------------------------------------------------------
-- replace
-----------------------------------------------------------------------------

-- previously in methods.m2
-- Keep the previous value of replace under a private name so the string
-- method below can delegate to it, then rebind replace as a method.
replace' = replace
replace = method(Options => true)
-- replace(re, s, r): string method accepting the options of regex; it hands
-- its three arguments, in order, to replace' together with the computed
-- regex and match flags.
replace(String, String, String) := String => options regex >>
    opts -> (re, s, r) -> replace'(re, s, r, setRegexFlags opts, setMatchFlags opts)
protect symbol replace

-- previously in html0.m2
-- Case conversion implemented through replace: every run of word characters
-- is captured and re-emitted via the format strings "\L$1" / "\U$1"
-- (presumably the Perl-style lower/upper case-conversion escapes of the
-- default format_perl mode -- confirm against the engine's documentation).
toLower = s -> replace("(\\w+)", "\\L$1", s)
toUpper = s -> replace("(\\w+)", "\\U$1", s)

-----------------------------------------------------------------------------
-- regexQuote
-----------------------------------------------------------------------------

-- regexQuote s: return s with every character matched by regexSpecialChars
-- preceded by a backslash, so the result can be used as a pattern that
-- matches s literally.
regexQuote = method(Dispatch => Thing, TypicalValue => String)
regexQuote String := s -> replace(regexSpecialChars, "\\\\$1", s)