1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
;; Module for regular expression searching/matching in CLISP
;; Bruno Haible 14.4.1995, 18.4.1995 -- 2003
;; Sam Steingold 1999-10-28 -- 2009, 2011, 2017
(defpackage "REGEXP"
(:documentation
"POSIX Regular Expressions - matching, compiling, executing.")
(:use "COMMON-LISP")
(:import-from "SYS" #:text)
(:export #:match #:match-start #:match-end #:match-string #:regexp-quote
#:regexp-matcher
#:regexp-compile #:regexp-exec #:regexp-split #:with-loop-split))
(in-package "REGEXP")
(pushnew "REGEXP" custom:*system-package-list* :test #'string=)
(pushnew :regexp *features*)
(provide "regexp")
(setf (documentation (find-package "REGEXP") 'sys::impnotes) "regexp-mod")
(defstruct (match (:constructor make-match-boa (start end))
(:constructor ))
start end)
;; The following implementation of MATCH compiles the pattern
;; once for every search.
(defun match-once (pattern string &key (start 0) (end nil)
(extended nil) (ignore-case nil)
(newline nil) (nosub nil)
(notbol nil) (noteol nil))
(regexp-exec (regexp-compile pattern :extended extended
:ignore-case ignore-case
:newline newline :nosub nosub)
string :start start :end end :notbol notbol :noteol noteol))
;; The following implementation of MATCH compiles the pattern
;; only once per Lisp session, if it is a literal string.
(defmacro match (pattern string &rest more-forms)
(if (stringp pattern)
`(%MATCH (MATCHER ,pattern) ,string ,@more-forms)
`(MATCH-ONCE ,pattern ,string ,@more-forms)))
(defmacro matcher (pattern)
(declare (string pattern))
`(LOAD-TIME-VALUE (%MATCHER ,pattern)))
(defun %matcher (pattern)
(cons pattern (make-array '(2 2 2 2))))
(defun %match (patternbox string &key (start 0) (end nil)
(extended nil) (ignore-case nil)
(newline nil) (nosub nil)
(notbol nil) (noteol nil))
;; Compile the pattern, if not already done.
(let ((compiled-pattern
(aref (cdr patternbox) (if extended 0 1) (if ignore-case 0 1)
(if newline 0 1) (if nosub 0 1))))
(unless (and compiled-pattern #+ffi (ffi:validp compiled-pattern))
(setq compiled-pattern (regexp-compile (car patternbox)
:extended extended
:ignore-case ignore-case
:newline newline :nosub nosub))
(setf (aref (cdr patternbox) (if extended 0 1) (if ignore-case 0 1)
(if newline 0 1) (if nosub 0 1))
compiled-pattern))
(regexp-exec compiled-pattern string :start start :end end
:notbol notbol :noteol noteol)))
;; Convert a match (of type MATCH) to a substring.
(defun match-string (string match)
(let ((start (match-start match))
(end (match-end match)))
(make-array (- end start)
:element-type 'character
:displaced-to string
:displaced-index-offset start)))
;; Utility function
(defun regexp-quote (string &optional extended)
(let ((qstring (make-array 10 :element-type 'character
:adjustable t :fill-pointer 0)))
(map nil (if extended
(lambda (c)
(case c
((#\$ #\^ #\. #\* #\[ #\] #\\ #\+ #\? #\( #\))
(vector-push-extend #\\ qstring)))
(vector-push-extend c qstring))
(lambda (c)
(case c
((#\$ #\^ #\. #\* #\[ #\] #\\)
(vector-push-extend #\\ qstring)))
(vector-push-extend c qstring)))
string)
qstring))
(defun regexp-split (pattern string &key (start 0) (end nil)
(extended nil) (ignore-case nil)
(newline nil) (nosub nil)
(notbol nil) (noteol nil))
"Split the STRING by the regexp PATTERN.
Return a list of substrings of STRINGS."
(loop
:with compiled =
(if (stringp pattern)
(regexp-compile pattern :extended extended
:ignore-case ignore-case
:newline newline :nosub nosub)
pattern)
:and stop = (or end (length string))
:for match = (regexp-exec compiled string :start start :end end
:notbol notbol :noteol noteol)
:collect
(make-array (- (if match (match-start match) stop) start)
:element-type 'character
:displaced-to string
:displaced-index-offset start)
:while match
:do (let ((new-start (match-end match)))
(when (= start new-start)
(error (TEXT "~S: ~S matches an empty string ~S at ~S:~D")
'regexp-split pattern match string start))
(setq start new-start))))
(defmacro with-loop-split ((var stream pattern
&key (start 0) end
(extended nil) (ignore-case nil)
(newline nil) (nosub nil)
(notbol nil) (noteol nil))
&body forms)
"Read from STREAM one line at a time, binding VAR to the split line.
The line is split with REGEXP-SPLIT using PATTERN."
(ext:with-gensyms ("WLS-" compiled-pattern line nb ne st be en)
`(LOOP
:WITH ,compiled-pattern =
(IF (STRINGP ,pattern)
(REGEXP-COMPILE ,pattern :EXTENDED ,extended
:IGNORE-CASE ,ignore-case
:NEWLINE ,newline :NOSUB ,nosub)
,pattern)
:AND ,ne = ,noteol
:AND ,nb = ,notbol
:AND ,st = ,stream
:AND ,be = ,start
:AND ,en = ,end
:AND ,var
:FOR ,line = (READ-LINE ,st NIL NIL)
:WHILE ,line
:DO (SETQ ,var
(REGEXP-SPLIT ,compiled-pattern ,line :START ,be :END ,en
:NOTBOL ,nb :NOTEOL ,ne))
,@forms)))
(defun regexp-matcher (pattern)
"A valid value for *APROPOS-MATCHER* in the UTF-8 locale."
(let ((compiled (regexp-compile pattern :extended t :ignore-case t)))
(lambda (name) (regexp-exec compiled name :return-type 'boolean))))
|