File: regexp.wrp

package info (click to toggle)
xlispstat 3.52.0-3
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 7,472 kB
  • ctags: 12,480
  • sloc: ansic: 89,534; lisp: 21,690; sh: 1,525; makefile: 520; csh: 1
file content (138 lines) | stat: -rw-r--r-- 4,516 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
(provide "regexp")

(defpackage "REGULAR-EXPRESSIONS"
  (:use "COMMON-LISP")
  (:nicknames "REGEXP"))

(in-package "REGEXP")

(export '(REG_EXTENDED REG_NEWLINE REG_NOSUB REG_ICASE REG_NOTBOL REG_NOTEOL
          regcomp regexec
          regexp regsub url-decode))


;;;;
;;;; Lower-Level Component
;;;;

(wrap:c-lines "#include <sys/types.h>"
              "#include <regex.h>")

(wrap:c-constant REG_EXTENDED "REG_EXTENDED" :integer)
(wrap:c-constant REG_NEWLINE "REG_NEWLINE" :integer)
(wrap:c-constant REG_NOSUB "REG_NOSUB" :integer)
(wrap:c-constant REG_ICASE "REG_ICASE" :integer)
(wrap:c-constant REG_NOTBOL "REG_NOTBOL" :integer)
(wrap:c-constant REG_NOTEOL "REG_NOTEOL" :integer)
(wrap:c-constant REG_NOMATCH "REG_NOMATCH" :integer)

(wrap:c-pointer "regex_t"
           (:make make-regex-t)
           (:get re-nsub "re_nsub" :integer))

(wrap:c-pointer "regmatch_t"
           (:make make-regmatch-t)
           (:get rm-so "rm_so" :integer)
           (:get rm-eo "rm_eo" :integer))

(wrap:c-function base-regcomp "regcomp"
  ((:cptr "regex_t") :string :integer) :integer)

(wrap:c-function base-regexec "regexec"
  ((:cptr "regex_t") :string :integer (:cptr "regmatch_t") :integer) :integer)

(wrap:c-function regfree "regfree" ((:cptr "regex_t")) :void)

(wrap:c-function regerror "regerror"
  (:integer (:cptr "regex_t") :string :integer) :integer)

(defstruct (regexp (:print-function print-regexp)
                   (:constructor (make-regexp (ptr pat))))
  ptr pat)

(defun print-regexp (x s d)
  (format s "#<Regular Expression ~s>" (regexp-pat x)))

(defun regexp-free (x)
  (without-interrupts
   (let ((rex (regexp-ptr x)))
     (unless (null rex)
             (setf (regexp-ptr x) nil)
             (regfree rex)))))

(defun raise-regex-error (fun result rex)
  (let ((size (regerror result rex "" 0))
        (buf (make-string size)))
    (regerror result rex buf size)
    (error "~a in ~a" (subseq buf 0 (- size 1)) fun)))

(defun regcomp (pat flags)
  (system:without-interrupts
    (let* ((rex (make-regex-t))
           (result (base-regcomp rex pat flags))
           (value (make-regexp rex pat)))
      (unless (= result 0)
              (raise-regex-error "regcomp" result rex))
      (system:register-finalizer value #'regexp-free)
      value)))

(defun regexec (r str flags)
  (let* ((rex (regexp-ptr r))
         (nmatch (+ (re-nsub rex) 1))
         (rm (make-regmatch-t nmatch))
         (result (base-regexec rex str nmatch rm flags)))
    (cond
      ((= result 0) (let ((val nil))
                      (dotimes (i nmatch (nreverse val))
                        (let ((so (rm-so rm i))
                              (eo (rm-eo rm i)))
                          (push (if (or (= so -1) (= eo -1))
                                    nil
                                  (cons so eo))
                                val)))))
      ((= result REG_NOMATCH) nil)
      (t (raise-regex-error "regexec" result rex)))))


;;;;
;;;; Higher-Level Component
;;;;

(defun get-substrings (str val)
  (mapcar #'(lambda (x) (if x (subseq str (car x) (cdr x)) nil))
          val))

(defun regexp (pat str &key ignore-case (extended t) index-only)
  (let* ((rex (let ((icase (if ignore-case REG_ICASE 0))
                    (ext (if extended REG_EXTENDED 0)))
                (regcomp pat (logior icase ext))))
         (pairs (regexec rex str 0)))
    (if index-only pairs (get-substrings str pairs))))

(defun regsub (pat str sub &key ignore-case (extended t) all)
  (let ((rex (let ((icase (if ignore-case REG_ICASE 0))
                   (ext (if extended REG_EXTENDED 0)))
               (regcomp pat (logior icase ext))))
        (head "")
        (tail str))
    (loop
     (let ((val (regexec rex tail 0)))
       (unless val (return (concatenate 'string head tail)))
       (let* ((sval (if (stringp sub)
                        sub
                      (apply sub (get-substrings tail val))))
              (start (car (first val)))
              (end (cdr (first val)))
              (tail-head (subseq tail 0 start)))
         (setf head (concatenate 'string head tail-head sval))
         (setf tail (subseq tail end))))
     (unless all (return (concatenate 'string head tail))))))

(defun url-decode (url)
  (flet ((parse-hex (s)
           (let ((*read-base* 16))
             (read-from-string s))))
    (regsub "%([0-9a-hA-H][0-9a-hA-H])"
            (regsub "\\+" url " " :all t)
            #'(lambda (a b) (string (int-char (parse-hex b))))
            :all t)))