1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
;;; "MISCIO" Search for string from port.
; Written 1995, 1996 by Oleg Kiselyov (oleg@ponder.csci.unt.edu)
; Modified 1996, 1997, 1998 by A. Jaffer (jaffer@ai.mit.edu)
;
; This code is in the public domain.
;;; Return the index of the first occurence of a-char in str, or #f
(define (strsrch:string-index str a-char)
(let loop ((pos 0))
(cond
;; whole string has been searched, in vain
((>= pos (string-length str)) #f)
((char=? a-char (string-ref str pos)) pos)
(else (loop (+ 1 pos))))))
(define (string-index-ci str a-char)
(let loop ((pos 0))
(cond
;; whole string has been searched, in vain
((>= pos (string-length str)) #f)
((char-ci=? a-char (string-ref str pos)) pos)
(else (loop (+ 1 pos))))))
(define (string-reverse-index str a-char)
(let loop ((pos (- (string-length str) 1)))
(cond ((< pos 0) #f)
((char=? (string-ref str pos) a-char) pos)
(else (loop (- pos 1))))))
(define (string-reverse-index-ci str a-char)
(let loop ((pos (- (string-length str) 1)))
(cond ((< pos 0) #f)
((char-ci=? (string-ref str pos) a-char) pos)
(else (loop (- pos 1))))))
(define (miscio:substring? pattern str char=?)
(let* ((pat-len (string-length pattern))
(search-span (- (string-length str) pat-len))
(c1 (if (zero? pat-len) #f (string-ref pattern 0)))
(c2 (if (<= pat-len 1) #f (string-ref pattern 1))))
(cond
((not c1) 0) ; empty pattern, matches upfront
((not c2) (strsrch:string-index str c1)) ; one-char pattern
(else ; matching pattern of > two chars
(let outer ((pos 0))
(cond
((> pos search-span) #f) ; nothing was found thru the whole str
((not (char=? c1 (string-ref str pos)))
(outer (+ 1 pos))) ; keep looking for the right beginning
((not (char=? c2 (string-ref str (+ 1 pos))))
(outer (+ 1 pos))) ; could've done pos+2 if c1 == c2....
(else ; two char matched: high probability
; the rest will match too
(let inner ((i-pat 2) (i-str (+ 2 pos)))
(if (>= i-pat pat-len) pos ; the whole pattern matched
(if (char=? (string-ref pattern i-pat)
(string-ref str i-str))
(inner (+ 1 i-pat) (+ 1 i-str))
;; mismatch after partial match
(outer (+ 1 pos))))))))))))
(define (substring? pattern str) (miscio:substring? pattern str char=?))
(define (substring-ci? pattern str) (miscio:substring? pattern str char-ci=?))
(define (find-string-from-port? str <input-port> . max-no-char)
(set! max-no-char (if (null? max-no-char) #f (car max-no-char)))
(letrec
((no-chars-read 0)
(peeked? #f)
(my-peek-char ; Return a peeked char or #f
(lambda () (and (or (not (number? max-no-char))
(< no-chars-read max-no-char))
(let ((c (peek-char <input-port>)))
(cond (peeked? c)
((eof-object? c) #f)
((procedure? max-no-char)
(set! peeked? #t)
(if (max-no-char c) #f c))
((eqv? max-no-char c) #f)
(else c))))))
(next-char (lambda () (set! peeked? #f) (read-char <input-port>)
(set! no-chars-read (+ 1 no-chars-read))))
(match-1st-char ; of the string str
(lambda ()
(let ((c (my-peek-char)))
(and c
(begin (next-char)
(if (char=? c (string-ref str 0))
(match-other-chars 1)
(match-1st-char)))))))
;; There has been a partial match, up to the point pos-to-match
;; (for example, str[0] has been found in the stream)
;; Now look to see if str[pos-to-match] for would be found, too
(match-other-chars
(lambda (pos-to-match)
(if (>= pos-to-match (string-length str))
no-chars-read ; the entire string has matched
(let ((c (my-peek-char)))
(and c
(if (not (char=? c (string-ref str pos-to-match)))
(backtrack 1 pos-to-match)
(begin (next-char)
(match-other-chars (+ 1 pos-to-match)))))))))
;; There had been a partial match, but then a wrong char showed up.
;; Before discarding previously read (and matched) characters, we check
;; to see if there was some smaller partial match. Note, characters read
;; so far (which matter) are those of str[0..matched-substr-len - 1]
;; In other words, we will check to see if there is such i>0 that
;; substr(str,0,j) = substr(str,i,matched-substr-len)
;; where j=matched-substr-len - i
(backtrack
(lambda (i matched-substr-len)
(let ((j (- matched-substr-len i)))
(if (<= j 0)
;; backed off completely to the begining of str
(match-1st-char)
(let loop ((k 0))
(if (>= k j)
(match-other-chars j) ; there was indeed a shorter match
(if (char=? (string-ref str k)
(string-ref str (+ i k)))
(loop (+ 1 k))
(backtrack (+ 1 i) matched-substr-len))))))))
)
(match-1st-char)))
(define (string-subst text old new . rest)
(define sub
(lambda (text)
(set! text
(cond ((equal? "" text) text)
((substring? old text)
=> (lambda (idx)
(string-append
(substring text 0 idx)
new
(sub (substring
text (+ idx (string-length old))
(string-length text))))))
(else text)))
(if (null? rest)
text
(apply string-subst text rest))))
(sub text))
|