1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
;;; The sort package -- delete neighboring duplicate elts
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 11/98.
;;; Problem:
;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
;;; of elements in the answer vector. This is arguably a very efficient thing
;;; to do, but it might blow out on a system with a limited stack but a big
;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
;;; push more than a certain number of frames, then allocate a final answer,
;;; copying all the chunks into the answer. But it's much more complex code.
;;; Exports:
;;; (list-delete-neighbor-dups = lis) -> list
;;; (list-delete-neighbor-dups! = lis) -> list
;;; (vector-delete-neighbor-dups = v [start end]) -> vector
;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
;;; These procedures delete adjacent duplicate elements from a list or
;;; a vector, using a given element equality procedure. The first or leftmost
;;; element of a run of equal elements is the one that survives. The list
;;; or vector is not otherwise disordered.
;;;
;;; These procedures are linear time -- much faster than the O(n^2) general
;;; duplicate-elt deletors that do not assume any "bunching" of elements.
;;; If you want to delete duplicate elements from a large list or vector,
;;; sort the elements to bring equal items together, then use one of these
;;; procedures -- for a total time of O(n lg n).
;;; LIST-DELETE-NEIGHBOR-DUPS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
;;; from simple to complex. RECUR's contract: Strip off any leading X's from
;;; LIS, and return that list neighbor-dup-deleted.
;;;
;;; The final version
;;; - shares a common subtail between the input & output list, up to 1024
;;; elements;
;;; - Needs no more than 1024 stack frames.
;;; Simplest version.
;;; - Always allocates a fresh list / never shares storage.
;;; - Needs N stack frames, if answer is length N.
(define (list-delete-neighbor-dups = lis)
(if (pair? lis)
(let ((x0 (car lis)))
(cons x0 (let recur ((x0 x0) (xs (cdr lis)))
(if (pair? xs)
(let ((x1 (car xs))
(x2+ (cdr xs)))
(if (= x0 x1)
(recur x0 x2+) ; Loop, actually.
(cons x1 (recur x1 x2+))))
xs))))
lis))
;;; This version tries to use cons cells from input by sharing longest
;;; common tail between input & output. Still needs N stack frames, for ans
;;; of length N.
(define (list-delete-neighbor-dups = lis)
(if (pair? lis)
(let* ((x0 (car lis))
(xs (cdr lis))
(ans (let recur ((x0 x0) (xs xs))
(if (pair? xs)
(let ((x1 (car xs))
(x2+ (cdr xs)))
(if (= x0 x1)
(recur x0 x2+)
(let ((ans-tail (recur x1 x2+)))
(if (eq? ans-tail x2+) xs
(cons x1 ans-tail)))))
xs))))
(if (eq? ans xs) lis (cons x0 ans)))
lis))
;;; LIST-DELETE-NEIGHBOR-DUPS!
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code runs in constant list space, constant stack, and also
;;; does only the minimum SET-CDR!'s necessary.
(define (list-delete-neighbor-dups! = lis)
(if (pair? lis)
(let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
(if (pair? lis)
(let ((lis-elt (car lis))
(next (cdr lis)))
(if (= prev-elt lis-elt)
;; We found the first elts of a run of dups, so we know
;; we're going to have to do a SET-CDR!. Scan to the end of
;; the run, do the SET-CDR!, and loop on LP1.
(let lp2 ((lis next))
(if (pair? lis)
(let ((lis-elt (car lis))
(next (cdr lis)))
(if (= prev-elt lis-elt)
(lp2 next)
(begin (set-cdr! prev lis)
(lp1 lis lis-elt next))))
(set-cdr! prev lis))) ; Ran off end => quit.
(lp1 lis lis-elt next))))))
lis)
(define (vector-delete-neighbor-dups elt= v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(if (< start end)
(let* ((x (vector-ref v start))
(ans (let recur ((x x) (i start) (j 1))
(if (< i end)
(let ((y (vector-ref v i))
(nexti (+ i 1)))
(if (elt= x y)
(recur x nexti j)
(let ((ansvec (recur y nexti (+ j 1))))
(vector-set! ansvec j y)
ansvec)))
(make-vector j)))))
(vector-set! ans 0 x)
ans)
'#()))))
;;; Packs the surviving elements to the left, in range [start,end'),
;;; and returns END'.
(define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(if (>= start end)
end
;; To eliminate unnecessary copying (read elt i then write the value
;; back at index i), we scan until we find the first dup.
(let skip ((j start) (vj (vector-ref v start)))
(let ((j+1 (+ j 1)))
(if (>= j+1 end)
end
(let ((vj+1 (vector-ref v j+1)))
(if (not (elt= vj vj+1))
(skip j+1 vj+1)
;; OK -- j & j+1 are dups, so we're committed to moving
;; data around. In lp2, v[start,j] is what we've done;
;; v[k,end) is what we have yet to handle.
(let lp2 ((j j) (vj vj) (k (+ j 2)))
(let lp3 ((k k))
(if (>= k end)
(+ j 1) ; Done.
(let ((vk (vector-ref v k))
(k+1 (+ k 1)))
(if (elt= vj vk)
(lp3 k+1)
(let ((j+1 (+ j 1)))
(vector-set! v j+1 vk)
(lp2 j+1 vk k+1))))))))))))))))
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;; Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah. Don't you think source files should contain more lines
;;; of code than copyright notice?
;;;
;;; Code porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.
|