File: delndups.scm

package info (click to toggle)
scheme48 1.9-5
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, stretch
  • size: 18,276 kB
  • ctags: 16,390
  • sloc: lisp: 88,906; ansic: 87,511; sh: 3,224; makefile: 766
file content (185 lines) | stat: -rw-r--r-- 6,605 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
;;; The sort package -- delete neighboring duplicate elts
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 11/98.

;;; Problem:
;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
;;; of elements in the answer vector. This is arguably a very efficient thing
;;; to do, but it might blow out on a system with a limited stack but a big
;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
;;; push more than a certain number of frames, then allocate a final answer,
;;; copying all the chunks into the answer. But it's much more complex code.

;;; Exports:
;;; (list-delete-neighbor-dups  = lis) -> list
;;; (list-delete-neighbor-dups! = lis) -> list
;;; (vector-delete-neighbor-dups  = v [start end]) -> vector
;;; (vector-delete-neighbor-dups! = v [start end]) -> end'

;;; These procedures delete adjacent duplicate elements from a list or
;;; a vector, using a given element equality procedure. The first or leftmost
;;; element of a run of equal elements is the one that survives. The list
;;; or vector is not otherwise disordered.
;;;
;;; These procedures are linear time -- much faster than the O(n^2) general 
;;; duplicate-elt deletors that do not assume any "bunching" of elements.
;;; If you want to delete duplicate elements from a large list or vector,
;;; sort the elements to bring equal items together, then use one of these
;;; procedures -- for a total time of O(n lg n). 

;;; LIST-DELETE-NEIGHBOR-DUPS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
;;; from simple to complex. RECUR's contract: Strip off any leading X's from 
;;; LIS, and return that list neighbor-dup-deleted.
;;;
;;; The final version
;;; - shares a common subtail between the input & output list, up to 1024 
;;;   elements;
;;; - Needs no more than 1024 stack frames.

;;; Simplest version. 
;;; - Always allocates a fresh list / never shares storage.
;;; - Needs N stack frames, if answer is length N.
(define (list-delete-neighbor-dups = lis)
  (if (pair? lis)
      (let ((x0 (car lis)))
	(cons x0 (let recur ((x0 x0) (xs (cdr lis)))
		   (if (pair? xs)
		       (let ((x1  (car xs))
			     (x2+ (cdr xs)))
			  (if (= x0 x1)
			      (recur x0 x2+) ; Loop, actually.
			      (cons x1 (recur x1 x2+))))
		       xs))))
      lis))

;;; This version tries to use cons cells from input by sharing longest
;;; common tail between input & output. Still needs N stack frames, for ans
;;; of length N.
(define (list-delete-neighbor-dups = lis)
  (if (pair? lis)
      (let* ((x0 (car lis))
	     (xs (cdr lis))
	     (ans (let recur ((x0 x0) (xs xs))
		    (if (pair? xs)
			(let ((x1  (car xs))
			      (x2+ (cdr xs)))
			  (if (= x0 x1)
			      (recur x0 x2+)
			      (let ((ans-tail (recur x1 x2+)))
				(if (eq? ans-tail x2+) xs
				    (cons x1 ans-tail)))))
			xs))))
	(if (eq? ans xs) lis (cons x0 ans)))

      lis))

;;; LIST-DELETE-NEIGHBOR-DUPS!
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code runs in constant list space, constant stack, and also
;;; does only the minimum SET-CDR!'s necessary.

(define (list-delete-neighbor-dups! = lis)
  (if (pair? lis)
      (let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
	(if (pair? lis)
	    (let ((lis-elt (car lis))
		  (next (cdr lis)))
	      (if (= prev-elt lis-elt)

		  ;; We found the first elts of a run of dups, so we know
		  ;; we're going to have to do a SET-CDR!. Scan to the end of
		  ;; the run, do the SET-CDR!, and loop on LP1.
		  (let lp2 ((lis next))
		    (if (pair? lis)
			(let ((lis-elt (car lis))
			      (next (cdr lis)))
			  (if (= prev-elt lis-elt)
			      (lp2 next)
			      (begin (set-cdr! prev lis)
				     (lp1 lis lis-elt next))))
			(set-cdr! prev lis)))	; Ran off end => quit.

		  (lp1 lis lis-elt next))))))
  lis)


(define (vector-delete-neighbor-dups elt= v . maybe-start+end)
  (call-with-values
   (lambda () (vector-start+end v maybe-start+end))
   (lambda (start end)
     (if (< start end)
	 (let* ((x (vector-ref v start))
		(ans (let recur ((x x) (i start) (j 1))
		       (if (< i end)
			   (let ((y (vector-ref v i))
				 (nexti (+ i 1)))
			     (if (elt= x y)
				 (recur x nexti j)
				 (let ((ansvec (recur y nexti (+ j 1))))
				   (vector-set! ansvec j y)
				   ansvec)))
			   (make-vector j)))))
	   (vector-set! ans 0 x)
	   ans)
	 '#()))))


;;; Packs the surviving elements to the left, in range [start,end'),
;;; and returns END'.
(define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
  (call-with-values
   (lambda () (vector-start+end v maybe-start+end))
   (lambda (start end)

     (if (>= start end)
	 end
	 ;; To eliminate unnecessary copying (read elt i then write the value 
	 ;; back at index i), we scan until we find the first dup.
	 (let skip ((j start) (vj (vector-ref v start)))
	   (let ((j+1 (+ j 1)))
	     (if (>= j+1 end)
		 end
		 (let ((vj+1 (vector-ref v j+1)))
		   (if (not (elt= vj vj+1))
		       (skip j+1 vj+1)

		       ;; OK -- j & j+1 are dups, so we're committed to moving
		       ;; data around. In lp2, v[start,j] is what we've done;
		       ;; v[k,end) is what we have yet to handle.
		       (let lp2 ((j j) (vj vj) (k (+ j 2)))
			 (let lp3 ((k k))
			   (if (>= k end)
			       (+ j 1) ; Done.
			       (let ((vk (vector-ref v k))
				     (k+1 (+ k 1)))
				 (if (elt= vj vk)
				     (lp3 k+1)
				     (let ((j+1 (+ j 1)))
				       (vector-set! v j+1 vk)
				       (lp2 j+1 vk k+1))))))))))))))))
		    
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;;     Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah. Don't you think source files should contain more lines
;;; of code than copyright notice?
;;;
;;; Code porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.