File: compact-table.scm

package info (click to toggle)
scheme48 1.9.2-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 18,232 kB
  • sloc: lisp: 88,907; ansic: 87,519; sh: 3,224; makefile: 771
file content (119 lines) | stat: -rw-r--r-- 3,789 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
; Part of Scheme 48 1.9.  See file COPYING for notices and license.

; Authors: Mike Sperber, Robert Ransom
; Copyright (c) 2005-2006 by Basis Technology Corporation. 

; A compact table is an encoding of a very large vector that has lots
; of recurring patterns.  It was written for encoding Unicode tables.

; The vector is partitioned into blocks, and the blocks get assembled
; into a new compressed vector.  Each time a new block gets added, the
; algorithm looks if the same block is already present in the
; compressed vector, or the compressed vector ends with a prefix of
; the new block.  In the former case, nothing needs to get added.  In
; the latter case, only the suffix needs to get added.  At the same
; time, the algorithm computes a table with indices of the block
; beginnings.

; The algorithm can take a long time; little attempt at optimization
; has been made.  It's mainly intended for offline computation as part
; of a build process.

; This tries to merge BLOCK onto REVERSE-BASE, sharing the prefix of
; BLOCK.

; returns new reverse list + index offset

(define (compact-block block reverse-base)
  (let* ((block-size (length block))
	 (base-block (reverse (take-upto reverse-base block-size)))
	 (base-block-size (length base-block)))
    (let loop ((base-block base-block)
	       (offset 0))
      (if (list-prefix? base-block block)
	  (values (append (reverse (list-tail block (- base-block-size offset)))
			  reverse-base)
		  offset)
	  (loop (cdr base-block) (+ 1 offset))))))

; GET-VALUE is a thunk that returns the next value of the input vector
; every time it gets called.  BLOCK-SIZE is the size of the blocks in
; the algorithm.

; The procedure returns two values: the indices vector and a vector of
; the actual values.

(define (compute-compact-table get-value block-size)
  
  (define (get-block)
    (let loop ((i 0) (rev-block '()))
      (cond
       ((>= i block-size)
	(reverse rev-block))
       ((get-value)
	=> (lambda (value)
	     (loop (+ 1 i) (cons value rev-block))))
       (else
	(reverse rev-block)))))
      
  (let loop ((reverse-values '())
	     (reverse-indices '())
	     (last-index 0)
	     ;; cache for blocks that have already shown up twice
	     ;; (reduces run time *a lot*)
	     (bingo-block-alist '()))

    (let ((block (get-block)))
      (cond
       ((null? block)
	(values (list->vector (reverse reverse-indices))
		(list->vector (reverse reverse-values))))
       ((assoc block bingo-block-alist)
	=> (lambda (pair)
	     (loop reverse-values
		   (cons (cdr pair) reverse-indices)
		   last-index
		   bingo-block-alist)))
       ((sublist-index (reverse block) reverse-values)
	=> (lambda (rev-index)
	     (loop reverse-values
		   (cons (+ (- block-size (length block)) (- last-index rev-index))
			 reverse-indices)
		   last-index
		   (cons (cons block (- last-index rev-index)) bingo-block-alist))))
       (else
	(call-with-values
	    (lambda () (compact-block block reverse-values))
	  (lambda (reverse-values offset)
	    (loop reverse-values
		  (cons (+ last-index offset) reverse-indices)
		  (+ last-index offset)
		  bingo-block-alist))))))))

; List utilities

(define (sublist-index sublist list)
  (let loop ((list list)
	     (index 0))
    (cond
     ((list-prefix? sublist list)
      index)
     ((null? list)
      #f)
     (else (loop (cdr list) (+ 1 index))))))

(define (list-prefix? list-1 list-2)
  (cond
   ((null? list-1) #t)
   ((null? list-2) #f)
   ((equal? (car list-1) (car list-2))
    (list-prefix? (cdr list-1) (cdr list-2)))
   (else #f)))

(define (take-upto list count)
  (let loop ((list list) (count count) (rev-result '()))
    (if (or (zero? count)
	    (null? list))
	(reverse rev-result)
	(loop (cdr list) (- count 1) (cons (car list) rev-result)))))