1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
|
#| trie.jl -- data structure for encoding character lists as a tree
$Id: trie.jl,v 1.1 2002/01/21 07:33:21 jsh Exp $
Copyright (C) 2002 John Harper <jsh@unfactored.org>
This file is part of librep.
librep is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
librep is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Jade; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|#
(define-structure rep.data.trie
(export make-trie
trie-insert-string!
trie-string-ref
trie-contains-string?
trie-foreach
make-trie-from-file)
(open rep
rep.io.files
rep.regexp)
;; tree nodes
;; each node is a list (node (CHAR . VALUE) ...) VALUE is typically
;; another tree
(define (make-node) (list 'node))
(define (node-ref node key)
(cdr (assq key (cdr node))))
(define (node-set! node key v)
(let ((cell (assq key (cdr node))))
(if cell
(rplacd cell v)
(rplacd node (cons (cons key v) (cdr node))))))
;; trees of tokens
(define make-trie make-node)
;; returns the sub-tree of the last key, or false
(define (trie-ref tree keys)
(if (null keys)
tree
(let ((sub-tree (node-ref tree (car keys))))
(and sub-tree (trie-ref sub-tree (cdr keys))))))
(define (trie-insert-1! tree key)
(let ((sub (node-ref tree key)))
(if (not sub)
(let ((new (make-node)))
(node-set! tree key new)
new)
sub)))
;; returns the sub-tree of the last inserted token
(define (trie-insert! tree keys)
(if (not keys)
tree
(trie-insert! (trie-insert-1! tree (car keys)) (cdr keys))))
;; string handling
(defconst word-terminator eow)
(define (trie-insert-string! tree string)
(trie-insert-1! (trie-insert! tree (vector->list string)) word-terminator))
(define (trie-string-ref tree string)
(trie-ref tree (vector->list string)))
(define (trie-contains-string? tree string)
(let ((end (trie-string-ref tree string)))
(and (node-ref end word-terminator) t)))
(define (trie-foreach tree callback)
(define (iter tree tokens)
(mapc (lambda (x)
(if (eq (car x) word-terminator)
(callback (apply concat (reverse tokens)))
(iter (cdr x) (cons (car x) tokens))))
(cdr tree)))
(iter tree '()))
(define (make-trie-from-file filename #!key callback)
(let ((file (open-file filename 'read))
(tree (make-trie)))
(unwind-protect
(let loop ()
(let ((string (read-line file)))
(when string
(when (string-match "\\s+$" string)
(setq string (substring string 0 (match-start))))
(when callback
(setq string (callback string)))
(when string
(trie-insert-string! tree string))
(loop))))
(close-file file))
tree)))
|