File: trie.jl

package info (click to toggle)
librep 0.17-13
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 5,648 kB
  • ctags: 2,969
  • sloc: ansic: 32,770; lisp: 12,399; sh: 7,971; makefile: 515; sed: 93
file content (119 lines) | stat: -rw-r--r-- 3,222 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#| trie.jl -- data structure for encoding character lists as a tree

   $Id: trie.jl,v 1.1 2002/01/21 07:33:21 jsh Exp $

   Copyright (C) 2002 John Harper <jsh@unfactored.org>

   This file is part of librep.

   librep is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   librep is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Jade; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|#

(define-structure rep.data.trie

    (export make-trie
	    trie-insert-string!
	    trie-string-ref
	    trie-contains-string?
	    trie-foreach
	    make-trie-from-file)

    (open rep
	  rep.io.files
	  rep.regexp)


;; tree nodes

  ;; each node is a list (node (CHAR . VALUE) ...) VALUE is typically
  ;; another tree

  (define (make-node) (list 'node))

  (define (node-ref node key)
    (cdr (assq key (cdr node))))

  (define (node-set! node key v)
    (let ((cell (assq key (cdr node))))
      (if cell
	  (rplacd cell v)
	(rplacd node (cons (cons key v) (cdr node))))))


;; trees of tokens

  (define make-trie make-node)

  ;; returns the sub-tree of the last key, or false
  (define (trie-ref tree keys)
    (if (null keys)
	tree
      (let ((sub-tree (node-ref tree (car keys))))
	(and sub-tree (trie-ref sub-tree (cdr keys))))))

  (define (trie-insert-1! tree key)
    (let ((sub (node-ref tree key)))
      (if (not sub)
	  (let ((new (make-node)))
	    (node-set! tree key new)
	    new)
	sub)))

  ;; returns the sub-tree of the last inserted token
  (define (trie-insert! tree keys)
    (if (not keys)
	tree
      (trie-insert! (trie-insert-1! tree (car keys)) (cdr keys))))


;; string handling

  (defconst word-terminator eow)

  (define (trie-insert-string! tree string)
    (trie-insert-1! (trie-insert! tree (vector->list string)) word-terminator))

  (define (trie-string-ref tree string)
    (trie-ref tree (vector->list string)))

  (define (trie-contains-string? tree string)
    (let ((end (trie-string-ref tree string)))
      (and (node-ref end word-terminator) t)))

  (define (trie-foreach tree callback)
    (define (iter tree tokens)
      (mapc (lambda (x)
	      (if (eq (car x) word-terminator)
		  (callback (apply concat (reverse tokens)))
		(iter (cdr x) (cons (car x) tokens))))
	    (cdr tree)))
    (iter tree '()))

  (define (make-trie-from-file filename #!key callback)
    (let ((file (open-file filename 'read))
	  (tree (make-trie)))
      (unwind-protect
	  (let loop ()
	    (let ((string (read-line file)))
	      (when string
		(when (string-match "\\s+$" string)
		  (setq string (substring string 0 (match-start))))
		(when callback
		  (setq string (callback string)))
		(when string
		  (trie-insert-string! tree string))
		(loop))))
	(close-file file))
      tree)))