File: double-array-builder.lisp

package info (click to toggle)
ruby-unf-ext 0.0.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,480 kB
  • sloc: cpp: 14,130; lisp: 1,180; ruby: 98; makefile: 4
file content (166 lines) | stat: -rw-r--r-- 5,928 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
(defpackage dawg.double-array-builder
  (:use :common-lisp :dawg.global)
  (:export build-from-bintrie))
(in-package :dawg.double-array-builder)

(package-alias :dawg.double-array.node-allocator :node-allocator)
(package-alias :dawg.double-array.buffered-output :output)
(package-alias :dawg.bintrie-builder :bintrie)

;;;;;;;;;;;;;;;
;;; declamation
(declaim #.*fastest*
         (inline set-base set-chck set-opts))

;;;;;;;;;;;;
;;; constant
(defconstant +BUFFER_SIZE+ 819200)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; da (abbreviation of "double array")
(defstruct da
  (node t :type output:buffered-output)
  (exts t :type stream)
  (done-count 0 :type positive-fixnum))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; node
(defstruct node
  (index         0 :type positive-fixnum)
  (base          0 :type positive-fixnum)
  (terminal?     t :type boolean)
  (sibling-total 0 :type positive-fixnum)
  (chck          0 :type uint1)
  (children    '() :type list))

(defun new-node (parent-base-idx trie)
  (declare (positive-fixnum parent-base-idx))
  (make-node :index (+ parent-base-idx (bintrie:node-label trie))
             :sibling-total #1=(bintrie:node-sibling-total trie)
             :terminal? (bintrie:node-terminal? trie)
             :base (if (bintrie:node-terminal? trie) (bintrie::node-value trie) 0)
             :chck (bintrie:node-label trie)))

(defun child-acceptable-p (node)
  nil)

(defun add-child (node child)
  (with-slots (children) (the node node)
    (setf children (nconc children (list (bintrie:node-label child))))))

;;;;;;;;;;;;;;;;;;;;;;
;;; auxiliary function

(defun merge-files-native-order (destination files)
  ;; write each file size
  (with-open-output-file (out destination 'uint4)
    (loop FOR (file) IN files
          DO (with-open-file (in file :element-type 'uint1)
               (write-byte (file-length in) out))))
  
  ;; write each file content
  (with-open-output-file (out destination 'uint1 :if-exists :append)
    (loop FOR (file) IN files
          DO (with-open-file (in file :element-type 'uint1)
               (loop FOR b = (read-byte in nil nil)
                     WHILE b
                     DO (write-byte b out))))))

(defun merge-files-reverse-order (destination files)
  ;; write each file size
  (with-open-output-file (out destination 'uint4)
    (loop FOR (file) IN files
      DO (with-open-file (in file :element-type 'uint1)
           (write-byte (byte-reverse (file-length in) 4) out))))
  
  ;; write each file content
  (loop FOR (file type) IN files
        FOR byte-size = (ecase type (uint4 4) (uint4 4))
    DO
    (with-open-output-file (out destination type :if-exists :append)
      (with-open-file (in file :element-type type)
        (loop FOR b = (read-byte in nil nil)
              WHILE b
          DO (write-byte (byte-reverse b byte-size) out))))))

(defun merge-files (destination byte-order files)
  (if (or (eq byte-order :native)
          (eq byte-order +NATIVE_ORDER+))
      (merge-files-native-order destination files)
    (merge-files-reverse-order destination files))
  (mapc #'delete-file (mapcar #'first files)))

(defmacro show (fmt &rest args)
  `(when show-progress
     (format t ,fmt ,@args)))

;;;;;;;;;;;;;;;;;;
;;; build function
(defun write-node-impl (node da)
  (with-slots (index type base terminal? sibling-total chck children) (the node node)
    (let ((n 0))
      (declare ((unsigned-byte 32) n))
      (setf (ldb (byte 24  0) n) base
            (ldb (byte  8 24) n) chck)
      (output:write-uint n (da-node da) :position index))))

(defun write-node (node da &key base)
  (when base
    (setf (node-base node) base))
  (write-node-impl node da))

(defmacro show-and-write-node (node da &key base)
  `(progn 
     (incf #1=(da-done-count ,da))
     (when (and show-progress (zerop (mod #1# 100000)))
       (show ";  ~a nodes~%" #1#))
     (write-node ,node ,da :base ,base)))

(defun build-impl (trie alloca da node memo &optional show-progress)
  (let ((children (bintrie:collect-children trie)))
    (loop WHILE (and (not #1=(gethash (bintrie:node-child trie) memo))
                     (null (cdr children))
                     (not (bintrie::node-terminal? (car children)))
                     (child-acceptable-p node))
      DO
      (add-child node (car children))
      (setf trie (car children))
      (setf children (bintrie:collect-children trie)))
  
    (a.if #1#
          (show-and-write-node node da :base it)
      (if (null children)
          (show-and-write-node node da)
        (let ((base-idx (node-allocator:allocate
                         alloca 
                         (mapcar #'bintrie:node-label children))))
          (setf #1# base-idx)
          (show-and-write-node node da :base base-idx)
            
          (dolist (child children)
            (build-impl child alloca da (new-node base-idx child) memo show-progress)))))))
                        

;;;;;;;;;;;;;;;;;;;;;
;;; external function
(defun build-from-bintrie (trie &key output-file byte-order show-progress)
  (show "~2&; build double array from trie:~%")
  (let ((node-file (format nil "~a.node" output-file))
        (exts-file (format nil "~a.ext" output-file)))
    (show ";  create tmpfiles: ~a, ~a~%" node-file exts-file)

    (show "; build:~%")
    (output:with-output (node node-file :byte-width 4)
      (with-open-output-file (exts exts-file 'uint4)
        (let ((da (make-da :node node :exts exts)))
          (build-impl trie (node-allocator:make) da 
                      (new-node 0 trie)
                      (make-hash-table :test #'eq)
                      show-progress))))
    (show "; concatenate tempfiles to ~A~%"  output-file)
    (merge-files output-file byte-order `((,node-file uint4) (,exts-file uint4))))
  'done)

(package-alias :dawg.double-array.node-allocator)
(package-alias :dawg.double-array.buffered-output)
(package-alias :dawg.bintrie-builder)