File: parse-2002.lisp

package info (click to toggle)
cl-umlisp-orf 3.3.2-3.1
  • links: PTS, VCS
  • area: contrib
  • in suites: bookworm, sid
  • size: 228 kB
  • sloc: lisp: 2,841; makefile: 56
file content (328 lines) | stat: -rw-r--r-- 14,337 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
;;;; *************************************************************************
;;;; FILE IDENTIFICATION
;;;;
;;;; Name:     parse-2002.lisp
;;;; Purpose:  Parsing and SQL insertion routines for UMLisp which may
;;;;           change from year to year
;;;; Author:   Kevin M. Rosenberg
;;;; Created:  Apr 2000
;;;;
;;;; $Id$
;;;;
;;;; This file, part of UMLisp, is
;;;;    Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D.
;;;;
;;;; UMLisp users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU General Public License.
;;;; *************************************************************************

(in-package #:umlisp-orf)

;;; Pre-read data for custom fields into hash tables
(defvar *preparse-hash-init?* nil)

(eval-when (:compile-toplevel :load-toplevel :execute)
(let ((pfstr-hash nil)      ;;; Preferred concept strings by CUI
      (cui-lrl-hash nil)    ;;; LRL by CUI
      (lui-lrl-hash nil)    ;;; LRL by LUI
      (cuisui-lrl-hash nil) ;;; LRL by CUISUI
      (sab-srl-hash nil))   ;;; SRL by SAB

  (defun make-preparse-hash-table ()
    (if pfstr-hash
        (progn
          (clrhash pfstr-hash)
          (clrhash cui-lrl-hash)
          (clrhash lui-lrl-hash)
          (clrhash cuisui-lrl-hash)
          (clrhash sab-srl-hash))
      (setf
          pfstr-hash (make-hash-table :size 800000)
          cui-lrl-hash (make-hash-table :size 800000)
          lui-lrl-hash (make-hash-table :size 1500000)
          cuisui-lrl-hash (make-hash-table :size 1800000)
          sab-srl-hash (make-hash-table :size 100 :test 'equal))))

  (defun buffered-ensure-preparse (&optional (force-read nil))
    (when (or force-read (not *preparse-hash-init?*))
      (make-preparse-hash-table)
      (setq *preparse-hash-init?* t))
    (with-buffered-umls-file (line "MRCON")
      (let ((cui (parse-ui (aref line 0)))
            (lui (parse-ui (aref line 3)))
            (sui (parse-ui (aref line 5)))
            (lrl (parse-integer (aref line 7))))
        (unless (gethash cui pfstr-hash)  ;; if haven't stored pfstr for cui
          (if (and (string-equal (aref line 1) "ENG") ; LAT
                   (string-equal (aref line 2) "P") ; ts
                   (string-equal (aref line 4) "PF")) ; stt
              (setf (gethash cui pfstr-hash) (aref line 6))))
        (set-lrl-hash cui lrl cui-lrl-hash)
        (set-lrl-hash lui lrl lui-lrl-hash)
        (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash)))
    (with-buffered-umls-file (line "MRSO")
      (let ((sab (aref line 3)))
        (unless (gethash sab sab-srl-hash)  ;; if haven't stored
          (setf (gethash sab sab-srl-hash) (aref line 6))))))

  (defun ensure-preparse (&optional (force-read nil))
    (when (or force-read (not *preparse-hash-init?*))
      (make-preparse-hash-table)
      (setq *preparse-hash-init?* t))
    (with-umls-file (line "MRCON")
      (let ((cui (parse-ui (nth 0 line)))
            (lui (parse-ui (nth 3 line)))
            (sui (parse-ui (nth 5 line)))
            (lrl (parse-integer (nth 7 line))))
        (unless (gethash cui pfstr-hash)  ;; if haven't stored pfstr for cui
          (if (and (string-equal (nth 1 line) "ENG") ; LAT
                   (string-equal (nth 2 line) "P") ; ts
                   (string-equal (nth 4 line) "PF")) ; stt
              (setf (gethash cui pfstr-hash) (nth 6 line))))
        (set-lrl-hash cui lrl cui-lrl-hash)
        (set-lrl-hash lui lrl lui-lrl-hash)
        (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash)))
    (with-umls-file (line "MRSO")
      (let ((sab (nth 3 line)))
        (multiple-value-bind (val found) (gethash sab sab-srl-hash)
          (declare (ignore val))
          (unless found
            (setf (gethash sab sab-srl-hash) (parse-integer (nth 6 line))))))))

  (defun pfstr-hash (cui)
    (gethash cui pfstr-hash))

  (defun cui-lrl (cui)
    (gethash cui cui-lrl-hash))

  (defun lui-lrl (lui)
    (gethash lui lui-lrl-hash))

  (defun cuisui-lrl (cuisui)
    (gethash cuisui cuisui-lrl-hash))

  (defun sab-srl (sab)
    (aif (gethash sab sab-srl-hash) it 0))
)) ;; closure

(defun set-lrl-hash (key lrl hash)
  "Set the least restrictive level in hash table"
  (multiple-value-bind (hash-lrl found) (gethash key hash)
    (if (or (not found) (< lrl hash-lrl))
        (setf (gethash key hash) lrl))))

;; UMLS file and column structures
;;; SQL datatypes symbols
;;; sql-u - Unique identifier
;;; sql-s - Small integer (16-bit)
;;; sql-i - Integer (32-bit)
;;; sql-l - Big integer (64-bit)
;;; sql-f - Floating point
;;; sql-c - Character data

(defparameter +col-datatypes+
    '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
      ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
      ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
      ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
      ;;; Custom columns
      ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i)
      ("KSRL" sql-i) ("KLRL" sql-i)
      ;;; LEX columns
      ("EUI" sql-u) ("EUI2" sql-u)
      ;;; Semantic net columns
      ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
      ;; New fields for 2002AD
      ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
      )
    "SQL data types for each non-string column")

(defparameter +custom-tables+
    nil
  #+ignore
  '(("MRCONSO" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL FROM MRCON m, MRSO s WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI")
    ("MRCONFULL" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL, t.TUI FROM MRCON m, MRSO s, MRSTY t WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI AND m.CUI=t.CUI AND s.CUI=t.CUI"))
  "Custom tables to create")

(defparameter +custom-cols+
    '(("MRCON" "KPFSTR" "TEXT" 1024
               (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
      ("MRCON" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
      ("MRCON" "KCUILUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
      ("MRCON" "KCUILRL" "INTEGER" 0
       (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
      ("MRCON" "KLUILRL" "INTEGER" 0
       (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
      ("MRLO" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string
                    (if (zerop (length (nth 4 x)))
                        (cui-lrl (parse-ui (nth 0 x)))
                      (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
      ("MRSTY" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
      ("MRCOC" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string
                    (max (cui-lrl (parse-ui (nth 0 x)))
                         (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
      ("MRSAT" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 5 x)))))
      ("MRREL" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
      ("MRRANK" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
      ("MRDEF" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
      ("MRCXT" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
      ("MRATX" "KSRL" "INTEGER" 0
       (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
      ("MRXW.ENG" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
                                                 (parse-ui (nth 2 x))
                                                 (parse-ui (nth 4 x)))))))
      ("MRXW.NONENG" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
                                                 (parse-ui (nth 2 x))
                                                 (parse-ui (nth 4 x)))))))
      ("MRXNW.ENG" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
                                                 (parse-ui (nth 2 x))
                                                 (parse-ui (nth 4 x)))))))
      ("MRXNS.ENG" "KLRL" "INTEGER" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
                                                 (parse-ui (nth 2 x))
                                                 (parse-ui (nth 4 x)))))))
      ("MRREL" "KPFSTR2" "TEXT" 1024
       (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
      ("MRCOC" "KPFSTR2" "TEXT" 1024
       (lambda (x) (pfstr-hash (parse-ui (nth 1 x)))))
      ("MRCXT" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
      ("MRSAT" "KCUILUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
      ("MRSAT" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
      ("MRSO" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
      ("MRXW.ENG" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXNW.ENG" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXNS.ENG" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXW.NONENG" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
      ("MRXW.NONENG" "WD"  "VARCHAR" 200  (lambda (x) (nth 1 x)))
      ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
      ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
      ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
      ("MRXW.NONENG" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
  "Custom columns to create.(filename, col, sqltype, value-func).")

(defparameter +index-cols+
    '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON")
      ("LRL" "MRCON")
      ("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
      ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
      ("CUI" "MRSO") ("SAB" "MRSO") ("SRL" "MRSO") ("CUI" "MRSTY")
      ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
      #+ignore ("NSTR" "MRXNS_ENG" 10)
      ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
      ("KCUISUI" "MRCON") ("KCUILUI" "MRCON") ("KCUILRL" "MRCON")
      ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT")
      ("KCUISUI" "MRSO") ("KCUISUI" "MRSAT")  ("KCUILUI" "MRSAT")
      ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
      ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
      ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK")
      ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
      ("KLRL" "MRLO") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
      ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
      ;; LEX indices
      ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
      ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
      ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
      ("BAS" "LRABR")
      ;; Semantic NET indices
      ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
      ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
      ("RL" "SRSTR")
      ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
      ("VCUI" "MRSAB") ("LAT" "MRSAB"))
  "Columns in files to index")


(defparameter +custom-index-cols+
  nil
  #+ignore
  '(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL"))
  "Indexes to custom tables")

;; File & Column functions

(defun gen-ucols ()
  (add-ucols (gen-ucols-meta))
  (add-ucols (gen-ucols-custom))
  (add-ucols (gen-ucols-generic "LRFLD"))
  (add-ucols (gen-ucols-generic "SRFLD")))

(defun gen-ucols-meta ()
"Initialize all umls columns"
  (let ((cols '()))
    (with-umls-file (line "MRCOLS")
      (destructuring-bind (col des ref min av max fil dty) line
        (push (make-ucol col des ref (parse-integer min) (read-from-string av)
                         (parse-integer max) fil dty)
              cols)))
    (nreverse cols)))

(defun gen-ucols-custom ()
"Initialize umls columns for custom columns"
  (loop for customcol in +custom-cols+
        collect
        (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol)
                   (nth 0 customcol) nil :sqltype (nth 2 customcol)
                   :custom-value-fun (nth 4 customcol))))

(defun gen-ucols-generic (col-filename)
"Initialize for generic (LEX/NET) columns"
  (let ((cols '()))
    (with-umls-file (line col-filename)
      (destructuring-bind (nam des ref fil) line
        (setq nam (escape-column-name nam))
        (dolist (file (delimited-string-to-list fil #\,))
          (push
           (make-ucol nam des ref nil nil nil file nil)
           cols))))
    (nreverse cols)))


(defun gen-ufiles ()
  (add-ufiles (gen-ufiles-generic "MRFILES"))
  (add-ufiles (gen-ufiles-generic "LRFIL"))
  (add-ufiles (gen-ufiles-generic "SRFIL"))
  ;; needs to come last
  (add-ufiles (gen-ufiles-custom)))


(defun gen-ufiles-generic (files-filename)
"Initialize all LEX file structures"
  (let ((files '()))
    (with-umls-file (line files-filename)
      (destructuring-bind (fil des fmt cls rws bts) line
        (push (make-ufile
               fil des (substitute #\_ #\. fil) (parse-integer cls)
               (parse-integer rws) (parse-integer bts)
               (concatenate 'list (umls-field-string-to-list fmt)
                            (custom-colnames-for-filename fil)))
              files)))
    (nreverse files)))

(defun gen-ufiles-custom ()
  (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG"
              5 0 0 (fields (find-ufile "MRXW.ENG"))))