File: derived.lisp

package info (click to toggle)
cl-unicode 0.1.4-3
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,124 kB
  • ctags: 210
  • sloc: lisp: 1,706; makefile: 18
file content (120 lines) | stat: -rw-r--r-- 5,367 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/cl-unicode/derived.lisp,v 1.15 2012-05-04 21:17:44 edi Exp $

;;; Copyright (c) 2008-2012, Dr. Edmund Weitz. All rights reserved.

;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:

;;;   * Redistributions of source code must retain the above copyright
;;;     notice, this list of conditions and the following disclaimer.

;;;   * Redistributions in binary form must reproduce the above
;;;     copyright notice, this list of conditions and the following
;;;     disclaimer in the documentation and/or other materials
;;;     provided with the distribution.

;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(in-package :cl-unicode)

(defconstant +xid-difference+
  ;; the usual mumbo jumbo for SBCL...
  (if (boundp '+xid-difference+)
    (symbol-value '+xid-difference+)
    '(#x37a
      (#x309b . #x309c)
      (#xfc5e . #xfc63)
      (#xfdfa . #xfdfb)
      #xfe70
      #xfe72
      #xfe74
      #xfe76
      #xfe78
      #xfe7a
      #xfe7c
      #xfe7e)))

(defvar *derived-map*
  `(("Any")
    ("LC" "Lu" "Ll" "Lt")
    ("L" "LC" "Lm" "Lo")
    ("M" "Mn" "Mc" "Me")
    ("N" "Nd" "Nl" "No")
    ("P" "Pc" "Pd" "Ps" "Pe" "Pi" "Pf" "Po")
    ("S" "Sm" "Sc" "Sk" "So")
    ("Z" "Zs" "Zl" "Zp")
    ("C" "Cc" "Cf" "Cs" "Co" "Cn")
    ("Math" "Sm" "OtherMath")
    ("Alphabetic" "L" "Nl" "OtherAlphabetic")
    ("Lowercase" "Ll" "OtherLowercase")
    ("Uppercase" "Lu" "OtherUppercase")
    ("GraphemeExtend" "Me" "Mn" "OtherGraphemeExtend")
    ("GraphemeBase" ("C" "Zl" "Zp" "GraphemeExtend"))
    ("IDStart" "L" "Nl" "OtherIDStart" ("PatternSyntax" "PatternWhiteSpace"))
    ("IDContinue" "IDStart" "Mn" "Mc" "Nd" "Pc" "OtherIDContinue" ("PatternSyntax" "PatternWhiteSpace"))
    ("XIDStart" "IDStart" (,@+xid-difference+ #xe33 #xeb3 (#xff9e . #xff9f)))
    ("XIDContinue" "IDContinue" ,+xid-difference+)
    ("DefaultIgnorableCodePoint" "OtherDefaultIgnorableCodePoint" "Cf" "VariationSelector"
                                 ("WhiteSpace" (#xfff9 . #xfffb) (#x600 . #x603) #x6dd #x70f))))

;; todo: xid_start, xid_continue,

(defun build-derived-test-function (property-designators)
  (labels ((build-test-function (designator)
             (etypecase designator
               (string
                (let ((test-function (gethash (gethash designator *property-map*) *property-tests*)))
                  (assert test-function (designator)
                    "Unknown property name ~S." designator)
                  test-function))
               (integer
                (lambda (c)
                  (= (ensure-code-point c) designator)))
               (cons
                (let ((from (car designator))
                      (to (car designator)))
                (assert (and (typep from 'integer) (typep to 'integer)) (designator)
                  "Car and cdr of ~S must both be integers." designator)
                (lambda (c)
                  (<= from (ensure-code-point c) to))))))
           (collect-test-functions (designators)
             (loop for designator in designators
                   collect (build-test-function designator))))
    (let ((positive-test-functions
           (collect-test-functions (remove-if-not 'atom property-designators)))
          (negative-test-functions
           (collect-test-functions (find-if-not 'atom property-designators))))
      (cond (negative-test-functions
             (lambda (c)
               (and (or (null positive-test-functions)
                        (loop for test-function in positive-test-functions
                              thereis (funcall (the function test-function) c)))
                    (not (loop for test-function in negative-test-functions
                               thereis (funcall (the function test-function) c))))))                  
            (t
             (lambda (c)
               (or (null positive-test-functions)
                   (loop for test-function in positive-test-functions
                         thereis (funcall (the function test-function) c)))))))))
               
(defun build-derived-test-functions ()
  (loop for (name . property-names) in *derived-map*
        for symbol = (register-property-symbol name) do
        (assert (null (gethash symbol *property-tests*)) (name)
          "There is already a property named ~S." name)
        (setf (gethash symbol *property-tests*)
              (build-derived-test-function property-names)
              (gethash name *property-map*)
              symbol)))