File: unicode-category.scm

package info (click to toggle)
scheme48 1.9.2-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 18,232 kB
  • sloc: lisp: 88,907; ansic: 87,519; sh: 3,224; makefile: 771
file content (86 lines) | stat: -rw-r--r-- 3,048 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
; Part of Scheme 48 1.9.  See file COPYING for notices and license.

; Authors: Mike Sperber
; Copyright (c) 2005-2006 by Basis Technology Corporation. 

; Code-point classification

(define-enumerated-type primary-category :primary-category
  primary-category?
  primary-categories
  primary-category-name
  primary-category-index
  (letter
   number
   punctuation
   symbol
   mark
   separator
   miscellaneous))

(define-finite-type general-category :general-category
  (primary-category id symbol)
  general-category?
  general-categories
  general-category-name
  general-category-index
  (primary-category general-category-primary-category)
  (id general-category-id)
  (symbol general-category-symbol)
  ((uppercase-letter (primary-category letter) "Lu" 'Lu)
   (lowercase-letter (primary-category letter) "Ll" 'Ll)
   (titlecase-letter (primary-category letter) "Lt" 'Lt)
   (modified-letter (primary-category letter) "Lm" 'Lm)
   (other-letter (primary-category letter) "Lo" 'Lo)

   (non-spacing-mark (primary-category mark) "Mn" 'Mn)
   (combining-spacing-mark (primary-category mark) "Mc" 'Mc)
   (enclosing-mark (primary-category mark) "Me" 'Me)
   
   (decimal-digit-number (primary-category number) "Nd" 'Nd)
   (letter-number (primary-category number) "Nl" 'Nl)
   (other-number (primary-category number) "No" 'No)

   (opening-punctuation (primary-category punctuation) "Ps" 'Ps)
   (closing-punctuation (primary-category punctuation) "Pe" 'Pe)
   (initial-quote-punctuation (primary-category punctuation) "Pi" 'Pi)
   (final-quote-punctuation (primary-category punctuation) "Pf" 'Pf)
   (dash-punctuation (primary-category punctuation) "Pd" 'Pd)
   (connector-punctuation (primary-category punctuation) "Pc" 'Pc)
   (other-punctuation (primary-category punctuation) "Po" 'Po)
   
   (currency-symbol (primary-category symbol) "Sc" 'Sc)
   (mathematical-symbol (primary-category symbol) "Sm" 'Sm)
   (modifier-symbol (primary-category symbol) "Sk" 'Sk)
   (other-symbol (primary-category symbol) "So" 'So)

   (space-separator (primary-category separator) "Zs" 'Zs)
   (paragraph-separator (primary-category separator) "Zp" 'Zp)
   (line-separator (primary-category separator) "Zl" 'Zl)

   (control-character (primary-category miscellaneous) "Cc" 'Cc)
   (formatting-character (primary-category miscellaneous) "Cf" 'Cf)
   (surrogate (primary-category miscellaneous) "Cs" 'Cs)
   (private-use-character (primary-category miscellaneous) "Co" 'Co)
   (unassigned (primary-category miscellaneous) "Cn" 'Cn)))

(define (bits-necessary count)
  (let loop ((e 0)
	     (reached 1))
    (if (>= reached count)
	e
	(loop (+ e 1) (* 2 reached)))))

(define *general-category-bits*
  (bits-necessary (vector-length general-categories)))
   
(define (id->general-category id)
  (let ((count (vector-length general-categories)))
    (let loop ((i 0))
      (cond
       ((>= i count) #f)
       ((string=? (general-category-id (vector-ref general-categories i))
		  id)
	(vector-ref general-categories i))
       (else
	(loop (+ 1 i)))))))