1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
|
;;; utf8proc.scm
;;;
;;; tie the utf8proc library into the *libutf8proc* environment
(set! (*s7* 'print-length) 123123)
(require cload.scm)
(provide 'libutf8proc.scm)
;; if loading from a different directory, pass that info to C
(let ((directory (let ((current-file (port-filename)))
(and (memv (current-file 0) '(#\/ #\~))
(substring current-file 0 (- (length current-file) 9))))))
(when (and directory (not (member directory *load-path*)))
(set! *load-path* (cons directory *load-path*)))
(with-let (rootlet)
(require cload.scm))
(when (and directory (not (string-position directory *cload-cflags*)))
(set! *cload-cflags* (string-append "-I" directory " " *cload-cflags*))))
(if (not (defined? '*libutf8proc*))
(define *libutf8proc*
(with-let (sublet (unlet))
(set! *libraries* (cons (cons "libutf8proc.scm" (curlet)) *libraries*))
(c-define
'((C-macro (int (UTF8PROC_VERSION_MAJOR UTF8PROC_VERSION_MINOR UTF8PROC_VERSION_PATCH)))
(int (UTF8PROC_NULLTERM UTF8PROC_STABLE UTF8PROC_COMPAT UTF8PROC_COMPOSE UTF8PROC_DECOMPOSE
UTF8PROC_IGNORE UTF8PROC_REJECTNA UTF8PROC_NLF2LS UTF8PROC_NLF2PS UTF8PROC_NLF2LF
UTF8PROC_STRIPCC UTF8PROC_CASEFOLD UTF8PROC_CHARBOUND UTF8PROC_LUMP UTF8PROC_STRIPMARK))
(C-macro (int (UTF8PROC_ERROR_NOMEM UTF8PROC_ERROR_OVERFLOW UTF8PROC_ERROR_INVALIDUTF8 UTF8PROC_ERROR_NOTASSIGNED UTF8PROC_ERROR_INVALIDOPTS)))
(int (UTF8PROC_CATEGORY_CN UTF8PROC_CATEGORY_LU UTF8PROC_CATEGORY_LL UTF8PROC_CATEGORY_LT UTF8PROC_CATEGORY_LM
UTF8PROC_CATEGORY_LO UTF8PROC_CATEGORY_MN UTF8PROC_CATEGORY_MC UTF8PROC_CATEGORY_ME UTF8PROC_CATEGORY_ND
UTF8PROC_CATEGORY_NL UTF8PROC_CATEGORY_NO UTF8PROC_CATEGORY_PC UTF8PROC_CATEGORY_PD UTF8PROC_CATEGORY_PS
UTF8PROC_CATEGORY_PE UTF8PROC_CATEGORY_PI UTF8PROC_CATEGORY_PF UTF8PROC_CATEGORY_PO UTF8PROC_CATEGORY_SM
UTF8PROC_CATEGORY_SC UTF8PROC_CATEGORY_SK UTF8PROC_CATEGORY_SO UTF8PROC_CATEGORY_ZS UTF8PROC_CATEGORY_ZL
UTF8PROC_CATEGORY_ZP UTF8PROC_CATEGORY_CC UTF8PROC_CATEGORY_CF UTF8PROC_CATEGORY_CS UTF8PROC_CATEGORY_CO))
(int (UTF8PROC_BIDI_CLASS_L UTF8PROC_BIDI_CLASS_LRE UTF8PROC_BIDI_CLASS_LRO UTF8PROC_BIDI_CLASS_R UTF8PROC_BIDI_CLASS_AL
UTF8PROC_BIDI_CLASS_RLE UTF8PROC_BIDI_CLASS_RLO UTF8PROC_BIDI_CLASS_PDF UTF8PROC_BIDI_CLASS_EN UTF8PROC_BIDI_CLASS_ES
UTF8PROC_BIDI_CLASS_ET UTF8PROC_BIDI_CLASS_AN UTF8PROC_BIDI_CLASS_CS UTF8PROC_BIDI_CLASS_NSM UTF8PROC_BIDI_CLASS_BN
UTF8PROC_BIDI_CLASS_B UTF8PROC_BIDI_CLASS_S UTF8PROC_BIDI_CLASS_WS UTF8PROC_BIDI_CLASS_ON UTF8PROC_BIDI_CLASS_LRI
UTF8PROC_BIDI_CLASS_RLI UTF8PROC_BIDI_CLASS_FSI UTF8PROC_BIDI_CLASS_PDI))
(int (UTF8PROC_DECOMP_TYPE_FONT UTF8PROC_DECOMP_TYPE_NOBREAK UTF8PROC_DECOMP_TYPE_INITIAL UTF8PROC_DECOMP_TYPE_MEDIAL
UTF8PROC_DECOMP_TYPE_FINAL UTF8PROC_DECOMP_TYPE_ISOLATED UTF8PROC_DECOMP_TYPE_CIRCLE UTF8PROC_DECOMP_TYPE_SUPER
UTF8PROC_DECOMP_TYPE_SUB UTF8PROC_DECOMP_TYPE_VERTICAL UTF8PROC_DECOMP_TYPE_WIDE UTF8PROC_DECOMP_TYPE_NARROW
UTF8PROC_DECOMP_TYPE_SMALL UTF8PROC_DECOMP_TYPE_SQUARE UTF8PROC_DECOMP_TYPE_FRACTION UTF8PROC_DECOMP_TYPE_COMPAT))
(int (UTF8PROC_BOUNDCLASS_START UTF8PROC_BOUNDCLASS_OTHER UTF8PROC_BOUNDCLASS_CR UTF8PROC_BOUNDCLASS_LF
UTF8PROC_BOUNDCLASS_CONTROL UTF8PROC_BOUNDCLASS_EXTEND UTF8PROC_BOUNDCLASS_L UTF8PROC_BOUNDCLASS_V
UTF8PROC_BOUNDCLASS_T UTF8PROC_BOUNDCLASS_LV UTF8PROC_BOUNDCLASS_LVT UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR
UTF8PROC_BOUNDCLASS_SPACINGMARK))
;; in version 2:
;; UTF8PROC_BOUNDCLASS_PREPEND UTF8PROC_BOUNDCLASS_ZWJ UTF8PROC_BOUNDCLASS_E_BASE
;; UTF8PROC_BOUNDCLASS_E_MODIFIER UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ UTF8PROC_BOUNDCLASS_E_BASE_GAZ
;; utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
;; utf8proc_bool utf8proc_grapheme_break_stateful(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state)
(char* utf8proc_version (void))
(char* utf8proc_errmsg (int))
(int utf8proc_tolower ((utf8proc_int32_t int)))
(int utf8proc_toupper ((utf8proc_int32_t int)))
(int utf8proc_islower (utf8proc_int32_t))
(int utf8proc_isupper (utf8proc_int32_t))
(int utf8proc_charwidth ((utf8proc_int32_t int)))
(int utf8proc_category ((utf8proc_int32_t int)))
(char* utf8proc_category_string ((utf8proc_int32_t int)))
(bool utf8proc_codepoint_valid ((utf8proc_int32_t int)))
(bool utf8proc_grapheme_break ((utf8proc_int32_t int) (utf8proc_int32_t int)))
(char* utf8proc_NFD (char*)) ; these return newly allocated memory -- should probably free it here
(char* utf8proc_NFC (char*))
(char* utf8proc_NFKD (char*))
(char* utf8proc_NFKC (char*))
(in-C "static s7_pointer g_utf8proc_iterate(s7_scheme *sc, s7_pointer args)
{
utf8proc_int32_t code_ref = 0;
s7_int len, res;
char *str;
str = (char *)s7_string(s7_car(args));
len = s7_integer(s7_cadr(args));
res = utf8proc_iterate(str, len, &code_ref);
return(s7_cons(sc, s7_make_integer(sc, res), s7_make_integer(sc, code_ref)));
}")
(C-function ("utf8proc_iterate" g_utf8proc_iterate "" 2))
(in-C "static s7_pointer g_utf8proc_encode_char(s7_scheme *sc, s7_pointer args)
{
ssize_t res;
utf8proc_uint8_t buf[8];
res = utf8proc_encode_char((utf8proc_int32_t)s7_integer(s7_car(args)), buf);
return(s7_list(sc, 2, s7_make_string_with_length(sc, buf, res), s7_make_integer(sc, res)));
}")
(C-function ("utf8proc_encode_char" g_utf8proc_encode_char "" 1))
(in-C "static s7_pointer g_utf8proc_reencode(s7_scheme *sc, s7_pointer args)
{
s7_pointer buffer, codepoints, options;
ssize_t res;
buffer = s7_car(args);
codepoints = s7_cadr(args);
options = s7_caddr(args);
res = utf8proc_reencode((utf8proc_int32_t *)s7_string(buffer),
(utf8proc_ssize_t)s7_integer(codepoints),
(utf8proc_option_t)s7_integer(options));
return(s7_make_integer(sc, res));
}")
(C-function ("utf8proc_reencode" g_utf8proc_reencode "" 3))
(in-C "static s7_pointer g_utf8proc_get_property(s7_scheme *sc, s7_pointer args)
{
const utf8proc_property_t *info;
info = utf8proc_get_property((utf8proc_int32_t)s7_integer(s7_car(args)));
return(s7_inlet(sc, s7_list(sc, 30,
s7_make_symbol(sc, \"category\"), s7_make_integer(sc, info->category),
s7_make_symbol(sc, \"combining_class\"), s7_make_integer(sc, info->combining_class),
s7_make_symbol(sc, \"bidi_class\"), s7_make_integer(sc, info->bidi_class),
s7_make_symbol(sc, \"decomp_type\"), s7_make_integer(sc, info->decomp_type),
#if (UTF8PROC_VERSION_MAJOR >= 2)
s7_make_symbol(sc, \"uppercase_seqindex\"), s7_make_integer(sc, info->uppercase_seqindex),
s7_make_symbol(sc, \"lowercase_seqindex\"), s7_make_integer(sc, info->lowercase_seqindex),
s7_make_symbol(sc, \"titlecase_seqindex\"), s7_make_integer(sc, info->titlecase_seqindex),
s7_make_symbol(sc, \"casefold_seqindex\"), s7_make_integer(sc, info->casefold_seqindex),
s7_make_symbol(sc, \"comb_index\"), s7_make_integer(sc, info->comb_index),
#else
s7_make_symbol(sc, \"uppercase_mapping\"), s7_make_integer(sc, info->uppercase_mapping),
s7_make_symbol(sc, \"lowercase_mapping\"), s7_make_integer(sc, info->lowercase_mapping),
s7_make_symbol(sc, \"titlecase_mapping\"), s7_make_integer(sc, info->titlecase_mapping),
s7_make_symbol(sc, \"comb1st_index\"), s7_make_integer(sc, info->comb1st_index),
s7_make_symbol(sc, \"comb2nd_index\"), s7_make_integer(sc, info->comb2nd_index),
#endif
s7_make_symbol(sc, \"bidi_mirrored\"), s7_make_integer(sc, info->bidi_mirrored),
s7_make_symbol(sc, \"comp_exclusion\"), s7_make_integer(sc, info->comp_exclusion),
s7_make_symbol(sc, \"ignorable\"), s7_make_integer(sc, info->ignorable),
s7_make_symbol(sc, \"control_boundary\"), s7_make_integer(sc, info->control_boundary),
s7_make_symbol(sc, \"boundclass\"), s7_make_integer(sc, info->boundclass),
s7_make_symbol(sc, \"charwidth\"), s7_make_integer(sc, info->charwidth))));
}")
(C-function ("utf8proc_get_property" g_utf8proc_get_property "" 1))
(in-C "static s7_pointer g_utf8proc_decompose_char(s7_scheme *sc, s7_pointer args)
{
s7_pointer code, opt, str;
int last_boundclass;
utf8proc_ssize_t size;
utf8proc_int32_t *dst;
ssize_t res;
code = s7_car(args);
str = s7_cadr(args);
opt = s7_caddr(args);
dst = (utf8proc_int32_t *)s7_string(str);
size = (utf8proc_ssize_t)s7_string_length(str);
res = utf8proc_decompose_char((utf8proc_int32_t)s7_integer(code), dst, size, (utf8proc_option_t)s7_integer(opt), &last_boundclass);
return(s7_make_integer(sc, res));
}")
(C-function ("utf8proc_decompose_char" g_utf8proc_decompose_char "" 3))
(in-C "static s7_pointer g_utf8proc_map(s7_scheme *sc, s7_pointer args) /* returns (cons string-or-#f size-of-string-or-error-integer) */
{
s7_pointer opt, str, p;
utf8proc_ssize_t res;
s7_int len;
utf8proc_uint8_t *dst, *new_str;
str = s7_car(args);
len = s7_string_length(str);
opt = s7_cadr(args);
new_str = (utf8proc_uint8_t *)malloc(len + 1);
memcpy((void *)new_str, (const void *)s7_string(str), len); /* online example uses len+1? */
res = utf8proc_map(new_str, len, &dst, (utf8proc_option_t)s7_integer(opt));
if (res < 0)
{
free(new_str);
return(s7_cons(sc, s7_f(sc), s7_make_integer(sc, res))); /* utf8proc library frees dst in this case */
}
p = s7_make_string_with_length(sc, dst, res);
free(new_str);
free(dst);
return(s7_cons(sc, p, s7_make_integer(sc, res)));
}")
(C-function ("utf8proc_map" g_utf8proc_map "" 2))
(in-C "static s7_pointer g_utf8proc_decompose(s7_scheme *sc, s7_pointer args)
{
s7_pointer opt, str;
s7_int len;
ssize_t res;
utf8proc_int32_t *dst;
str = s7_car(args);
opt = s7_cadr(args);
len = s7_string_length(str);
dst = (utf8proc_int32_t *)malloc(len * 4);
res = utf8proc_decompose((const utf8proc_uint8_t *)s7_string(str), len, dst, len, (utf8proc_option_t)s7_integer(opt));
if (res < 0) return(s7_make_integer(sc, res));
return(s7_make_string_with_length(sc, (char *)dst, res));
}")
(C-function ("utf8proc_decompose" g_utf8proc_decompose "" 2))
)
"" "utf8proc.h" "" "-lutf8proc" "utf8proc_s7")
(curlet))))
*libutf8proc*
|