1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
#include "common.h"
#undef UChar
#include <string>
#include <unicode/translit.h>
extern "C" {
#ifdef HAVE_RUBY_ENCODING_H
#include <ruby/encoding.h>
/* Exception class raised by check_utf8_encoding; assigned from the
 * "CompatibilityError" constant of rb_cEncoding in
 * _init_charlock_transliterator. */
static VALUE rb_eEncodingCompatibilityError;
/*
 * Verify that `str` carries an encoding this extension is willing to hand
 * to ICU as UTF-8 bytes.
 *
 * Strings tagged UTF-8, US-ASCII or ASCII-8BIT pass through untouched; any
 * other encoding raises rb_eEncodingCompatibilityError. The encoding
 * handles are fetched once on the first call and reused afterwards.
 */
static void check_utf8_encoding(VALUE str) {
    static rb_encoding *utf8_enc = NULL;
    static rb_encoding *usascii_enc = NULL;
    static rb_encoding *binary_enc = NULL;
    rb_encoding *actual;

    /* Lazily look up the three accepted encodings. */
    if (utf8_enc == NULL) {
        utf8_enc = rb_utf8_encoding();
        usascii_enc = rb_usascii_encoding();
        binary_enc = rb_ascii8bit_encoding();
    }

    actual = rb_enc_get(str);
    if (actual == utf8_enc || actual == usascii_enc || actual == binary_enc) {
        return;
    }

    rb_raise(rb_eEncodingCompatibilityError,
        "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(actual));
}
#else
/* Fallback used when HAVE_RUBY_ENCODING_H is not defined: performs no check. */
static void check_utf8_encoding(VALUE str) {
    (void)str;
}
#endif
/* Defined in another translation unit; used below as the namespace under
 * which the Transliterator class is created. */
extern VALUE rb_mCharlockHolmes;
/* Handle to the "Transliterator" class; assigned in
 * _init_charlock_transliterator. */
static VALUE rb_cTransliterator;
/*
 * Singleton method "id_list" of the Transliterator class.
 *
 * Builds a Ruby array containing one string (created with
 * charlock_new_str) for every ID yielded by
 * icu::Transliterator::getAvailableIDs.
 *
 * self - receiver, unused.
 *
 * Returns the Ruby array of IDs.
 * Raises rb_eArgError, with the ICU error name as message, when ICU
 * reports a failure.
 */
static VALUE rb_transliterator_id_list(VALUE self) {
    UErrorCode status = U_ZERO_ERROR;
    icu::StringEnumeration *id_list;
    int32_t id_list_size;
    const char *curr_id;
    int32_t curr_id_len;
    VALUE rb_ary;

    id_list = icu::Transliterator::getAvailableIDs(status);
    if (id_list == NULL && U_SUCCESS(status)) {
        /* Never dereference a missing enumeration, even if ICU did not flag it. */
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    if (U_FAILURE(status)) {
        /* rb_raise does not return, so release the enumeration first. */
        delete id_list;
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    id_list_size = id_list->count(status);
    if (U_FAILURE(status)) {
        delete id_list;
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    rb_ary = rb_ary_new2(id_list_size);

    for (;;) {
        curr_id_len = 0;
        curr_id = id_list->next(&curr_id_len, status);
        if (U_FAILURE(status) || curr_id == NULL) {
            break;
        }
        rb_ary_push(rb_ary, charlock_new_str(curr_id, curr_id_len));
    }

    /* Free the enumeration on both the success and the failure path
     * before any raise can unwind past it. */
    delete id_list;

    if (U_FAILURE(status)) {
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    return rb_ary;
}
/*
 * Singleton method "transliterate" of the Transliterator class.
 *
 * self   - receiver, unused.
 * rb_txt - Ruby String holding the text to transliterate.
 * rb_id  - Ruby String holding the ICU transliterator ID.
 *
 * Both strings must pass check_utf8_encoding and are decoded as UTF-8.
 *
 * Returns a new Ruby string (created with charlock_new_str) holding the
 * UTF-8 encoded result of the forward transliteration.
 * Raises a TypeError (via Check_Type) when an argument is not a String,
 * and rb_eArgError when an input exceeds ICU's int32_t length limit or
 * when ICU cannot create the transliterator (message is the ICU error
 * name).
 */
static VALUE rb_transliterator_transliterate(VALUE self, VALUE rb_txt, VALUE rb_id) {
    UErrorCode status = U_ZERO_ERROR;
    const char *txt;
    long txt_len;
    const char *id;
    long id_len;
    VALUE rb_out = Qnil;

    Check_Type(rb_txt, T_STRING);
    Check_Type(rb_id, T_STRING);
    check_utf8_encoding(rb_txt);
    check_utf8_encoding(rb_id);

    txt = RSTRING_PTR(rb_txt);
    txt_len = RSTRING_LEN(rb_txt);
    id = RSTRING_PTR(rb_id);
    id_len = RSTRING_LEN(rb_id);

    /* ICU string lengths are int32_t; refuse anything that would be
     * silently truncated by the narrowing conversion. */
    if (txt_len > INT32_MAX || id_len > INT32_MAX) {
        rb_raise(rb_eArgError, "input string is too long for ICU");
    }

    /* All C++ objects live inside this scope so that the rb_raise below,
     * which does not return, never jumps over a pending destructor. */
    {
        std::string result;
        UParseError p_error;
        icu::Transliterator *trans;

        /* Decode explicitly as UTF-8: the (const char *, int32_t)
         * UnicodeString constructor would use ICU's default codepage. */
        trans = icu::Transliterator::createInstance(
            icu::UnicodeString::fromUTF8(icu::StringPiece(id, (int32_t)id_len)),
            UTRANS_FORWARD, p_error, status);
        if (trans == NULL && U_SUCCESS(status)) {
            /* Never dereference a missing transliterator. */
            status = U_MEMORY_ALLOCATION_ERROR;
        }

        if (U_SUCCESS(status)) {
            icu::UnicodeString u_txt =
                icu::UnicodeString::fromUTF8(icu::StringPiece(txt, (int32_t)txt_len));
            icu::StringByteSink<std::string> sink(&result);

            trans->transliterate(u_txt);
            u_txt.toUTF8(sink);
        }

        delete trans;

        if (U_SUCCESS(status)) {
            rb_out = charlock_new_str(result.data(), result.length());
        }
    }

    if (U_FAILURE(status)) {
        rb_raise(rb_eArgError, "%s", u_errorName(status));
    }

    return rb_out;
}
void _init_charlock_transliterator(void) {
#ifdef HAVE_RUBY_ENCODING_H
rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
#endif
rb_cTransliterator = rb_define_class_under(rb_mCharlockHolmes, "Transliterator", rb_cObject);
rb_define_singleton_method(rb_cTransliterator, "id_list", (VALUE(*)(...))rb_transliterator_id_list, 0);
rb_define_singleton_method(rb_cTransliterator, "transliterate", (VALUE(*)(...))rb_transliterator_transliterate, 2);
}
}
|