1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
require "test/unit"
TEST_ROOT = File.dirname(__FILE__)
require File.join(TEST_ROOT, "..", "lib", "unidecode")
class UnidecodeTest < Test::Unit::TestCase
# Silly phrases courtesy of Frank da Cruz
# http://www.columbia.edu/kermit/utf8.html
DONT_CONVERT = [
"Vitrum edere possum; mihi non nocet.", # Latin
"Je puis mangier del voirre. Ne me nuit.", # Old French
"Kristala jan dezaket, ez dit minik ematen.", # Basque
"Kaya kong kumain nang bubog at hindi ako masaktan.", # Tagalog
"Ich kann Glas essen, ohne mir weh zu tun.", # German
"I can eat glass and it doesn't hurt me.", # English
]
CONVERT_PAIRS = {
"Je peux manger du verre, ça ne me fait pas de mal." => # French
"Je peux manger du verre, ca ne me fait pas de mal.",
"Pot să mănânc sticlă și ea nu mă rănește." => # Romanian
"Pot sa mananc sticla si ea nu ma raneste.",
"Ég get etið gler án þess að meiða mig." => # Icelandic
"Eg get etid gler an thess ad meida mig.",
"Unë mund të ha qelq dhe nuk më gjen gjë." => # Albanian
"Une mund te ha qelq dhe nuk me gjen gje.",
"Mogę jeść szkło i mi nie szkodzi." => # Polish
"Moge jesc szklo i mi nie szkodzi.",
"Я могу есть стекло, оно мне не вредит." => # Russian
"Ia moghu iest' stieklo, ono mnie nie vriedit.",
"Мога да ям стъкло, то не ми вреди." => # Bulgarian
"Mogha da iam stklo, to nie mi vriedi.",
"ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬" => # Anglo-Saxon
"ic.mag.glas.eotacn.ond.hit.ne.heacrmiacth.me:",
"ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει" => # Classical Greek
"ualon phagein dunamai; touto ou me blaptei",
"मैं काँच खा सकता हूँ और मुझे उससे कोई चोट नहीं पहुंचती" => # Hindi
"maiN kaaNc khaa sktaa huuN aur mujhe usse koii cott nhiiN phuNctii",
"من می توانم بدونِ احساس درد شيشه بخورم" => # Persian
"mn my twnm bdwni Hss drd shyshh bkhwrm",
"أنا قادر على أكل الزجاج و هذا لا يؤلمن" => # Arabic
"'n qdr `l~ 'kl lzjj w hdh l yw'lmn",
"אני יכול לאכול זכוכית וזה לא מזיק לי" => # Hebrew
"ny ykvl lkvl zkvkyt vzh l mzyq ly",
"ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ" => # French
"chankinkracchkaid aetmanaimthamaihchanecchb",
"我能吞下玻璃而不伤身体。" => # Chinese
"Wo Neng Tun Xia Bo Li Er Bu Shang Shen Ti . ",
"私はガラスを食べられます。それは私を傷つけません。" => # French
"Si hagarasuwoShi beraremasu. sorehaSi woShang tukemasen. ",
}
def test_unicode_decode
DONT_CONVERT.each do |ascii|
assert_equal ascii, Unidecoder::decode(ascii)
end
CONVERT_PAIRS.each do |unicode, ascii|
assert_equal ascii, Unidecoder::decode(unicode)
end
end
def test_to_ascii
DONT_CONVERT.each do |ascii|
assert_equal ascii, ascii.to_ascii
end
CONVERT_PAIRS.each do |unicode, ascii|
assert_equal ascii, unicode.to_ascii
end
end
end
|