1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
|
--===========================================================================--
-- Serbian --
--===========================================================================--
local translit = thirddata.translit
local pcache = translit.parser_cache
local lpegmatch = lpeg.match
-- Special thanks to Mojca Miklavec and Arthur Reutenauer for their
-- assistance in creating these transliteration routines.
if not translit.done_serbian then
--------------------------------------------
-- Lowercase Serbian (Cyrillic -> Latin) --
--------------------------------------------
-- One entry per lowercase Serbian Cyrillic letter; љ, њ and џ are the only
-- letters whose Latin counterpart is a two-letter digraph.
translit.sr_tolt_lower = translit.make_add_dict{
    ["а"] = "a",  ["б"] = "b",  ["в"] = "v",  ["г"] = "g",  ["д"] = "d",
    ["ђ"] = "đ",  ["е"] = "e",  ["ж"] = "ž",  ["з"] = "z",  ["и"] = "i",
    ["ј"] = "j",  ["к"] = "k",  ["л"] = "l",  ["љ"] = "lj", ["м"] = "m",
    ["н"] = "n",  ["њ"] = "nj", ["о"] = "o",  ["п"] = "p",  ["р"] = "r",
    ["с"] = "s",  ["т"] = "t",  ["ћ"] = "ć",  ["у"] = "u",  ["ф"] = "f",
    ["х"] = "h",  ["ц"] = "c",  ["ч"] = "č",  ["џ"] = "dž", ["ш"] = "š",
}
-- Publish the dictionary under its descriptive name in the table registry.
translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower
--------------------------------------------
-- Uppercase Serbian (Cyrillic -> Latin) --
--------------------------------------------
-- One entry per uppercase Serbian Cyrillic letter; the digraphs for Љ, Њ and
-- Џ are written in title case ("Lj", "Nj", "Dž").
translit.sr_tolt_upper = translit.make_add_dict{
    ["А"] = "A",  ["Б"] = "B",  ["В"] = "V",  ["Г"] = "G",  ["Д"] = "D",
    ["Ђ"] = "Đ",  ["Е"] = "E",  ["Ж"] = "Ž",  ["З"] = "Z",  ["И"] = "I",
    ["Ј"] = "J",  ["К"] = "K",  ["Л"] = "L",  ["Љ"] = "Lj", ["М"] = "M",
    ["Н"] = "N",  ["Њ"] = "Nj", ["О"] = "O",  ["П"] = "P",  ["Р"] = "R",
    ["С"] = "S",  ["Т"] = "T",  ["Ћ"] = "Ć",  ["У"] = "U",  ["Ф"] = "F",
    ["Х"] = "H",  ["Ц"] = "C",  ["Ч"] = "Č",  ["Џ"] = "Dž", ["Ш"] = "Š",
}
-- Publish the dictionary under its descriptive name in the table registry.
translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper
--- Build a new table with the keys and values of `t` swapped.
-- The input table is not modified. If several keys share one value, the
-- entry that survives depends on traversal order.
-- @param t table to invert
-- @return a fresh plain table mapping each value of `t` to its key
local function __inverse_tab (t)
    local flipped = { }
    local key, value = next(t)
    while key ~= nil do
        flipped[value] = key
        key, value = next(t, key)
    end
    return flipped
end
-- The Latin -> Cyrillic dictionaries are the Cyrillic -> Latin ones with keys
-- and values swapped.
-- NOTE(review): the uppercase inverse therefore only knows the title-case
-- digraphs "Lj", "Nj" and "Dž"; all-caps "LJ", "NJ", "DŽ" have no entry of
-- their own -- confirm that this is intended.
do
    local latin_to_cyr_lower = __inverse_tab(translit.sr_tolt_lower)
    translit.sr_tocy_lower = translit.make_add_dict(latin_to_cyr_lower)
    local latin_to_cyr_upper = __inverse_tab(translit.sr_tolt_upper)
    translit.sr_tocy_upper = translit.make_add_dict(latin_to_cyr_upper)
end
--- Good reading up front:
--- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display>
--- <http://www.vokabular.org/forum/index.php?topic=3817.15>
-- Word stems in which a Latin letter pair must not be read as a digraph.
-- Keys are the Latin spellings, values the matching Cyrillic spellings.
local except = {
    -- "nj" is written as н + ј here instead of the single letter њ
    ["konjug"]      = "конјуг",
    ["konjunk"]     = "конјунк",
    ["injekc"]      = "инјекц",
    ["injunkt"]     = "инјункт",
    ["panjelin"]    = "панјелин",
    ["tanjug"]      = "танјуг",
    ["vanjezič"]    = "ванјезич",
    ["vanjadransk"] = "ванјадранск",
    -- "dž" is written as д + ж here instead of the single letter џ
    ["nadžanj"]     = "наджањ",
    ["nadždrel"]    = "надждрел",
    ["nadžet"]      = "наджет",
    ["nadživ"]      = "наджив",
    ["nadžnj"]      = "наджњ",
    ["nadžup"]      = "наджуп",
    ["odžal"]       = "оджал",
    ["odžar"]       = "оджар",
    ["odživ"]       = "оджив",
    ["odžubor"]     = "оджубор",
    ["odžur"]       = "оджур",
    ["odžvak"]      = "оджвак",
    ["podžanr"]     = "поджанр",
    ["podže"]       = "подже", -- “поджећи”
}
local P = lpeg.P
local sub, upper = unicode.utf8.sub, unicode.utf8.upper
local p_tocy, p_i_tocy, p_tolt, p_i_tolt

-- Return `word` with only its first character uppercased.
local function capitalize (word)
    return upper(sub(word, 1, 1)) .. sub(word, 2)
end

-- Ordered choice replacing `from` by `to` in three spellings: as given,
-- with an initial capital, and fully uppercased.
local function any_case (from, to)
    return P(from) / to
         + P(capitalize(from)) / capitalize(to)
         + P(upper(from)) / upper(to)
end

-- Accumulate one big ordered choice per direction out of every exception.
-- The `p_i_*` variants match the bare stem; the plain variants additionally
-- require (and keep) a single preceding space.
for latin, cyrillic in next, except do
    local stem_tocy = any_case(latin, cyrillic)
    local stem_tolt = any_case(cyrillic, latin)
    local word_tocy = P" " * stem_tocy
    local word_tolt = P" " * stem_tolt
    p_tocy   = p_tocy   and p_tocy   + word_tocy or word_tocy
    p_i_tocy = p_i_tocy and p_i_tocy + stem_tocy or stem_tocy
    p_tolt   = p_tolt   and p_tolt   + word_tolt or word_tolt
    p_i_tolt = p_i_tolt and p_i_tolt + stem_tolt or stem_tolt
end
-- A literal "*" is the hint marker; it is replaced by the empty string.
local _p_hintchar = P"*" / ""
-- Latin letters for which hint rules are generated.
local hintme = "dln"
local _p_tocy_hint, _p_tolt_hint
for latin in hintme:utfcharacters() do
    local cyrillic = translit.sr_tocy_lower[latin]
    local LATIN, CYRILLIC = upper(latin), upper(cyrillic)
    -- lowercase rule first, uppercase rule second, for each direction
    local lo_tocy, up_tocy = P(latin) / cyrillic, P(LATIN) / CYRILLIC
    local lo_tolt, up_tolt = P(cyrillic) / latin, P(CYRILLIC) / LATIN
    _p_tocy_hint = (_p_tocy_hint and _p_tocy_hint + lo_tocy or lo_tocy) + up_tocy
    _p_tolt_hint = (_p_tolt_hint and _p_tolt_hint + lo_tolt or lo_tolt) + up_tolt
end
-- Export the generated patterns. The hint patterns are a hinted letter
-- immediately followed by the hint marker.
translit.serbian_exceptions = {
    p_tocy      = p_tocy,
    p_tolt      = p_tolt,
    p_tocy_init = p_i_tocy,
    p_tolt_init = p_i_tolt,
    p_tocy_hint = _p_tocy_hint * _p_hintchar,
    p_tolt_hint = _p_tolt_hint * _p_hintchar,
}
-- Mark the Serbian tables as built so the enclosing guard skips them next time.
translit.done_serbian = true
end
--===========================================================================--
-- End Of Tables --
--===========================================================================--
local t = translit

--- Build the LPeg substitution parser for one Serbian transliteration mode.
-- The settings `translit.hinting` and `translit.sr_except` are read once,
-- while the parser is being built.
-- @param mode string ending in "to" plus two characters; the callers in this
--             file pass "sr_tolt" (Cyrillic -> Latin) or "sr_tocy"
--             (Latin -> Cyrillic)
-- @return an `lpeg.Cs` pattern that rewrites a whole string
local function sr (mode)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
    local utfchar = translit.utfchar -- defined elsewhere; presumably matches one UTF-8 character

    -- "sr_tolt" -> "tolt", which selects the "p_tolt*" exception patterns.
    local direction = mode:match("to..$")
    if not direction then
        error("sr: unsupported transliteration mode " .. tostring(mode), 2)
    end
    local modestr = "p_" .. direction
    local _p_sre = t.serbian_exceptions[modestr]
    local _p_sre_i = t.serbian_exceptions[modestr .. "_init"]

    -- Merge the upper- and lowercase dictionaries for this mode.
    local trl_sr = t[mode.."_upper"] + t[mode.."_lower"]

    -- transliteration from latin script requires macro handling …
    -- A backslash followed by ASCII letters carries no replacement, so it is
    -- copied through the substitution capture unchanged.
    local _p_macro = P[[\]] * R("az", "AZ")^1 -- assuming standard catcodes

    -- Start from an empty rule set. A local is not yet in scope inside its own
    -- initializer, so naming `_p_sr` here would read an unrelated global.
    local _p_sr = translit.addrules(trl_sr, nil) / trl_sr
    if translit.hinting then
        -- Hint rules take precedence over the plain dictionary rules.
        _p_sr = t.serbian_exceptions[modestr .. "_hint"] + _p_sr
    end

    local p_sr
    if translit.sr_except then
        -- The "_init" pattern covers an exception at the very start of the
        -- input, where no space precedes it.
        p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + _p_sr + utfchar)^0)
    else
        p_sr = Cs((_p_macro + _p_sr + utfchar)^0)
    end
    return p_sr
end
--- Transliterate `text` using the "sr_tolt" (Cyrillic -> Latin) parser.
-- Parsers are cached under a key that includes the current values of
-- `translit.hinting` and `translit.sr_except`, so each combination is built
-- only once.
-- @param text string to transliterate
-- @return whatever `lpeg.match` returns for the parser applied to `text`
translit.methods["sr_tolt"] = function (text)
    local cache_key = "sr_tolt" .. tostring(translit.hinting) .. tostring(translit.sr_except)
    local parser = pcache[cache_key]
    if parser then
        return lpegmatch(parser, text)
    end
    parser = sr("sr_tolt")
    pcache[cache_key] = parser
    return lpegmatch(parser, text)
end
--- Transliterate `text` using the "sr_tocy" (Latin -> Cyrillic) parser.
-- Parsers are cached under a key that includes the current values of
-- `translit.hinting` and `translit.sr_except`, so each combination is built
-- only once.
-- @param text string to transliterate
-- @return whatever `lpeg.match` returns for the parser applied to `text`
translit.methods["sr_tocy"] = function (text)
    local cache_key = "sr_tocy" .. tostring(translit.hinting) .. tostring(translit.sr_except)
    local parser = pcache[cache_key]
    if parser then
        return lpegmatch(parser, text)
    end
    parser = sr("sr_tocy")
    pcache[cache_key] = parser
    return lpegmatch(parser, text)
end
-- vim:ft=lua:sw=4:ts=4
|