File: ThaiLogical-Latin.xml

package info (click to toggle)
python-babel 1.3%2Bdfsg.1-5
links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd
size: 69,060 kB
ctags: 1,644
sloc: xml: 582,663; python: 9,389; makefile: 216; sh: 188
file content (160 lines) | stat: -rw-r--r-- 9,604 bytes
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <!-- Revision and generation-date stamps for this data file; both values are stored as $Revision$ / $Date$ keyword strings. -->
  <version number="$Revision: 8242 $"/>
	<generation date="$Date: 2013-02-24 21:06:02 -0600 (Sun, 24 Feb 2013) $"/>
	<transforms>
		<transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
			<comment># Thai-Latin</comment>
			<comment># This set of rules follows ISO 11940</comment>
			<comment>#\u0009see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf</comment>
			<comment># except that it does not mention an implicit vowel, so we use ọ</comment>
			<comment>#</comment>
			<comment># The transcription is fairly ugly, so we ought to also do the UNGEGN version</comment>
			<comment>#\u0009see: http://www.eki.ee/wgrs/rom1_th.pdf</comment>
			<comment># and probably make that the main variant.</comment>
			<comment># Note: this is an internal file. The NFD/NFC is handled externally, in the index</comment>
			<comment># The insertion of spaces between words, the reversal of the vowels</comment>
			<comment># and the conversion of space to semicolon are done *outside* of these rules.</comment>
			<comment># So as far as these rules are concerned, the vowels are in logical order!</comment>
			<comment># insert implicit vowel (and remove it going the other way)</comment>
			<comment># COMMENTED out: the implicit vowel positions cannot be predicted algorithmically</comment>
			<comment>#$consonant = [ก-ฮ];</comment>
			<comment>#$vowel = [ะ-ฺเ-ไ็];</comment>
			<comment>#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;</comment>
			<comment>#\uE000 → ọ ;</comment>
			<comment># ← ọ ;</comment>
			<!-- Helper sets keyed on the canonical combining class (ccc).
			     $notAbove: any character whose ccc is neither 0 nor "above".
			     $notBelow: any character whose ccc is neither 0 nor "below".
			     $notAbove* is used by the "backward case" rules in this file to skip marks that may sit
			     between a base letter and the accent that the rule is looking for.
			     Within the rules visible in this file, $notBelow is referenced only in a commented-out line. -->
			<tRule>$notAbove = [^\p{ccc=0}\p{ccc=above}] ;</tRule>
			<tRule>$notBelow = [^\p{ccc=0}\p{ccc=below}] ;</tRule>
			<comment># Consonants</comment>
			<comment># Warning: the 'h's need to be handled carefully!</comment>
			<comment># What we really want to say is the following, but we can't</comment>
			<comment># $notHAccent = !($notAbove*   ̄ | $notBelow*   ̣) ;</comment>
			<comment># Since the only accents we care about that could cause problems are free-standing accents below, we use instead:</comment>
			<!-- Sets used as following context by the plain digraph rules (kh, ph, ch, th).
			     $freeStandingBelow: the free-standing accents below mentioned in the preceding comment.
			     $hAccent: the accents that are matched after an "h" by other rules (macron for HO HIP, dot below for HO NOKHUK).
			     $notHAccent0: one character that is in neither $freeStandingBelow nor $hAccent.
			     $notHAccent1: one $freeStandingBelow character followed by one character that is not in $hAccent. -->
			<tRule>$freeStandingBelow = [̥  ];</tRule>
			<tRule>$hAccent =  [   ̄     ̣];</tRule>
			<tRule>$notHAccent0 = [^$freeStandingBelow$hAccent];</tRule>
			<tRule>$notHAccent1 = $freeStandingBelow [^$hAccent];</tRule>
			<!-- HO HIP. The first rule is forward-only (Thai to Latin) and emits h followed by a macron.
			     The second rule is reverse-only: it matches h, then any run of $notAbove characters (captured as $1),
			     then the macron. It outputs the Thai letter and re-emits $1 after the cursor marker (|),
			     so the captured marks remain in the text to be matched by other rules. -->
			<tRule>ห → h̄ ; # THAI CHARACTER HO HIP</tRule>
			<tRule>ห | $1 ← h ($notAbove*)    ̄; # backward case, account for reordering</tRule>
			<!-- HO NOKHUK, followed by the consonants whose Latin form starts with k, p, c or t.
			     Within each family the accented "letter + h" forms are listed first, then the plain digraph
			     (kh, ph, ch, th), then the bare letter (k, p, c); the bare t rule appears later in the file.
			     Each plain digraph has two rules:
			       1. a reverse-only rule whose following context is $notHAccent1, and
			       2. a two-way rule whose following context is $notHAccent0.
			     The "}" separates the matched text from the context that must follow it; the context itself is not replaced. -->
			<tRule>ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK</tRule>
			<tRule>ข ↔ k̄h ; # THAI CHARACTER KHO KHAI</tRule>
			<tRule>ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT</tRule>
			<tRule>ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON</tRule>
			<tRule>ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG</tRule>
			<tRule>ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI</tRule>
			<tRule>ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI</tRule>
			<tRule>ก ↔ k ; # THAI CHARACTER KO KAI</tRule>
			<tRule>ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO</tRule>
			<tRule>ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG</tRule>
			<tRule>พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN</tRule>
			<tRule>พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN</tRule>
			<tRule>ป ↔ p ; # THAI CHARACTER PO PLA</tRule>
			<tRule>ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING</tRule>
			<tRule>ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE</tRule>
			<tRule>ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG</tRule>
			<tRule>ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG</tRule>
			<tRule>จ ↔ c ; # THAI CHARACTER CHO CHAN</tRule>
			<tRule>ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN</tRule>
			<tRule>ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO</tRule>
			<tRule>ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO</tRule>
			<tRule>ถ ↔ t̄h ; # THAI CHARACTER THO THUNG</tRule>
			<tRule>ธ ↔ ṭh ; # THAI CHARACTER THO THONG</tRule>
			<tRule>ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN</tRule>
			<tRule>ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN</tRule>
			<comment>#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.</comment>
			<!-- Remaining consonants, one Thai letter per rule. Accented Latin forms are listed before the
			     bare letter they are built on (for example the dotted n before n, and the s forms with a macron before s).
			     FO FA, SO SALA and SO SUA have a Latin form that carries a macron; each is followed by a
			     reverse-only "backward case" rule that matches the base letter, an optional run of $notAbove
			     characters captured as $1, and then the macron, and re-emits $1 after the cursor marker (|).
			     NOTE(review): FO FA and SO SALA pair the backward rule with a two-way rule, whereas SO SUA pairs it
			     with a forward-only rule. Confirm whether the difference is intentional. -->
			<tRule>ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK</tRule>
			<tRule>ต ↔ t ; # THAI CHARACTER TO TAO</tRule>
			<comment># since there is no singleton g (generated), don't worry about that.</comment>
			<tRule>ง ↔ ng ; # THAI CHARACTER NGO NGU</tRule>
			<tRule>ณ ↔ ṇ ; # THAI CHARACTER NO NEN</tRule>
			<tRule>น ↔ n ; # THAI CHARACTER NO NU</tRule>
			<tRule>ญ ↔ ỵ  ; # THAI CHARACTER YO YING</tRule>
			<tRule>ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA</tRule>
			<tRule>ด ↔ d ; # THAI CHARACTER DO DEK</tRule>
			<tRule>บ ↔ b ; # THAI CHARACTER BO BAIMAI</tRule>
			<tRule>ฝ ↔ f̄ ; # THAI CHARACTER FO FA</tRule>
			<tRule>ฝ | $1 ← f ($notAbove*)    ̄; # backward case, account for reordering</tRule>
			<tRule>ม ↔ m ; # THAI CHARACTER MO MA</tRule>
			<tRule>ย ↔ y ; # THAI CHARACTER YO YAK</tRule>
			<tRule>ร ↔ r ; # THAI CHARACTER RO RUA</tRule>
			<tRule>ฤ ↔ v ; # THAI CHARACTER RU</tRule>
			<tRule>ฦ ↔ ł ; # THAI CHARACTER LU</tRule>
			<tRule>ว ↔ w ; # THAI CHARACTER WO WAEN</tRule>
			<tRule>ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA***</tRule>
			<tRule>ศ | $1 ← s    ̣ ($notAbove*)    ̄; # backward case, account for reordering</tRule>
			<tRule>ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI</tRule>
			<tRule>ส → s̄ ; # THAI CHARACTER SO SUA***</tRule>
			<tRule>ส | $1 ← s ($notAbove*)    ̄; # backward case, account for reordering</tRule>
			<tRule>ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA</tRule>
			<tRule>ล ↔ l ; # THAI CHARACTER LO LING</tRule>
			<tRule>ฟ ↔ f ; # THAI CHARACTER FO FAN</tRule>
			<tRule>อ ↔ x ; # THAI CHARACTER O ANG</tRule>
			<tRule>ซ ↔ s ; # THAI CHARACTER SO SO</tRule>
			<comment># vowels</comment>
			<!-- Vowel signs from MAI HAN-AKAT to SARA U. Accented Latin forms are listed before the bare
			     vowel they are built on (the a forms before plain a, the u forms before plain u).
			     Where the Latin form ends in an accent above the base letter, a reverse-only "backward case"
			     rule follows: it matches the base vowel, an optional run of $notAbove characters captured as $1,
			     and then the accent, and re-emits $1 after the cursor marker (|).
			     SARA AA and SARA AM pair that rule with a forward-only rule; SARA II, SARA UEE and SARA UU
			     pair it with a two-way rule. -->
			<tRule>ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT</tRule>
			<tRule>า → ā ; # THAI CHARACTER SARA AA</tRule>
			<tRule>า | $1 ← a ($notAbove*)    ̄; # backward case, account for reordering</tRule>
			<comment># We deviate from ISO for SARA AM for disambiguation</comment>
			<tRule>ำ → a  ̉; # THAI CHARACTER SARA AM</tRule>
			<tRule>ำ | $1 ← a ($notAbove*)  ̉ ; # backward case, account for reordering</tRule>
			<tRule>ะ ↔ a ; # THAI CHARACTER SARA A</tRule>
			<tRule>ี ↔ ī ; # THAI CHARACTER SARA II</tRule>
			<tRule>ี | $1 ← i ($notAbove*)    ̄  ; # backward case, account for reordering</tRule>
			<tRule>ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE</tRule>
			<tRule>ื | $1 ← u   ̣ ($notAbove*)    ̄  ; # backward case, account for reordering</tRule>
			<tRule>ึ ↔ ụ ; # THAI CHARACTER SARA UE</tRule>
			<tRule>ู ↔ ū ; # THAI CHARACTER SARA UU</tRule>
			<tRule>ู | $1 ← u  ($notAbove*)    ̄  ; # backward case, account for reordering</tRule>
			<tRule>ุ ↔ u ; # THAI CHARACTER SARA U</tRule>
			<!-- Two-way, one-to-one rules for PAIYANNOI, the vowel signs SARA E through LAKKHANGYAO,
			     and the marks MAITAIKHU through NIKHAHIT, followed by FONGMAN.
			     The marks MAITAIKHU through THANTHAKHAT and NIKHAHIT map to a bare Latin combining accent.
			     YAMAKKAN and FONGMAN map to the quoted literals '~' and '§'.
			     The BAHT mapping is present only as a commented-out placeholder. -->
			<tRule>ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI</tRule>
			<comment># ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT</comment>
			<tRule>เ ↔ e ; # THAI CHARACTER SARA E</tRule>
			<tRule>แ ↔ æ ; # THAI CHARACTER SARA AE</tRule>
			<tRule>โ ↔ o ; # THAI CHARACTER SARA O</tRule>
			<tRule>ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN</tRule>
			<tRule>ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI</tRule>
			<tRule>ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO</tRule>
			<tRule>็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU</tRule>
			<tRule>่ ↔ ̀ ; # THAI CHARACTER MAI EK</tRule>
			<tRule>้ ↔ ̂ ; # THAI CHARACTER MAI THO</tRule>
			<tRule>๊ ↔ ́ ; # THAI CHARACTER MAI TRI</tRule>
			<tRule>๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA</tRule>
			<tRule>์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT</tRule>
			<tRule>๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN</tRule>
			<comment># We deviate from ISO for disambiguation</comment>
			<tRule>ํ ↔  ̊ ; # THAI CHARACTER NIKHAHIT</tRule>
			<tRule>๏ ↔ '§' ; # THAI CHARACTER FONGMAN</tRule>
			<!-- Thai digits ZERO through NINE map one-to-one to the ASCII digits 0 through 9 in both directions.
			     They are followed by two-way rules for ANGKHANKHU (quoted literal '||'), KHOMUT and MAIYAMOK. -->
			<tRule>๐ ↔ 0 ; # THAI DIGIT ZERO</tRule>
			<tRule>๑ ↔ 1 ; # THAI DIGIT ONE</tRule>
			<tRule>๒ ↔ 2 ; # THAI DIGIT TWO</tRule>
			<tRule>๓ ↔ 3 ; # THAI DIGIT THREE</tRule>
			<tRule>๔ ↔ 4 ; # THAI DIGIT FOUR</tRule>
			<tRule>๕ ↔ 5 ; # THAI DIGIT FIVE</tRule>
			<tRule>๖ ↔ 6 ; # THAI DIGIT SIX</tRule>
			<tRule>๗ ↔ 7 ; # THAI DIGIT SEVEN</tRule>
			<tRule>๘ ↔ 8 ; # THAI DIGIT EIGHT</tRule>
			<tRule>๙ ↔ 9 ; # THAI DIGIT NINE</tRule>
			<tRule>๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU</tRule>
			<tRule>๛ ↔ » ; # THAI CHARACTER KHOMUT</tRule>
			<tRule>ๆ ↔ « ; # THAI CHARACTER MAIYAMOK</tRule>
			<comment># moved down to make shorter first</comment>
			<comment>#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.</comment>
			<!-- PHINTHU and SARA I, placed after the rest of the character rules.
			     SARA I maps to a bare i, so it comes after the rules whose Latin side is an i carrying an accent
			     (SARA II and SARA AI MAIMALAI). -->
			<tRule>ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU</tRule>
			<tRule>ิ ↔ i ; # THAI CHARACTER SARA I</tRule>
			<comment># fallbacks</comment>
			<!-- Reverse-only fallbacks for Latin letters that no earlier rule consumed on their own:
			     g, h and q are rewritten to k, j to c, and z to s.
			     The cursor marker (|) is placed before the replacement letter, so the replacement is itself
			     matched again by the rules above (for example k is then converted by the KO KAI rule).
			     The final ":: (lower);" line has an empty forward part and names "lower" in its parenthesised
			     reverse part, so lowercasing is requested only for the Latin-to-Thai direction. -->
			<tRule>| k ← g ;</tRule>
			<tRule>| k ← h ;</tRule>
			<tRule>| c ← j ;</tRule>
			<tRule>| k ← q ;</tRule>
			<tRule>| s ← z ;</tRule>
			<tRule>:: (lower);</tRule>
		</transform>
	</transforms>
</supplementalData>