File: romkan.rb

package info (click to toggle)
ruby-romkan 0.4.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 92 kB
  • sloc: ruby: 348; makefile: 2
file content (306 lines) | stat: -rw-r--r-- 8,551 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# -*- coding: utf-8 -*-
#
# Ruby/Romkan - a Romaji <-> Kana conversion library for Ruby.
#
# Copyright (C) 2001 Satoru Takabayashi <satoru@namazu.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the Ruby's licence.
$LOAD_PATH << File.dirname(File.expand_path(__FILE__))
require 'romkan/version'
class Array
  # Yields consecutive slices of +s+ elements (pairs by default), walking the
  # array from the front. The last slice is shorter when the array size is
  # not a multiple of +s+; an empty array yields nothing.
  def pairs(s = 2)
    0.step(size - 1, s) do |offset|
      yield slice(offset, s)
    end
  end
end

class String
# This table is imported from KAKASI <http://kakasi.namazu.org/> and modified.
  # Kana -> Kunrei-style Romaji table: whitespace-separated tokens that
  # alternate kana, romaji, kana, romaji, ...  The backslash right after the
  # opening quote swallows the first newline so the text starts with a kana.
  # TO_HEPBURN and TO_KUNREI pair the romaji of this table with the romaji of
  # HEPBURNTAB by position, so both tables must list their entries in the
  # same order.
  KUNREITAB = "\
ぁ	xa	あ	a	ぃ	xi	い	i	ぅ	xu
う	u	う゛	vu	う゛ぁ	va	う゛ぃ	vi 	う゛ぇ	ve
う゛ぉ	vo	ぇ	xe	え	e	ぉ	xo	お	o 

か	ka	が	ga	き	ki	きゃ	kya	きゅ	kyu 
きょ	kyo	ぎ	gi	ぎゃ	gya	ぎゅ	gyu	ぎょ	gyo 
く	ku	ぐ	gu	け	ke	げ	ge	こ	ko
ご	go 

さ	sa	ざ	za	し	si	しゃ	sya	しゅ	syu 
しょ	syo	じ	zi	じゃ	zya	じゅ	zyu	じょ	zyo 
す	su	ず	zu	せ	se	ぜ	ze	そ	so
ぞ	zo 

た	ta	だ	da	ち	ti	ちゃ	tya	ちゅ	tyu 
ちょ	tyo	ぢ	di	ぢゃ	dya	ぢゅ	dyu	ぢょ	dyo 

っ	xtu 
っう゛	vvu	っう゛ぁ	vva	っう゛ぃ	vvi 
っう゛ぇ	vve	っう゛ぉ	vvo 
っか	kka	っが	gga	っき	kki	っきゃ	kkya 
っきゅ	kkyu	っきょ	kkyo	っぎ	ggi	っぎゃ	ggya 
っぎゅ	ggyu	っぎょ	ggyo	っく	kku	っぐ	ggu 
っけ	kke	っげ	gge	っこ	kko	っご	ggo	っさ	ssa 
っざ	zza	っし	ssi	っしゃ	ssya 
っしゅ	ssyu	っしょ	ssyo 
っじ	zzi	っじゃ	zzya	っじゅ	zzyu	っじょ	zzyo 
っす	ssu	っず	zzu	っせ	sse	っぜ	zze	っそ	sso 
っぞ	zzo	った	tta	っだ	dda	っち	tti 
っちゃ	ttya	っちゅ	ttyu	っちょ	ttyo	っぢ	ddi 
っぢゃ	ddya	っぢゅ	ddyu	っぢょ	ddyo	っつ	ttu 
っづ	ddu	って	tte	っで	dde	っと	tto	っど	ddo 
っは	hha	っば	bba	っぱ	ppa	っひ	hhi 
っひゃ	hhya	っひゅ	hhyu	っひょ	hhyo	っび	bbi 
っびゃ	bbya	っびゅ	bbyu	っびょ	bbyo	っぴ	ppi 
っぴゃ	ppya	っぴゅ	ppyu	っぴょ	ppyo	っふ	hhu 
っふぁ	ffa	っふぃ	ffi	っふぇ	ffe	っふぉ	ffo 
っぶ	bbu	っぷ	ppu	っへ	hhe	っべ	bbe	っぺ    ppe
っほ	hho	っぼ	bbo	っぽ	ppo	っや	yya	っゆ	yyu 
っよ	yyo	っら	rra	っり	rri	っりゃ	rrya 
っりゅ	rryu	っりょ	rryo	っる	rru	っれ	rre 
っろ	rro 

つ	tu	づ	du	て	te	で	de	と	to
ど	do 

な	na	に	ni	にゃ	nya	にゅ	nyu	にょ	nyo 
ぬ	nu	ね	ne	の	no 

は	ha	ば	ba	ぱ	pa	ひ	hi	ひゃ	hya 
ひゅ	hyu	ひょ	hyo	び	bi	びゃ	bya	びゅ	byu 
びょ	byo	ぴ	pi	ぴゃ	pya	ぴゅ	pyu	ぴょ	pyo 
ふ	hu	ふぁ	fa	ふぃ	fi	ふぇ	fe	ふぉ	fo 
ぶ	bu	ぷ	pu	へ	he	べ	be	ぺ	pe
ほ	ho	ぼ	bo	ぽ	po 

ま	ma	み	mi	みゃ	mya	みゅ	myu	みょ	myo 
む	mu	め	me	も	mo 

ゃ	xya	や	ya	ゅ	xyu	ゆ	yu	ょ	xyo
よ	yo

ら	ra	り	ri	りゃ	rya	りゅ	ryu	りょ	ryo 
る	ru	れ	re	ろ	ro 

ゎ	xwa	わ	wa	ゐ	wi	ゑ	we
を	wo	ん	n 

ん     n'
でぃ   dyi
ー     -
ちぇ    tye
っちぇ	ttye
じぇ	zye
"

  # Kana -> Hepburn-style Romaji table: whitespace-separated tokens that
  # alternate kana, romaji, kana, romaji, ...  The backslash right after the
  # opening quote swallows the first newline so the text starts with a kana.
  # TO_HEPBURN and TO_KUNREI pair the romaji of this table with the romaji of
  # KUNREITAB by position, so both tables must list their entries in the
  # same order.
  HEPBURNTAB = "\
ぁ	xa	あ	a	ぃ	xi	い	i	ぅ	xu
う	u	う゛	vu	う゛ぁ	va	う゛ぃ	vi	う゛ぇ	ve
う゛ぉ	vo	ぇ	xe	え	e	ぉ	xo	お	o


か	ka	が	ga	き	ki	きゃ	kya	きゅ	kyu
きょ	kyo	ぎ	gi	ぎゃ	gya	ぎゅ	gyu	ぎょ	gyo
く	ku	ぐ	gu	け	ke	げ	ge	こ	ko
ご	go	

さ	sa	ざ	za	し	shi	しゃ	sha	しゅ	shu
しょ	sho	じ	ji	じゃ	ja	じゅ	ju	じょ	jo
す	su	ず	zu	せ	se	ぜ	ze	そ	so
ぞ	zo

た	ta	だ	da	ち	chi	ちゃ	cha	ちゅ	chu
ちょ	cho	ぢ	di	ぢゃ	dya	ぢゅ	dyu	ぢょ	dyo

っ	xtsu	
っう゛	vvu	っう゛ぁ	vva	っう゛ぃ	vvi	
っう゛ぇ	vve	っう゛ぉ	vvo	
っか	kka	っが	gga	っき	kki	っきゃ	kkya	
っきゅ	kkyu	っきょ	kkyo	っぎ	ggi	っぎゃ	ggya	
っぎゅ	ggyu	っぎょ	ggyo	っく	kku	っぐ	ggu	
っけ	kke	っげ	gge	っこ	kko	っご	ggo	っさ	ssa
っざ	zza	っし	sshi	っしゃ	ssha	
っしゅ	sshu	っしょ	ssho	
っじ	jji	っじゃ	jja	っじゅ	jju	っじょ	jjo	
っす	ssu	っず	zzu	っせ	sse	っぜ	zze	っそ	sso
っぞ	zzo	った	tta	っだ	dda	っち	cchi	
っちゃ	ccha	っちゅ	cchu	っちょ	ccho	っぢ	ddi	
っぢゃ	ddya	っぢゅ	ddyu	っぢょ	ddyo	っつ	ttsu	
っづ	ddu	って	tte	っで	dde	っと	tto	っど	ddo
っは	hha	っば	bba	っぱ	ppa	っひ	hhi	
っひゃ	hhya	っひゅ	hhyu	っひょ	hhyo	っび	bbi	
っびゃ	bbya	っびゅ	bbyu	っびょ	bbyo	っぴ	ppi	
っぴゃ	ppya	っぴゅ	ppyu	っぴょ	ppyo	っふ	ffu	
っふぁ	ffa	っふぃ	ffi	っふぇ	ffe	っふぉ	ffo	
っぶ	bbu	っぷ	ppu	っへ	hhe	っべ	bbe	っぺ	ppe
っほ	hho	っぼ	bbo	っぽ	ppo	っや	yya	っゆ	yyu
っよ	yyo	っら	rra	っり	rri	っりゃ	rrya	
っりゅ	rryu	っりょ	rryo	っる	rru	っれ	rre	
っろ	rro	

つ	tsu	づ	du	て	te	で	de	と	to
ど	do	

な	na	に	ni	にゃ	nya	にゅ	nyu	にょ	nyo
ぬ	nu	ね	ne	の	no	

は	ha	ば	ba	ぱ	pa	ひ	hi	ひゃ	hya
ひゅ	hyu	ひょ	hyo	び	bi	びゃ	bya	びゅ	byu
びょ	byo	ぴ	pi	ぴゃ	pya	ぴゅ	pyu	ぴょ	pyo
ふ	fu	ふぁ	fa	ふぃ	fi	ふぇ	fe	ふぉ	fo
ぶ	bu	ぷ	pu	へ	he	べ	be	ぺ	pe
ほ	ho	ぼ	bo	ぽ	po	

ま	ma	み	mi	みゃ	mya	みゅ	myu	みょ	myo
む	mu	め	me	も	mo

ゃ	xya	や	ya	ゅ	xyu	ゆ	yu	ょ	xyo
よ	yo	

ら	ra	り	ri	りゃ	rya	りゅ	ryu	りょ	ryo
る	ru	れ	re	ろ	ro	

ゎ	xwa	わ	wa	ゐ	wi	ゑ	we
を	wo	ん	n	

ん     n'
でぃ   dyi
ー     -
ちぇ    che
っちぇ	cche
じぇ	je
"

  # Kana => Romaji lookup built from both tables. HEPBURNTAB is read after
  # KUNREITAB, so a kana listed in both ends up with its Hepburn spelling.
  KANROM = (KUNREITAB + HEPBURNTAB).split(/\s+/).each_slice(2).each_with_object({}) { |(kana, roma), table|
    table[kana] = roma
  }

  # Romaji => Kana lookup covering the spellings of both tables. When a
  # spelling is listed more than once, the entry read last wins.
  ROMKAN = (KUNREITAB + HEPBURNTAB).split(/\s+/).each_slice(2).each_with_object({}) { |(kana, roma), table|
    table[roma] = kana
  }

  # Regexp alternation of every known Romaji spelling, ordered longest first
  # so that a longer spelling (e.g. "n'") is tried before a shorter one that
  # is its prefix (e.g. "n").
  ROMPAT = ROMKAN.keys.sort { |x, y| y.size <=> x.size }.join("|")

  # Regexp alternation of every known kana sequence, longest first; kana of
  # equal length are ordered by the length of their Romaji, shortest first.
  # `<=>` answers 0 for a tie and 0 is truthy in Ruby, so `nonzero?` is what
  # lets the `||` fall through to the secondary comparison.
  KANPAT = KANROM.keys.sort {|a, b|
    (b.length <=> a.length).nonzero? ||
      (KANROM[a].length <=> KANROM[b].length)
  }.join "|"

  # Romaji column of each table: the tokens alternate kana, romaji, ..., so
  # the romaji are the tokens at odd (zero-based) positions. Both arrays keep
  # the order of their table.
  KUNREI  = KUNREITAB.split(/\s+/).select.with_index { |_token, position| position.odd? }
  HEPBURN = HEPBURNTAB.split(/\s+/).select.with_index { |_token, position| position.odd? }

  # Regexp alternations of the Kunrei and the Hepburn spellings, each ordered
  # longest first so that longer spellings are tried before shorter ones.
  KUNPAT = KUNREI.sort { |x, y| y.size <=> x.size }.join("|")
  HEPPAT = HEPBURN.sort { |x, y| y.size <=> x.size }.join("|")

  # Kunrei spelling => Hepburn spelling, pairing the two romaji lists by
  # position. A spelling that occurs more than once keeps its last pairing.
  TO_HEPBURN = Hash[KUNREI.zip(HEPBURN)]

  # Hepburn spelling => Kunrei spelling, pairing the two romaji lists by
  # position. A spelling that occurs more than once keeps its last pairing.
  TO_KUNREI = Hash[HEPBURN.zip(KUNREI)]

  # FIXME: ad hoc solution
  # Turns every "nn" into "n'" and then drops the apostrophe again unless a
  # vowel, "y" or "n" follows it. Returns a new string.
  #   tanni   => tan'i
  #   kannji  => kanji
  #   hannnou => han'nou
  #   hannnya => han'nya
  def normalize_double_n
    apostrophized = gsub(/nn/, "n'")
    apostrophized.gsub(/n\'(?=[^aiueoyn]|$)/, "n")
  end

  # In-place variant of the double-"n" normalization: rewrites "nn" as "n'"
  # and removes the apostrophe unless a vowel, "y" or "n" follows.
  # Always returns the receiver, whether or not anything changed.
  def normalize_double_n!
    tap do |text|
      text.gsub!(/nn/, "n'")
      text.gsub!(/n\'(?=[^aiueoyn]|$)/, "n")
    end
  end

  # Romaji -> Kana
  # Accepts Hepburn as well as Kunrei spellings. The double-"n" forms are
  # normalized first, then every spelling found in ROMKAN is replaced by its
  # kana; text that matches no spelling is kept as is. Returns a new string.
  def to_kana
    normalize_double_n.gsub(/(#{ROMPAT})/) { ROMKAN[Regexp.last_match(1)] }
  end

  # Kana -> Romaji.
  # Replaces every kana sequence found in KANROM by its Romaji (Hepburn
  # spellings), then drops the apostrophe of "n'" unless a vowel, "y" or "n"
  # follows. Returns a new string.
  def to_roma
    gsub(/(#{KANPAT})/) { KANROM[Regexp.last_match(1)] }
      .gsub(/n'(?=[^aeiuoyn]|$)/, "n")
  end

  # Romaji -> Romaji
  # Normalizes the receiver into Hepburn spellings and returns a new string.
  # e.g. kannzi -> kanji, tiezo -> chiezo
  # The regexp is anchored with \G, so matching walks the text contiguously
  # from the start: it skips as few Hepburn spellings as needed, then
  # rewrites the next Kunrei spelling.
  def to_hepburn
    normalize_double_n.gsub(/\G((?:#{HEPPAT})*?)(#{KUNPAT})/) do
      Regexp.last_match(1) + TO_HEPBURN[Regexp.last_match(2)]
    end
  end

  # Romaji -> Romaji
  # Normalizes the receiver into Kunrei spellings and returns a new string.
  # e.g. kanji -> kanzi, chiezo -> tiezo
  # The regexp is anchored with \G, so matching walks the text contiguously
  # from the start: it skips as few Kunrei spellings as needed, then
  # rewrites the next Hepburn spelling.
  def to_kunrei
    normalize_double_n.gsub(/\G((?:#{KUNPAT})*?)(#{HEPPAT})/) do
      Regexp.last_match(1) + TO_KUNREI[Regexp.last_match(2)]
    end
  end

  # In-place Romaji -> Kana conversion: normalizes the double-"n" forms of
  # the receiver, then replaces every spelling found in ROMKAN by its kana.
  # Always returns the receiver.
  def to_kana!
    normalize_double_n!
    gsub!(/(#{ROMPAT})/) { ROMKAN[Regexp.last_match(1)] }
    self
  end

  # In-place Kana -> Romaji conversion: replaces every kana sequence found in
  # KANROM by its Romaji, then drops the apostrophe of "n'" unless a vowel,
  # "y" or "n" follows. Always returns the receiver.
  def to_roma!
    gsub!(/(#{KANPAT})/) { KANROM[Regexp.last_match(1)] }
    gsub!(/n'(?=[^aeiuoyn]|$)/, "n")
    self
  end

  # In-place normalization into Hepburn spellings: normalizes the double-"n"
  # forms of the receiver, then rewrites each Kunrei spelling reached by the
  # \G-anchored scan. Always returns the receiver.
  def to_hepburn!
    normalize_double_n!
    gsub!(/\G((?:#{HEPPAT})*?)(#{KUNPAT})/) do
      Regexp.last_match(1) + TO_HEPBURN[Regexp.last_match(2)]
    end
    self
  end

  # In-place normalization into Kunrei spellings: normalizes the double-"n"
  # forms of the receiver, then rewrites each Hepburn spelling reached by the
  # \G-anchored scan.
  # Always returns the receiver, like the other bang converters. gsub!
  # answers nil when it substitutes nothing, so its result must not be the
  # return value.
  def to_kunrei!
    normalize_double_n!
    gsub!(/\G((?:#{KUNPAT})*?)(#{HEPPAT})/) { $1 + TO_KUNREI[$2] }
    self
  end

  # True when the receiver is exactly one of the consonant letters
  # c k g s z j t d h f p b m y r w x n, false otherwise.
  # \A and \z anchor the whole string; ^ and $ would anchor each line and
  # accept multi-line input such as "a\nk" or "k\n".
  def consonant?
    /\A[ckgszjtdhfpbmyrwxn]\z/.match(self) ? true : false
  end

  # True when the receiver is exactly one of the vowels a e i o u, false
  # otherwise.
  # \A and \z anchor the whole string; ^ and $ would anchor each line and
  # accept multi-line input such as "k\na" or "a\n".
  def vowel?
    /\A[aeiou]\z/.match(self) ? true : false
  end

  # `z' => (za ze zi zo zu)
  # Returns the Romaji spellings in ROMKAN that consist of the receiver
  # followed by exactly one more character, in ROMKAN's key order.
  # The receiver is compared literally (prefix plus length) instead of being
  # spliced into a regexp, so characters such as "(" or "." are harmless.
  def expand_consonant
    wanted_length = length + 1
    ROMKAN.keys.select do |roma|
      roma.length == wanted_length && roma.start_with?(self)
    end
  end
end