File: migemo.rb.in

package info (click to toggle)
migemo 0.40-7.1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 4,208 kB
  • ctags: 123
  • sloc: ruby: 892; sh: 555; makefile: 119
file content (235 lines) | stat: -rw-r--r-- 6,376 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#
# Ruby/Migemo - a library for Japanese incremental search.
#
# Copyright (C) 2001 Satoru Takabayashi <satoru@namazu.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#
# NOTE: Ruby/Migemo can work only with EUC_JP encoding. ($KCODE="e")
#

require 'migemo-dict'
require 'migemo-regex'
require 'romkan'
require 'jcode'
include MigemoRegex

# Extensions to the core String class used by Migemo.
#
# NOTE(review): the file header says this library works only with EUC_JP
# ($KCODE="e"). Several literals in this class (the regex and tr ranges in
# to_katakana, and every value in HANZEN_TAB) are empty in this copy, which
# suggests multibyte characters were lost -- confirm against the upstream
# file before relying on these methods.
class String
  # Hiragana to Katakana
  #
  # Applies a gsub followed by a tr to the receiver and returns the result.
  # NOTE(review): the gsub pattern is empty yet the replacement refers to
  # group 1, and both tr arguments are '-'; the original multibyte operands
  # are presumably missing here -- TODO confirm.
  def to_katakana
    self.gsub(//, '\\1').tr('-', '-')
  end

  # Returns a copy of the string in which every character that is neither a
  # space nor a word character (\w) is preceded by a backslash.
  def quotemeta
    self.gsub(/([^ \w])/, '\\\\\\1')
  end

  # Returns the first token matched at a line start, where a token is either
  # a backslash followed by one character or a single character.
  # Returns nil when nothing matches (for example, on an empty string).
  def first
    /^(\\.|.)/ =~ self
    $1
  end

  # Returns the token matched at a line end, using the same token definition
  # as #first (backslash + character, or a single character).
  # Returns nil when nothing matches.
  def last
    /(\\.|.)$/ =~ self
    $1
  end

  # Returns what follows the first token on the matched line (the second
  # capture group), or nil when nothing matches.
  def rest
    /^(\\.|.)(.*)/ =~ self
    $2
  end

  # Lookup table keyed by the printable ASCII characters (space through '~').
  # NOTE(review): every value is an empty string in this copy; judging by
  # the name and by #to_fullwidth these were presumably the full-width
  # counterparts of each key -- TODO confirm against upstream.
  HANZEN_TAB = {
    " " => "", "!" => "", '"' => "", "#" => "", 
    "\$" => "", "%" => "", "&" => "", "'" => "",
    "(" => "", ")" => "", "*" => "", "+" => "",
    "," => "", "-" => "", "." => "", "/" => "",
    "0" => "", "1" => "", "2" => "", "3" => "",
    "4" => "", "5" => "", "6" => "", "7" => "",
    "8" => "", "9" => "", ":" => "", ";" => "",
    "<" => "", "=" => "", ">" => "", "?" => "",
    '@' => "", "A" => "", "B" => "", "C" => "",
    "D" => "", "E" => "", "F" => "", "G" => "",
    "H" => "", "I" => "", "J" => "", "K" => "",
    "L" => "", "M" => "", "N" => "", "O" => "",
    "P" => "", "Q" => "", "R" => "", "S" => "",
    "T" => "", "U" => "", "V" => "", "W" => "",
    "X" => "", "Y" => "", "Z" => "", "[" => "", 
    "\\" => "", "]" => "", "^" => "", "_" => "",
    "`" => "", "a" => "", "b" => "", "c" => "",
    "d" => "", "e" => "", "f" => "", "g" => "",
    "h" => "", "i" => "", "j" => "", "k" => "",
    "l" => "", "m" => "", "n" => "", "o" => "",
    "p" => "", "q" => "", "r" => "", "s" => "",
    "t" => "", "u" => "", "v" => "", "w" => "",
    "x" => "", "y" => "", "z" => "", "{" => "",
    "|" => "", "}" => "", "~" => ""} #'

  # Regexp matching any single key of HANZEN_TAB: the keys are sorted,
  # escaped with #quotemeta and joined with '|'.
  HANZEN_RE = Regexp.new(HANZEN_TAB.keys.sort.map {|x| x.quotemeta}.join('|'))

  # Returns a copy of the string in which every character matched by
  # HANZEN_RE is replaced by its HANZEN_TAB value.
  def to_fullwidth
    self.gsub(HANZEN_RE) {|s| HANZEN_TAB[s]}
  end
end

# Expands a (possibly incomplete) romaji search pattern into a regular
# expression, using a static dictionary plus optional cache, user and regex
# dictionaries.
#
# Collaborators defined outside this class: RegexCompiler, RegexAlternation
# and RegexRendererFactory, and the String methods to_kana, consonant?,
# expand_consonant, to_katakana and to_fullwidth.
#
# NOTE(review): the file header states the library works only with EUC_JP.
# Several string literals in expand_kanas are empty ("") in this copy and
# presumably held multibyte kana originally -- confirm against upstream.
class Migemo
  VERSION = '@VERSION@'

  # dict    - static dictionary; must respond to lookup(pattern) and yield
  #           items responding to values.
  # pattern - the search pattern typed so far (String).
  def initialize(dict, pattern)
    @type = "ruby"
    @pattern = pattern
    @insertion = ""
    @optimization = 3
    @static_dict = dict
    @dict_cache = nil
    @user_dict = nil
    @regex_dict = nil
    @with_paren = false
  end
  attr_accessor :optimization
  attr_accessor :type
  attr_accessor :insertion
  attr_accessor :dict_cache
  attr_accessor :user_dict
  attr_accessor :regex_dict
  attr_accessor :with_paren

  private

  # Returns a sorted Array of candidate readings for @pattern.
  #
  # The pattern is downcased and passed through String#to_kana; the result
  # is split into its final character (last) and everything before it
  # (head). When the final character is a consonant (per String#consonant?)
  # the pattern ends in an unfinished syllable, so every completion offered
  # by String#expand_consonant is appended to the prefix. The numbered
  # special cases below follow the original author's labels.
  #
  # Returns [] when the converted pattern is empty.
  def expand_kanas
    kana = @pattern.downcase.to_kana
    /^(.*)(.)$/ =~ kana
    head = $1
    last = $2

    cand = []
    return [] if last.nil?
    if last.consonant?
      if /^(.*)(.)$/ =~ head && $2.consonant?
        # The pattern ends in two consonants.
        head2 = $1
        beforelast = $2
        # Compare against the local beforelast; a "$"-prefixed name here
        # would read an unset global and the branch could never be taken.
        if last == beforelast # special case 2 (doubled consonant)
          cand.push(head2 + "")
        elsif /^(.*)(.)$/ =~ head2 && beforelast == $2 && last.consonant?
          # special case 5: doubled consonant followed by another consonant
          prefix = $1 # saved before any further matching can replace $1
          cand += (beforelast + last).expand_consonant.map do |x|
            prefix + "" + x.to_kana
          end
        else
          cand += (beforelast + last).expand_consonant.map do |x|
            head2 + x.to_kana
          end
        end
      elsif /^(.*?)(n?)ny$/ =~ @pattern && $2 == "" # special case 6
        # The raw pattern ends in "ny" that is not preceded by another "n".
        head2 = $1
        cand += "ny".expand_consonant.map do |x|
          head2 + x.to_kana
        end
      else
        deriv = last.expand_consonant
        deriv.push "xtsu"
        if last == "c" # special case 3
          deriv.push "chi"
        elsif last == "x" # special case 4
          deriv.push "xya", "xyu", "xyo", "xwa"
        end
        cand += deriv.map { |x| head + x.to_kana }
      end
    elsif last == "" # special case 1
      cand.push kana
      cand += ("n".expand_consonant + [""]).map do |x|
        head + x.to_kana
      end
    else
      cand.push kana
    end
    cand.sort
  end

  # Collects the values of every item that dict.lookup yields for pattern.
  #
  # Raises RuntimeError (bare raise) when pattern is nil.
  def expand_words(dict, pattern)
    raise if pattern == nil
    words = []
    dict.lookup(pattern) do |item|
      words += item.values
    end
    words
  end

  # Looks @pattern up in the cache dictionary.
  def lookup_cache
    @dict_cache.lookup(@pattern)
  end

  # Builds the regex tree from scratch: the raw pattern, its to_fullwidth
  # form, every kana candidate, each candidate's to_katakana form, and all
  # static-dictionary words for the candidates and for the raw pattern.
  def lookup0
    compiler = RegexCompiler.new
    compiler.push(@pattern)
    compiler.push(@pattern.to_fullwidth)
    expand_kanas.each do |kana|
      compiler.push(kana)
      compiler.push(kana.to_katakana)
      expand_words(@static_dict, kana).each { |word| compiler.push(word) }
    end
    expand_words(@static_dict, @pattern).each { |word| compiler.push(word) }
    compiler.uniq
    compiler.optimize(@optimization) if @optimization
    compiler.regex
  end

  # Builds a regex tree from the user dictionary only (words for each kana
  # candidate and for the raw pattern).
  def lookup_user_dict
    compiler = RegexCompiler.new
    expand_kanas.each do |kana|
      expand_words(@user_dict, kana).each { |word| compiler.push(word) }
    end
    expand_words(@user_dict, @pattern).each { |word| compiler.push(word) }
    compiler.uniq
    compiler.optimize(@optimization) if @optimization
    compiler.regex
  end

  # Returns the values of every regex-dictionary item found for @pattern.
  def lookup_regex_dict
    regexes = []
    @regex_dict.lookup(@pattern) do |item|
      regexes += item.values
    end
    regexes
  end

  public

  # Returns the regex tree for @pattern.
  #
  # An empty pattern yields an empty RegexAlternation. Otherwise the cache
  # (when set) is consulted first, falling back to a full lookup, and any
  # user-dictionary entries are pushed onto the result.
  def lookup
    if @pattern == ""
      return RegexAlternation.new
    end
    result = if @dict_cache
               lookup_cache || lookup0
             else
               lookup0
             end
    if @user_dict
      lookup_user_dict.each { |x| result.push(x) }
    end
    result
  end

  # Alias-style accessor for the regex tree produced by #lookup.
  def regex_tree
    lookup
  end

  # Renders the regex tree as a String for the configured @type, passing
  # @insertion and @with_paren to the renderer, and joins in the
  # regex-dictionary entries when a regex dictionary is set.
  def regex
    tree = lookup
    renderer = RegexRendererFactory.new(tree, @type, @insertion)
    renderer.with_paren = @with_paren
    string = renderer.render
    string = renderer.join_regexes(string, lookup_regex_dict) if @regex_dict
    string
  end
end