File: prime-dict-convert.src

package info (click to toggle)
prime 1.0.0.1-2.2
  • links: PTS
  • area: main
  • in suites: wheezy
  • size: 1,420 kB
  • sloc: ruby: 7,139; lisp: 1,866; sh: 442; makefile: 242
file content (215 lines) | stat: -rwxr-xr-x 5,913 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/ruby1.8
# prime-dict-convert: A dictionary converter from a dictionary of another IME
# $Id: prime-dict-convert.src,v 1.4 2005/03/07 07:51:34 komatsu Exp $
#
# Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.

PRIME_LIBDIR = '%rubydir%'
$LOAD_PATH.unshift(PRIME_LIBDIR) unless $LOAD_PATH.member?(PRIME_LIBDIR)

require 'prime/taiyaki'
require 'getoptlong'
require 'prime/makedict/dictformat-skkdic'
require 'prime/makedict/dictformat-pubdic'
require 'prime/makedict/dictformat-cannadic'
require 'prime/makedict/dictformat-adambnc'
require 'prime/makedict/dictformat-wordnet'
require 'prime/makedict/dictformat-usage'
require 'prime/makedict/basicdict'

## dictformat-wordlist and dictformat-text require MeCab.  FLAG_MECAB is
## true only when both files load; a LoadError from either one makes it false.
FLAG_MECAB = begin
  Kernel::require('prime/makedict/dictformat-wordlist')
  Kernel::require('prime/makedict/dictformat-text')
  true
rescue LoadError
  false
end

# Command-line driver of prime-dict-convert.
#
# It parses the command line, loads one or more input dictionaries through
# the converter class registered for each --<format> option, saves the merged
# result under the output dictionary name and finally builds the indexes.
class PrimeDictConvertCommand
  include Debug

  # command_name:: the name printed in the usage and version messages.
  def initialize(command_name)
    @debug_mode = false
    @is_interactive = true

    # NOTE(review): @indexing and @conversion are not read anywhere in this
    # class; the switches actually consulted are @flag_index and @flag_append,
    # which parse_options sets.
    @indexing   = true
    @conversion = true
    @output_dictname = nil
    @input_dictnames = []

    # Format name => one-element array holding the converter class.
    # convert_dicts picks the class with @formats[type][0], so every value
    # here must be an array.
    @formats = {
      :prime    => [DictFormat],
      :pubdic   => [DictFormatPubdic],
      :canna    => [DictFormatCannadic],
      :skk      => [DictFormatSkkdic],
      :usage    => [DictFormatUsage],
      :adambnc  => [DictFormatAdamBNC],
      :wordnet  => [DictFormatWordNet],
    }
    if FLAG_MECAB then
      # These were registered as bare classes before, which made
      # @formats[type][0] fail for --wordlist, --text and --gaim.
      @formats[:wordlist] = [DictFormatWordlist]
      @formats[:text]     = [DictFormatText]
      @formats[:gaim]     = [DictFormatGaim]
    end

    @command_name = command_name
    @version = '%PACKAGE_VERSION%'
    @usage = <<"EOF"
#{@command_name}:#{@version} -- converts from a dictionary of another IME.

  Usage: #{@command_name} output_dictname --<format> input_dictname [options]
       --append    : append words to the existent output dictionary. (default)
       --overwrite : delete the existent output dictionary and make new dict.

       --prime    :  merge the input_dict with the output_dict
       --canna    :  convert from the canna dict
       --skk      :  convert from the skk dict
       --pubdic   :  convert from the pubdic dict
       --usage    :  convert from the usage dict (beta)
       --adambnc  :  convert from the Adam\'s BNC word frequency list (beta)
       --wordnet  :  convert from the WordNet cntlist
       --wordlist :  convert from the wordlist without the readings and POSes
                     (beta)
       --text     :  convert from the plain text file (beta)
       --gaim     :  convert from the Gaim log (beta)

       --no-index :  convert only without indexing
  -q,  --quiet    :  run this command and show nothing.
  -v,  --version  :  show the version and exit
  -h,  --help     :  show this help and exit
  -d,  --debug    :  run under debug mode
EOF
  end

  # Entry point: parses the options, prints the usage and exits when the
  # output name or every input dictionary is missing, otherwise converts
  # and then indexes.
  def main
    parse_options()

    if @output_dictname.nil? or @input_dictnames.empty? then
      print_usage()
      exit()
    end
    convert_dicts()
    index_dict()
  end

  # Loads every [format, filename] pair of @input_dictnames on top of the
  # base dictionary and saves the result as @output_dictname.
  #
  # When @flag_append is set, the existing output dictionary is loaded first
  # so that the new words are added to it.  The dict object is handed from
  # one converter to the next, and the last converter writes the result.
  # main only calls this with a non-empty @input_dictnames.
  def convert_dicts
    basedict = DictFormat.new(@is_interactive)
    basedict.load_existent_dict(@output_dictname) if @flag_append
    dict = basedict.dict

    converter = nil
    @input_dictnames.each { |type, filename|
      unless @formats.has_key?(type) then
        puts "Invalid format option..."
        exit()
      end

      converter = @formats[type][0].new(@is_interactive)
      converter.dict = dict
      converter.load_external_dict(filename)
      dict = converter.dict
    }
    converter.save_dict(@output_dictname)
  end

  # Builds the POS table and the basic dictionary indexes for the output
  # dictionary.  Skipped only when @flag_index is exactly false (--no-index).
  def index_dict
    return if @flag_index == false

    indexer = PrimeBasicdict.new(@output_dictname, @is_interactive)
    indexer.make_pos_table()
    indexer.make_basicdict_indexes()
  end

  # Prints "<command name>:<version>".
  def print_version
    puts "#{@command_name}:#{@version}"
  end

  # Prints the usage text built in initialize.
  def print_usage
    puts @usage
  end

  private

  # Parses the command line with GetoptLong and stores the result in
  # @output_dictname, @flag_append, @flag_index, @is_interactive and
  # @input_dictnames.  --version and --help print their message and exit.
  def parse_options
    options = {}
    parser = GetoptLong.new()
    option_list = [
      ['--help',       '-h',   GetoptLong::NO_ARGUMENT],
      ['--version',    '-v',   GetoptLong::NO_ARGUMENT],
      ['--debug',      '-d',   GetoptLong::NO_ARGUMENT],
      ['--quiet',      '-q',   GetoptLong::NO_ARGUMENT],
      ['--append',             GetoptLong::NO_ARGUMENT],
      ['--overwrite',          GetoptLong::NO_ARGUMENT],
      ['--no-index',           GetoptLong::NO_ARGUMENT],
    ]
    # Each registered format becomes a long option taking the input filename.
    @formats.keys.each { |key|
      option_list.push(['--' + key.to_s(), GetoptLong::REQUIRED_ARGUMENT])
    }
    parser.set_options(*option_list)

    # Keyed by the option name without the leading "--".  An option given
    # more than once keeps only its last argument.
    parser.each_option { |option, arg|
      options[option.sub(/^--/, '')] = arg
    }

    if options['version'] then
      print_version()
      exit()
    elsif options['help'] then
      print_usage()
      exit()
    end

    $DEBUG = true if options['debug']
    @is_interactive = false if options['quiet']

    # The first argument left in ARGV after option parsing is the output name.
    @output_dictname = ARGV[0]

    # Appending is the default; --append takes precedence over --overwrite
    # when both are given.
    @flag_append = true
    if options['append'] then
      @flag_append = true
    elsif options['overwrite'] then
      @flag_append = false
    end

    @flag_index = options['no-index'] ? false : true

    # Inputs are collected in the iteration order of @formats, not in
    # command-line order.
    @formats.keys.each { |format|
      argument = options[format.to_s]
      @input_dictnames.push([format, argument]) if argument
    }
  end
end



# Run the converter only when this file itself is the program being executed,
# i.e. when $0 and __FILE__ expand to the same path.
if File.expand_path(__FILE__) == File.expand_path($0)
  PrimeDictConvertCommand.new(File.basename($0)).main
end


# --- Text properties for Emacs. ---
# Local variables:
# mode: ruby
# End: