File: make-tankan-dic.rb

package info (click to toggle)
skktools 1.3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,380 kB
  • ctags: 349
  • sloc: sh: 11,551; ansic: 1,300; ruby: 1,049; perl: 798; lisp: 431; python: 241; makefile: 126; awk: 94; cpp: 73; sed: 1
file content (86 lines) | stat: -rwxr-xr-x 2,738 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/local/bin/ruby -Ke
# -*- coding: euc-jp -*-
## Copyright (C) 2006 MITA Yuusuke <clefs@mail.goo.ne.jp>
##
## Author: MITA Yuusuke <clefs@mail.goo.ne.jp>
## Maintainer: SKK Development Team <skk@ring.gr.jp>
## Version: $Id: make-tankan-dic.rb,v 1.2 2013/05/26 09:47:48 skk-cvs Exp $
## Keywords: japanese, dictionary
## Last Modified: $Date: 2013/05/26 09:47:48 $
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program, see the file COPYING.  If not, write to the
## Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston,
## MA 02110-1301, USA.
##
### Instruction:
##
## % make-tankan-dic.rb SKK-JISYO.L | skkdic-expr2 > SKK-JISYO.tankan
##
## This generates a compact single-kanji (tankanji) dictionary,
## useful in combination with skk-tankan.el.
## 
## (add-to-list 'skk-search-prog-list
##  '(skk-tankan-search 'skk-search-jisyo-file
## 	 "~/skk/dic/SKK-JISYO.tankan" 0))
##
##
## XXX This won't work with SKK-JISYO.JIS3_4; alas, Ruby basically cannot
## handle JIS X 0213!
## 
require 'jcode' if RUBY_VERSION.to_f < 1.9
#require 'kconv'
require 'skkdictools'
require 'optparse'
# Command-line option handling.  The option blocks below assign to these
# locals; the filtering loop further down reads them.
opt = OptionParser.new

keep_annotation = true   # cleared by -u
purge = false            # set by -p
min_size = 2             # set by -m (documented as a byte count)
max_size = 2             # set by -M (documented as a byte count)


opt.on('-u', 'remove annotations') { keep_annotation = false }
# NOTE(review): the empty quotes in the help text below look like a non-ASCII
# mark that was lost in this copy of the file -- confirm against upstream.
opt.on('-p', 'purge candidates marked with "" or "?"') { purge = true }
# DecimalInteger makes optparse reject non-numeric sizes instead of letting
# String#to_i silently turn them into 0.
opt.on('-m VAL', OptionParser::DecimalInteger, 'minimal size of each word (in byte)') { |i| min_size = i }
opt.on('-M VAL', OptionParser::DecimalInteger, 'maximal size of each word (in byte)') { |i| max_size = i }

begin
  opt.parse!(ARGV)
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidOption, MissingArgument and
  # InvalidArgument, so every malformed command line ends up here.
  # The documented usage pipes stdout into skkdic-expr2, so diagnostics must
  # go to stderr to keep them out of the generated dictionary.
  $stderr.print e.message, "\n"
  $stderr.print "'#{$0} -h' for help.\n"
  exit 1
end


# For every accepted input line, print the entry's midasi followed by the
# candidates whose byte length lies within min_size..max_size.  Entries with
# no surviving candidate produce no output at all.
#
# NOTE(review): the character class in /^[-]/ and the empty pattern // below
# look like they lost non-ASCII (EUC-JP) characters in this copy of the file.
# As written, only lines starting with "-" pass the filter, and -p drops every
# candidate that has an annotation.  Confirm both against the upstream file.
while (line = gets)
  # Skip comment lines, blank lines and lines rejected by the midasi filter.
  next if line =~ /^;/ || line =~ /^$/ || line !~ /^[-]/
  # parse_skk_entry and skk_split_tokens are not defined in this file;
  # presumably they are String extensions provided by skkdictools.
  midasi, tokens = line.parse_skk_entry

  header_pending = true
  tokens.each do |token|
    word, annotation, _comment = token.skk_split_tokens
    # The size options are documented in bytes.  String#size counts
    # characters on Ruby >= 1.9, so use bytesize wherever it is available and
    # fall back to size on older interpreters that lack it.
    width = word.respond_to?(:bytesize) ? word.bytesize : word.size
    next if width < min_size || width > max_size
    next if purge && annotation =~ //
    next if purge && annotation =~ /\?$/
    # TODO: check if it's `Kanji'
    if header_pending
      # Print the midasi lazily, only once a first candidate survives.
      header_pending = false
      print midasi, " /"
    end
    print word
    print ";", annotation if keep_annotation && !annotation.nil? && !annotation.empty?
    print "/"
  end
  print "\n" unless header_pending
end