File: asayaKe.rb

package info (click to toggle)
skktools 1.3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,380 kB
  • ctags: 349
  • sloc: sh: 11,551; ansic: 1,300; ruby: 1,049; perl: 798; lisp: 431; python: 241; makefile: 126; awk: 94; cpp: 73; sed: 1
file content (127 lines) | stat: -rwxr-xr-x 3,858 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/local/bin/ruby -Ke
# -*- coding: euc-jp -*-
## Copyright (C) 2005 MITA Yuusuke <clefs@mail.goo.ne.jp>
##
## Author: MITA Yuusuke <clefs@mail.goo.ne.jp>
## Maintainer: SKK Development Team <skk@ring.gr.jp>
## Version: $Id: asayaKe.rb,v 1.4 2013/05/26 09:47:48 skk-cvs Exp $
## Keywords: japanese, dictionary
## Last Modified: $Date: 2013/05/26 09:47:48 $
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program, see the file COPYING.  If not, write to the
## Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston,
## MA 02110-1301, USA.
##
### Commentary:
## Based on registdic.cgi and skkform.rb by Mikio NAKAJIMA.
##
### Instruction:
## This script converts okuri-nasi pairs with okuri into okuri-ari:
## 「あさやけ /朝焼け/」 → 「あさやk /朝焼/」
##
## '-e' simply extracts such pairs.
## '-E' outputs both in original and converted forms.
##
## '-o' given, the okuri is appended as an annotation:
## 「あさやk /朝焼;-け/」
##
## '-O' given, the result will be in skk-henkan-okuri-strictly format:
## 「あさやk /朝焼/[け/朝焼]/」
##
## '-u' eliminates all the annotations.
##
## '-p' eliminates pairs with "※" or "?" annotations that are suspected
## as 'wrong' words.
##
## NOTE: skkdictools.rb should be in one of the ruby loadpaths.
##
#require 'jcode'
#require 'kconv'
require 'skkdictools'
require 'optparse'
## Command-line handling.  Every switch only flips one of the locals
## below; the conversion loop later in the script reads them.
opt = OptionParser.new

mode = "convert"      # "convert" (default), "extract" (-e) or "both" (-E)
unannotate = false    # -u: annotations are left out of the output
okuri_mode = "none"   # "none" (default), "annotation" (-o) or "bracket" (-O)
#stem = 0
purge = false         # -p: skip candidates whose annotation is flagged
filter = false        # only meaningful for the disabled '-f' switch below


opt.on('-e', 'extract okuri-nasi-with-okuri pairs') { mode = "extract" }
opt.on('-E', 'extract and then convert okuri-nasi-with-okuri pairs') { mode = "both" }
#opt.on('-f', 'output original pairs if conversion failed') { filter = true }
opt.on('-o', 'append original "okurigana" as annotation') { okuri_mode = "annotation" }
opt.on('-O', 'append original "okurigana" in skk-henkan-okuri-strictly format') { okuri_mode = "bracket" }
opt.on('-p', 'purge candidates marked with "※" or "?"') { purge = true }
opt.on('-u', 'eliminate annotations') { unannotate = true }
#opt.on('-s VAL', 'stem candidates equal or shorter than VAL letters') { |v| stem = v.to_i * 2 }

## parse! strips the recognised switches from ARGV, leaving only the
## remaining arguments for the gets loop to read from.
begin
  opt.parse!(ARGV)
rescue OptionParser::ParseError
  # ParseError is the common parent of InvalidOption and the other
  # parse failures, so any bad command line gets the same short hint.
  print "'#{$0} -h' for help.\n"
  exit 1
end


## Main loop.  Reads dictionary lines with Kernel#gets (the files named
## on the command line, or standard input), splits each line into its
## heading and "/"-separated candidates, and prints the extracted and/or
## converted form of every candidate that okuri_nasi_to_ari accepts.
## okuri_nasi_to_ari is not defined in this file (presumably it comes
## from skkdictools); a nil first return value is treated here as
## "nothing to convert".
while gets
  # Lines starting with ";" are dictionary comments.
  next if $_ =~ /^;/

  # chomp only removes a line terminator, so a final line without a
  # trailing newline keeps its last character.
  midasi, body = $_.chomp.split(" /", 2)

  # Blank or malformed lines carry no " /" separator and therefore no
  # candidate list; skip them instead of failing on nil.
  next if body.nil?

  body.split("/").each do |token|
    candidate, annotation = token.split(";", 2)
    # An empty token (from "//") yields no usable candidate.
    next if candidate.nil? || candidate.empty?
    #next if tmp[0].length <= stem

    # -p: drop candidates whose annotation contains the "※" mark or
    # ends with "?".  A nil annotation never matches.
    next if purge && (annotation =~ /※/ || annotation =~ /\?$/)

    key, prefix, postfix = okuri_nasi_to_ari(midasi, candidate)
    next if key.nil?

    # The annotation to print, or nil when there is none or -u is set.
    note = (unannotate || annotation.nil?) ? nil : annotation

    if mode == "extract" || mode == "both"
      tail = note ? ";#{note}" : ""
      print "#{midasi} /#{candidate}#{tail}/\n"
    end

    if mode == "convert" || mode == "both"
      tail =
        case okuri_mode
        when "annotation"
          note ? ";#{note}-#{postfix}" : ";-#{postfix}"
        when "bracket"
          # NOTE(review): postfix[0,2] is two bytes (one EUC-JP kana)
          # under byte-indexed strings but two characters under
          # character-indexed strings -- confirm which is intended.
          okuri = postfix[0, 2]
          note ? ";#{note}/[#{okuri}/#{prefix};#{note}]" : "/[#{okuri}/#{prefix}]"
        else
          note ? ";#{note}" : ""
        end
      print "#{key} /#{prefix}#{tail}/\n"
    end
  end
end