File: complete-numerative.rb

package info (click to toggle)
skktools 1.3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,380 kB
  • ctags: 349
  • sloc: sh: 11,551; ansic: 1,300; ruby: 1,049; perl: 798; lisp: 431; python: 241; makefile: 126; awk: 94; cpp: 73; sed: 1
file content (102 lines) | stat: -rwxr-xr-x 3,486 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/local/bin/ruby -Ke
# -*- coding: euc-jp -*-
## Copyright (C) 2005 MITA Yuusuke <clefs@mail.goo.ne.jp>
##
## Author: MITA Yuusuke <clefs@mail.goo.ne.jp>
## Maintainer: SKK Development Team <skk@ring.gr.jp>
## Version: $Id: complete-numerative.rb,v 1.4 2013/05/26 09:47:48 skk-cvs Exp $
## Keywords: japanese, dictionary
## Last Modified: $Date: 2013/05/26 09:47:48 $
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program, see the file COPYING.  If not, write to the
## Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston,
## MA 02110-1301, USA.
##
### Instruction:
##
## This script supplements missing numerative pairs by generating,
## for example, 「#まい /#3枚/#1枚/#0枚/#2枚/」 from
## 「#まい /#0枚/」.
##
##     % complete-numerative.rb SKK-JISYO.L > SKK-JISYO.num
##     % skkdic-expr2 SKK-JISYO.L + SKK-JISYO.num > SKK-JISYO.L.new
## 
## You might wish to reorder existing numerative pairs; if you always
## prefer /#0/#3/#1/#2/ , try this:
##
##     % complete-numerative.rb -o 0312 SKK-JISYO.L > SKK-JISYO.num
##     % skkdic-expr2 SKK-JISYO.L - SKK-JISYO.num + SKK-JISYO.num > SKK-JISYO.L.new
##
## If you simply want #0 pairs to appear first, do this:
##
##     % complete-numerative.rb -o 0 SKK-JISYO.L > SKK-JISYO.num0
##     % skkdic-expr2 SKK-JISYO.num0 + SKK-JISYO.L > SKK-JISYO.L.new
##
##
## NOTE: skkdictools.rb should be in one of the ruby loadpaths.
##
##
## TODO: output /#3foo/#3bar/#1foo/#1bar/ instead of /#3foo/#1foo/#3bar/#1bar/
##

#require 'jcode'
#require 'kconv'
require 'skkdictools'
require 'optparse'
# Command-line handling.  The option callbacks below overwrite these
# defaults; the rest of the script reads the resulting values.
opt = OptionParser.new

purge = false            # set by -p
order = "3102"           # set by -o: digits substituted after '#', in output order
annotation_mode = "all"  # -U switches this to "none"
mode = "convert"         # -e switches this to "extract"

opt.on('-o ORDER', 'specify order of results, eg. "3102" => "/#3/#1/#0/#2/"') { |v| order = v } # TODO - check sanity
#opt.on('-u', "don't add annotations for derived pairs") { annotation_mode = "self" }
opt.on('-U', 'eliminate all the annotations') { annotation_mode = "none" }
# The help text used to read 'marked with "" or "?"': the marker character
# between the first pair of quotes had been lost.  It is spelled out in
# ASCII here so the text no longer depends on the file's encoding.
# NOTE(review): the lost marker is believed to be the reference mark
# (U+203B) -- confirm against the upstream source.
opt.on('-p', 'skip candidates marked with a reference mark (U+203B) or "?"') { purge = true }
opt.on('-e', 'only extract numerative entries') { mode = "extract" }

begin
  opt.parse!(ARGV)
rescue OptionParser::ParseError => e
  # ParseError is the common ancestor of InvalidOption, MissingArgument
  # (e.g. "-o" without a value), InvalidArgument and friends, so every
  # malformed command line ends here instead of dying with a backtrace.
  # The diagnostics go to stderr because stdout normally is redirected
  # into the generated dictionary file.
  $stderr.puts "#{$0}: #{e.message}"
  $stderr.puts "'#{$0} -h' for help."
  exit 1
end

# Main loop: read dictionary lines (Kernel#gets, i.e. the files named on
# the command line or stdin) and print the numerative pairs derived from
# each candidate, one print_pair call per digit of `order`.
while (line = gets)
  # Drop comment lines (leading ';'), empty lines, and every line whose
  # first space-delimited field contains no '#'.
  next if line =~ /^;/ || line =~ /^$/ || line !~ /^[^ ]*#/

  if mode == "extract"
    # XXX This is lazy -- there's a slim chance of extracting
    # non-numerative pairs whose key merely contains a '#'.
    # Anyway it's equivalent to doing grep '^[^ ;]*#'
    print line
    next
  end

  midasi, tokens = line.parse_skk_entry

  tokens.each do |token|
    word, annotation, comment = token.skk_split_tokens
    # Only candidates that carry a #0..#3 placeholder can be expanded.
    next if word !~ /#[0-3]/

    if purge
      # \xA2\xA8 is the EUC-JP byte sequence of the reference mark
      # (U+203B).  The literal character had been lost here, leaving an
      # empty regexp that matched -- and therefore skipped -- every
      # annotated candidate.  Byte escapes plus the /e flag keep this
      # line ASCII-only while still matching the EUC-JP text this script
      # reads (see the -Ke switch on the shebang line).
      # NOTE(review): the marker is reconstructed -- confirm against the
      # upstream source.
      next if annotation =~ /\xA2\xA8/e
      next if annotation =~ /\?$/
    end

    # With -U nothing but the bare word is passed on.
    if annotation_mode == "none"
      annotation = nil
      comment = nil
    end

    # One output pair per digit of `order`, in the given order; each
    # #0..#3 placeholder in the word is rewritten to that digit.
    order.each_byte do |num|
      print_pair(midasi, word.gsub(/#[0-3]/, "##{num.chr}"), annotation, comment)
    end
  end
end