File: annotation-filter.rb

package info (click to toggle)
skktools 1.3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,380 kB
  • ctags: 349
  • sloc: sh: 11,551; ansic: 1,300; ruby: 1,049; perl: 798; lisp: 431; python: 241; makefile: 126; awk: 94; cpp: 73; sed: 1
file content (113 lines) | stat: -rwxr-xr-x 4,051 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/local/bin/ruby -Ke
# -*- coding: euc-jp -*-
## Copyright (C) 2005 MITA Yuusuke <clefs@mail.goo.ne.jp>
##
## Author: MITA Yuusuke <clefs@mail.goo.ne.jp>
## Maintainer: SKK Development Team <skk@ring.gr.jp>
## Version: $Id: annotation-filter.rb,v 1.4 2013/05/26 09:47:48 skk-cvs Exp $
## Keywords: japanese, dictionary
## Last Modified: $Date: 2013/05/26 09:47:48 $
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2, or (at your option)
## any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program, see the file COPYING.  If not, write to the
## Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston,
## MA 02110-1301, USA.
##
### Instruction:
##
## 
require 'jcode' if RUBY_VERSION.to_f < 1.9
#require 'kconv'
require 'skkdictools'
require 'optparse'
## Command-line configuration.
##
## Each switch below either flips one of the following locals or appends an
## [action, pattern] pair to `rulesets`.  The main loop reads these values
## for every candidate it processes.
opt = OptionParser.new

keep_annotation = false     # -k: annotations are dropped unless this is set
output_all = true           # -t: when cleared, only "extract"ed pairs are printed
unannotate_unique = false   # -s: drop the annotation of an entry's only candidate
unannotate_cap = 99999999   # -j: entries with at least this many candidates keep annotations
doublebar = "remove"        # -b => "sticky", -B => "dumb"
rulesets = []

## Rules appended by -d.
## NOTE(review): in this copy the patterns contain empty alternatives (for
## example the first one starts with '|'), so as regular expressions they
## match every annotation.  The file is declared EUC-JP; presumably the
## non-ASCII characters were lost in a transcoding step -- confirm against
## the upstream file before relying on -d.
default_rulesets = [
  [ "exclude", '|\?$' ],
  # [ "exclude", "\[\]" ],
  [ "keep", '|λ|ܻ|||' ],
  # [ "keep", "NB:|=||||<rare>" ],
  # [ "cut", "" ] - 'doublebar' handles it inplace
]

## Pattern rules; they are applied later in the order given on the command line.
opt.on('-c pattern', 'cut annotations after <pattern>') { |pattern| rulesets << [ "cut", pattern ] }
opt.on('-e pattern', 'eliminate candidates if <pattern> matches') { |pattern| rulesets << [ "exclude", pattern ] }
opt.on('-x pattern', 'output pairs if <pattern> matches (use with -t)') { |pattern| rulesets << [ "extract", pattern ] }
opt.on('-u pattern', 'unannotate candidates if <pattern> matches (use with -k)') { |pattern| rulesets << [ "unannotate", pattern ] }
opt.on('-U pattern', 'keep annotations matching <pattern>') { |pattern| rulesets << [ "keep", pattern ] }

## Global switches.
opt.on('-s', 'unannotate if the candidate is "unique"') { unannotate_unique = true }
opt.on('-j VAL', "never unannotate if an entry has more than <VAL> candidates") { |v| unannotate_cap = v.to_i }
opt.on('-k', 'keep annotations by default') { keep_annotation = true }
opt.on('-t', "extraction mode: output requested pairs only") { output_all = false }
opt.on('-d', "apply default rulesets") { rulesets += default_rulesets }

## Handling of the text that follows the double-bar delimiter.
opt.on('-b', "sticky '' -- annotation after '' will always be kept") { doublebar = "sticky" }
#opt.on('-B', "always remove annotations after ''") { doublebar = "remove" }
opt.on('-B', "treat '' as a part of annotation") { doublebar = "dumb" }


begin
  opt.parse!(ARGV)
  #rulesets = default_rulesets if rulesets.empty?
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidOption, MissingArgument,
  # InvalidArgument, ...; rescuing it means a switch given without its
  # pattern also ends with the usage hint instead of a backtrace.
  $stderr.puts e.message
  print "'#{$0} -h' for help.\n"
  exit 1
end


## Main filter loop.
##
## Reads dictionary lines via Kernel#gets, skips comment lines (leading ';')
## and empty lines, and decides for every candidate of an entry whether it
## is printed and whether its annotation is kept.  The rules collected in
## `rulesets` are applied in order, so a later rule overrides the flag set
## by an earlier one.
## parse_skk_entry, skk_split_tokens and print_pair are not defined in this
## file; presumably they are provided by skkdictools.

## Compile each rule pattern at most once, on first use, instead of once
## per rule per candidate.
regexps = Hash.new { |cache, pattern| cache[pattern] = Regexp.compile(pattern) }

while gets
  next if $_ =~ /^;/ || $_ =~ /^$/
  midasi, tokens = $_.parse_skk_entry
  # Array#nitems (number of non-nil elements) no longer exists in Ruby 1.9
  # and later; compact.size yields the same count on every version.
  total = tokens.compact.size
  #results = Array.new

  tokens.each do |token|
    # In "dumb" mode no delimiter is passed, so nothing is split off as a
    # separate comment.
    # NOTE(review): the delimiter literal is empty in this copy; the name
    # `doublebar` suggests it should be a double-bar character -- confirm
    # against the upstream file.
    word, annotation, comment = token.skk_split_tokens(doublebar == "dumb" ? nil : '')

    # Per-candidate defaults derived from the global switches.
    do_unannotate = !keep_annotation
    do_output = output_all
    do_unannotate = true if unannotate_unique && total == 1
    do_unannotate = false if unannotate_cap <= total

    rulesets.each do |action, pattern|
      # Rules only ever look at the annotation; candidates without one are
      # left at their defaults.
      next if annotation.nil?
      match = (annotation =~ regexps[pattern])
      next unless match

      case action
      when "cut"
        # Keep only the part before the match; later rules see the
        # shortened annotation.
        annotation = annotation[0, match]
      when "extract"
        do_output = true
      when "exclude"
        do_output = false
      when "unannotate"
        do_unannotate = true
      when "keep"
        do_unannotate = false
      end
    end

    next unless do_output
    #results << [word, do_unannotate ? nil : annotation, doublebar == "sticky" ? comment : nil]
    print_pair(midasi, word, do_unannotate ? nil : annotation, doublebar == "sticky" ? comment : nil)
  end
end