File: optimize-dict.pl

package info (click to toggle)
cmigemo 20110227-7
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 840 kB
  • sloc: ansic: 2,617; lisp: 608; cs: 183; makefile: 159; sh: 137; cpp: 67; perl: 59; csh: 49
file content (42 lines) | stat: -rw-r--r-- 798 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/perl -w
# vim:set ts=8 sts=4 sw=4 tw=0:
# 
# optimize-dict.pl
#
# Last Change: 21-Jan-2005.
# Written By:  MURAOKA Taro <koron@tka.att.ne.jp>

use strict;

binmode STDOUT;
my %migemo;	# label => array ref of every word read for that label
my @migemo;	# labels in input order (a label may appear more than once)

# Read migemo-dict file.
# Each entry is one line: a label followed by tab-separated words.
# Lines starting with ';' are comments and are ignored.
while (my $line = <>)
{
    chomp $line;
    next if $line =~ /^;/;
    my ($label, @word) = split /\t/, $line;
    # A blank line (or a line starting with a tab) has no label.  Skip it:
    # otherwise lc() warns about an uninitialized value and an entry with
    # an empty key ends up in the output.
    next unless defined $label && length $label;
    $label = lc($label); # Key must be lower case
    push @migemo, $label;
    # Words of labels that differ only in case (or repeat) are merged here.
    push @{$migemo{$label}}, @word;
}

# Order the labels longest first; labels of equal length are ordered by
# plain string comparison.
@migemo = sort { length($b) <=> length($a) || $a cmp $b } @migemo;

# Write migemo-dict file.
# @migemo may list the same label several times.  The label's entry is
# removed from %migemo when it is written, so every label produces exactly
# one output line.
foreach my $label (@migemo)
{
    # delete hands back the stored word list, or undef when the label has
    # already been written.
    my $words = delete $migemo{$label};
    next unless defined $words;
    print "$label\t" . join("\t", uniq_array($words)) . "\n";
}

# Return the distinct elements of an array, in order of first occurrence.
#
#   $_[0]   - reference to the array to de-duplicate (it is not modified)
#   returns - list of the unique elements; in scalar context, their count
#
# Elements are compared as strings (they are used as hash keys).  Keeping
# first-occurrence order makes the result deterministic; taking the keys of
# a hash would return the elements in Perl's arbitrary hash order, which
# can change from one run to the next.
sub uniq_array
{
    my ($list) = @_;
    my %seen;
    # Post-increment: an element passes the filter only the first time.
    return grep { !$seen{$_}++ } @{$list};
}