File: PER.pl

package info (click to toggle)
apertium-eval-translator 1.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 188 kB
  • sloc: perl: 858; sh: 162; makefile: 7
file content (148 lines) | stat: -rwxr-xr-x 3,093 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/perl -w
use utf8;

# (c) 2006-2010 Felipe Sánchez Martínez
# (c) 2006-2010 Universitat d'Alacant
#
# This software calculates  the position-independent word error
# rate (PER) between an automatic translation and a reference
#translation.
#
# This software is licensed under the GPL license version 3, or at
# your option any later version

use strict;
use warnings;

# Getting command line arguments:
use Getopt::Long;
# Documentation:
use Pod::Usage;
# I/O Handler
use IO::Handle;

my($test, $ref, $help, $seg, $version);

# Command line arguments
GetOptions( 'test|t=s'           => \$test,
            'ref|r=s'            => \$ref,
            'seg|s'              => \$seg,
            'help|h'             => \$help,
	    'version|v'          => \$version,
          ) || pod2usage(2);

if ($version) {
   print "PER.pl 1.0\n";
   exit 0;
}

pod2usage(2) if $help;
pod2usage(2) unless ($test);
pod2usage(2) unless ($ref);

open(TEST, "<$test") or die "Error: Cannot open test file \'$test\': $!\n";
open(REF, "<$ref") or die "Error: Cannot open reference file \'$ref\': $!\n";

my($ntest, $nref, $correct) = (0, 0, 0);
my(@words_test, @words_ref);

my $line = 0;

while(<TEST>) {
  chomp;
  s/^\s+//g;
  s/\s+$//g;
  @words_test = split /\s+/;

  $_=<REF>;
  chomp;
  s/^\s+//g;
  s/\s+$//g;
  @words_ref = split /\s+/;

  $line++;

  my $this_correct = &position_independent_correct_words;
  print "LINE $line ", 1 - (($this_correct - max(0, @words_test - @words_ref)) / @words_ref), "\n" if ($seg);

  $correct += $this_correct;
  $ntest += @words_test;
  $nref += @words_ref;
}

close(TEST);
close(REF);

print 1 - (($correct - max(0, $ntest - $nref)) / $nref), "\n";


sub position_independent_correct_words {
  my (%hash_test, %hash_ref);

  foreach (@words_test) {
    $hash_test{$_}++;
  }

  foreach (@words_ref) {
    $hash_ref{$_}++;
  }

  my $correct=0;

  foreach (keys %hash_test) {
    if(defined($hash_ref{$_})) {
      $correct += min($hash_test{$_}, $hash_ref{$_});
    }
  }

  return $correct;
}

sub max {
  my @list = @_;
  my $max = $list[0];

  foreach my $i (@list) {
    $max = $i if ($i > $max);
  }
   return $max;
}

sub min {
  my @list = @_;
  my $min = $list[0];

  foreach my $i (@list) {
    $min = $i if ($i < $min);
  }
   return $min;
}

__END__

=head1 NAME

=head1 SYNOPSIS

PER.pl -test testfile -ref reffile

Options:

  -test|-t     Specify the file with the translation to evaluate
  -ref|-r      Specify the file with the reference translation (only one)
  -seg|-s      Report PER also at segment level
  -help|-h     Show this help message
  -version|-v  Show version information and exit

This software calculates (at segment level and document level)
the position-independent error rate (PER) between an automatic translation
and a reference translation.

(c) 2006-2010 Felipe Sánchez-Martínez
(c) 2006-2010 Universitat d'Alacant

This software is licensed under the GNU GENERAL PUBLIC LICENSE version
3, or at your option any latter version. See
http://www.gnu.org/copyleft/gpl.html for a complete version of the
license.