use strict;

package Statistics::Contingency;
{
  $Statistics::Contingency::VERSION = '0.09';
}

#              Correct=Y   Correct=N
#             +-----------+-----------+
#  Assigned=Y |     a     |     b     |
#             +-----------+-----------+
#  Assigned=N |     c     |     d     |
#             +-----------+-----------+
#
# accuracy  = (a+d)/(a+b+c+d)
# precision = a/(a+b)
# recall    = a/(a+c)
# F1        = 2a/(2a + b + c)
#
# Edge cases:
#  precision(0,0,+,d) = 0
#  precision(a,0,c,d) = 1
#  precision(0,+,c,d) = 0
#  recall(a,b,0,d)    = 1
#  recall(0,b,+,d)    = 0
#  F1(a,0,0,d)        = 1
#  F1(0,+,+,d)        = 0
use Params::Validate qw(:all);
sub new {
  my $package = shift;
  my $self = bless { validate @_,
                     {
                       verbose    => { type => SCALAR, default => 0 },
                       categories => { type => ARRAYREF|HASHREF },
                     }
                   }, $package;

  $self->{$_} = 0 foreach qw(a b c d);

  my $c = delete $self->{categories};
  $self->{categories} = { map { ($_ => {a=>0, b=>0, c=>0, d=>0}) }
                          UNIVERSAL::isa($c, 'HASH') ? keys(%$c) : @$c };
  return $self;
}
sub set_entries {
  my $self = shift;
  @{ $self }{'a', 'b', 'c', 'd'} = @_;
}
sub add_result {
  my ($self, $assigned, $correct, $name) = @_;
  my $cats_table = $self->{categories};

  # Hashify
  foreach ($assigned, $correct) {
    $_ = {$_ => 1}, next unless ref $_;
    next if UNIVERSAL::isa($_, 'HASH');  # Leave alone
    $_ = { map {($_ => 1)} @$_ }, next if UNIVERSAL::isa($_, 'ARRAY');
    die "Unknown type '$_' for category list";
  }

  # Add to the micro/macro tables
  foreach my $cat (keys %$cats_table) {
    $cats_table->{$cat}{a}++, $self->{a}++ if  $assigned->{$cat} and  $correct->{$cat};
    $cats_table->{$cat}{b}++, $self->{b}++ if  $assigned->{$cat} and !$correct->{$cat};
    $cats_table->{$cat}{c}++, $self->{c}++ if !$assigned->{$cat} and  $correct->{$cat};
    $cats_table->{$cat}{d}++, $self->{d}++ if !$assigned->{$cat} and !$correct->{$cat};
  }

  if ($self->{verbose}) {
    print "$name: assigned=(@{[ keys %$assigned ]}) correct=(@{[ keys %$correct ]})\n";
  }

  # Clear any cached results
  delete $self->{macro};
  $self->{hypotheses}++;
}
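
# _invert($x, $y) computes $x / ($x + $y), honoring the edge cases
# listed at the top of this file: it returns 1 when $y is zero
# (including the 0/0 case) and 0 when $x is zero but $y is not.
# Precision, recall, and F1 are all ratios of this form.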
sub _invert {
  my ($self, $x, $y) = @_;
  return 1 unless $y;
  return 0 unless $x;
  return 1 / (1 + $y/$x);
}

sub _accuracy {
  my $h = $_[1];
  return 1 unless grep $h->{$_}, qw(a b c d);
  return +($h->{a} + $h->{d}) / ($h->{a} + $h->{b} + $h->{c} + $h->{d});
}

sub _error {
  my $h = $_[1];
  return 0 unless grep $h->{$_}, qw(a b c d);
  return +($h->{b} + $h->{c}) / ($h->{a} + $h->{b} + $h->{c} + $h->{d});
}

sub _precision {
  my ($self, $h) = @_;
  return 0 if $h->{c} and !$h->{a} and !$h->{b};
  return $self->_invert($h->{a}, $h->{b});
}

sub _recall {
  my ($self, $h) = @_;
  return $self->_invert($h->{a}, $h->{c});
}

sub _F1 {
  my ($self, $h) = @_;
  return $self->_invert(2 * $h->{a}, $h->{b} + $h->{c});
}
# Fills in precision, recall, etc. for each category, and computes their averages
sub _macro_stats {
  my $self = shift;
  return $self->{macro} if $self->{macro};

  my @metrics = qw(precision recall F1 accuracy error);
  my $cats = $self->{categories};
  die "No category information has been recorded"
    unless keys %$cats;

  my %results;
  while (my ($cat, $scores) = each %$cats) {
    foreach my $metric (@metrics) {
      my $method = "_$metric";
      $results{$metric} += ($scores->{$metric} = $self->$method($scores));
    }
  }
  foreach (@metrics) {
    $results{$_} /= keys %$cats;
  }
  $self->{macro} = \%results;
}
sub micro_accuracy  { $_[0]->_accuracy( $_[0]) }
sub micro_error     { $_[0]->_error(    $_[0]) }
sub micro_precision { $_[0]->_precision($_[0]) }
sub micro_recall    { $_[0]->_recall(   $_[0]) }
sub micro_F1        { $_[0]->_F1(       $_[0]) }

sub macro_accuracy  { shift()->_macro_stats->{accuracy} }
sub macro_error     { shift()->_macro_stats->{error} }
sub macro_precision { shift()->_macro_stats->{precision} }
sub macro_recall    { shift()->_macro_stats->{recall} }
sub macro_F1        { shift()->_macro_stats->{F1} }

sub category_stats {
  my $self = shift;
  $self->_macro_stats;
  return $self->{categories};
}
sub stats_table {
  my $self = shift;
  my $figs = shift || 3;

  my @data = map $self->_sig_figs($_, $figs),
    (
     $self->macro_recall,
     $self->macro_precision,
     $self->macro_F1,
     $self->micro_recall,
     $self->micro_precision,
     $self->micro_F1,
     $self->micro_error,
    );

  my $m = 0;  # Max length of @data items
  for (@data) {
    $m = length() if length() > $m;
  }

  my $s = ' ' x ($m - 4);
  my $out = "+" . ("-" x (10 + 7*$m)) . "+\n";
  $out .= "| $s maR $s maP$s maF1 $s miR $s miP$s miF1 $s Err |\n";
  $out .= "| %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s |\n";
  $out .= "+" . ("-" x (10 + 7*$m)) . "+\n";
  return sprintf($out, @data);
}

sub _sig_figs {
  my ($self, $number, $figs) = @_;
  my $after_point = $figs - int ($number != 0 ? log($number)/log(10) : 0);
  return sprintf "%.${after_point}f", $number;
}
1;
__END__
=head1 NAME

Statistics::Contingency - Calculate precision, recall, F1, accuracy, etc.

=head1 VERSION

version 0.09

=head1 SYNOPSIS

 use Statistics::Contingency;
 
 my $s = Statistics::Contingency->new(categories => \@all_categories);
 while (...something...) {
   ...
   $s->add_result($assigned_categories, $correct_categories);
 }
 print "Micro F1: ", $s->micro_F1, "\n";  # Access a single statistic
 print $s->stats_table;                   # Show several stats in table form
=head1 DESCRIPTION

The C<Statistics::Contingency> class helps you calculate several
useful statistical measures based on 2x2 "contingency tables". I use
these measures to help judge the results of automatic text
categorization experiments, but they are useful in other situations as
well.

The general usage flow is to tally a whole bunch of results in the
C<Statistics::Contingency> object, then query that object to obtain
the measures you are interested in. When all results have been
collected, you can get a report on accuracy, precision, recall, F1,
and so on, with both macro-averaging and micro-averaging over
categories.

=head2 Macro vs. Micro Statistics

All of the statistics offered by this module can be calculated for
each category and then averaged, or can be calculated over all
decisions and then averaged. The former is called macro-averaging
(specifically, macro-averaging with respect to category), and the
latter is called micro-averaging. The two procedures bias the results
differently - micro-averaging tends to over-emphasize the performance
on the largest categories, while macro-averaging over-emphasizes the
performance on the smallest. It's often best to look at both of them
to get a good idea of how your data distributes across categories.
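
For example (with made-up counts), suppose category C<big> has a
contingency table of a=8, b=1, c=1, d=0, so its precision and recall
are both 8/9, about 0.89, while category C<small> has a=0, b=1, c=1,
d=8, so its precision and recall are both 0. Macro-averaging averages
the per-category scores, giving a precision of (0.89 + 0)/2, about
0.44, while micro-averaging sums the tables first (a=8, b=2, c=2,
d=8), giving a precision of 8/10 = 0.8. The large category dominates
the micro-average, and the failing small category drags down the
macro-average.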
=head2 Statistics available

All of the statistics are calculated based on a so-called "contingency
table", which looks like this:

             Correct=Y   Correct=N
            +-----------+-----------+
 Assigned=Y |     a     |     b     |
            +-----------+-----------+
 Assigned=N |     c     |     d     |
            +-----------+-----------+

a, b, c, and d are counts that reflect how the assigned categories
matched the correct categories. Depending on whether a
macro-statistic or a micro-statistic is being calculated, these
numbers will be tallied per-category or for the entire result set.

The following statistics are available:
=over 4

=item * accuracy

This measures the portion of all decisions that were correct
decisions. It is defined as C<(a+d)/(a+b+c+d)>. It falls in the
range from 0 to 1, with 1 being the best score.

Note that macro-accuracy and micro-accuracy will always give the same
number.

=item * error

This measures the portion of all decisions that were incorrect
decisions. It is defined as C<(b+c)/(a+b+c+d)>. It falls in the
range from 0 to 1, with 0 being the best score.

Note that macro-error and micro-error will always give the same
number.

=item * precision

This measures the portion of the assigned categories that were
correct. It is defined as C<a/(a+b)>. It falls in the range from 0
to 1, with 1 being the best score.

=item * recall

This measures the portion of the correct categories that were
assigned. It is defined as C<a/(a+c)>. It falls in the range from 0
to 1, with 1 being the best score.

=item * F1

This measures an even combination of precision and recall. It is
defined as C<2*p*r/(p+r)>. In terms of a, b, and c, it may be
expressed as C<2a/(2a+b+c)>. It falls in the range from 0 to 1, with
1 being the best score.

=back
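
As a concrete check of these formulas, the following snippet (with
made-up counts a=40, b=10, c=20, d=30) fills the table directly via
C<set_entries()>, described below:

 use Statistics::Contingency;
 my $s = Statistics::Contingency->new(categories => ['x']);
 $s->set_entries(40, 10, 20, 30);            # a, b, c, d
 print $s->micro_accuracy,  "\n";  # (40+30)/100       = 0.7
 print $s->micro_error,     "\n";  # (10+20)/100       = 0.3
 print $s->micro_precision, "\n";  # 40/(40+10)        = 0.8
 print $s->micro_recall,    "\n";  # 40/(40+20)        = 0.666...
 print $s->micro_F1,        "\n";  # 2*40/(2*40+10+20) = 0.727...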
The F1 measure is often the only simple measure that is worth trying
to maximize on its own - consider the fact that you can get a perfect
precision score by always assigning zero categories, or a perfect
recall score by always assigning every category. A truly smart system
will assign the correct categories and only the correct categories,
maximizing precision and recall at the same time, and therefore
maximizing the F1 score.

Sometimes it's worth trying to maximize the accuracy score, but
accuracy (and its counterpart error) are considered fairly crude
scores that don't give much information about the performance of a
categorizer.
=head1 METHODS

The general execution flow when using this class is to create a
C<Statistics::Contingency> object, add a bunch of results to it, and
then report on the results.

=over 4

=item * $e = Statistics::Contingency->new()

Returns a new C<Statistics::Contingency> object. Expects a
C<categories> parameter specifying the entire set of categories that
may be assigned during this experiment. Also accepts a C<verbose>
parameter - if true, some diagnostic status information will be
displayed when certain actions are performed.
=item * $e->add_result($assigned_categories, $correct_categories, $name)

Adds a new result to the experiment. The lists of assigned and
correct categories can be given as an array of category names
(strings), as a hash whose keys are the category names and whose
values are anything logically true, or as a single string if there is
only one category.

If you've already got the lists in hash form, this will be the fastest
way to pass them. Otherwise, the current implementation will convert
them to hash form internally in order to make its calculations
efficient.

The C<$name> parameter is an optional name for this result. It will
only be used in error messages or debugging/progress output.

In the current implementation, we only store the contingency tables
per category, as well as a table for the entire result set. This
means that you can't recover information about any particular single
result from the C<Statistics::Contingency> object.
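
For example, each of the following calls (with hypothetical category
names) uses one of the three accepted forms:

 $e->add_result(['sports', 'finance'], ['sports'], 'doc1');   # array refs
 $e->add_result({sports => 1, finance => 1}, {sports => 1});  # hash refs
 $e->add_result('sports', 'sports', 'doc3');                  # plain strings

The first two calls record identical results; the third form can only
express a single assigned and a single correct category.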
=item * $e->set_entries($a, $b, $c, $d)

If you don't wish to use the C<add_result()> interface, but still want
to take advantage of the calculation methods and the various edge
cases they handle, you can directly set the four elements of the
contingency table with this method.
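
For example, the following sets a table with no assignments at all
(a=0, b=0) but some unassigned correct categories (c=5), which
exercises the precision edge case:

 my $e = Statistics::Contingency->new(categories => ['x']);
 $e->set_entries(0, 0, 5, 10);
 print $e->micro_precision, "\n";  # 0, since c > 0 but a = b = 0
 print $e->micro_recall, "\n";     # 0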
=item * $e->micro_accuracy

Returns the micro-averaged accuracy for the data set.

=item * $e->micro_error

Returns the micro-averaged error for the data set.

=item * $e->micro_precision

Returns the micro-averaged precision for the data set.

=item * $e->micro_recall

Returns the micro-averaged recall for the data set.

=item * $e->micro_F1

Returns the micro-averaged F1 for the data set.

=item * $e->macro_accuracy

Returns the macro-averaged accuracy for the data set.

=item * $e->macro_error

Returns the macro-averaged error for the data set.

=item * $e->macro_precision

Returns the macro-averaged precision for the data set.

=item * $e->macro_recall

Returns the macro-averaged recall for the data set.

=item * $e->macro_F1

Returns the macro-averaged F1 for the data set.

=item * $e->stats_table

Returns a string combining several statistics in one graphic table.
Because accuracy is simply 1 minus error, only error is reported,
since it takes less space to print. An optional argument specifies
the number of significant digits to show in the data - the default is
3 significant digits.
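
For example:

 print $e->stats_table;     # 3 significant digits (the default)
 print $e->stats_table(5);  # 5 significant digits

The columns are labeled C<maR>, C<maP>, and C<maF1> for macro-averaged
recall, precision, and F1, C<miR>, C<miP>, and C<miF1> for their
micro-averaged equivalents, and C<Err> for the micro-averaged error.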
=item * $e->category_stats

Returns a hash reference whose keys are the names of each category,
and whose values contain the various statistical measures (accuracy,
error, precision, recall, and F1) for that category as a hash
reference. For example, to print a single statistic:

 print $e->category_stats->{sports}{recall}, "\n";

Or to print certain statistics for all categories:

 my $stats = $e->category_stats;
 while (my ($cat, $value) = each %$stats) {
   print "Category '$cat': \n";
   print "  Accuracy: $value->{accuracy}\n";
   print "  Precision: $value->{precision}\n";
   print "  F1: $value->{F1}\n";
 }
=back

=head1 AUTHOR

Ken Williams <kwilliams@cpan.org>

=head1 COPYRIGHT

Copyright 2002-2008 Ken Williams. All rights reserved.

This distribution is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut