1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
|
package Algorithm::NaiveBayes::Util;
use strict;
use base qw(Exporter);
use vars qw(@EXPORT_OK);
@EXPORT_OK = qw(sum sum_hash max variance add_hash rescale);
use List::Util qw(max sum);
sub sum_hash {
my $href = shift;
return sum(values %$href);
}
sub variance {
my $array = shift;
return 0 unless @$array > 1;
my $mean = @_ ? shift : sum($array) / @$array;
my $var = 0;
$var += ($_ - $mean)**2 foreach @$array;
return $var / (@$array - 1);
}
sub add_hash {
my ($first, $second) = @_;
foreach my $k (keys %$second) {
$first->{$k} += $second->{$k};
}
}
sub rescale {
my ($scores) = @_;
# Scale everything back to a reasonable area in logspace (near zero), un-loggify, and normalize
my $total = 0;
my $max = max(values %$scores);
foreach (values %$scores) {
$_ = exp($_ - $max);
$total += $_**2;
}
$total = sqrt($total);
foreach (values %$scores) {
$_ /= $total;
}
}
1;
|