1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
|
#!/usr/bin/env perl
# (c) Burak Gursoy. Distributed under the Perl License.
use strict;
use warnings;
use subs qw(_p);
use lib qw( .. );
use constant HUNDRED => 100;
use Carp qw( croak );
use Data::Dumper;
use Getopt::Long;
use HTTP::BrowserDetect;
use HTTP::DetectUserAgent;
use HTML::ParseBrowser;
use Parse::HTTP::UserAgent;
use Text::Table;
GetOptions(\my %opt, qw(
debug
));
our $SILENT = 1;
do 't/db.pl';
run();
sub run {
my @tests = database({ thaw => 1 });
welcome( scalar @tests );
my @fail_common = map { $_ => 0 } qw( lang os version );
my %fail = (
'Parse::HTTP::UserAgent' => { name => {}, @fail_common },
'HTML::ParseBrowser' => { name => {}, @fail_common },
'HTTP::DetectUserAgent' => { name => {}, @fail_common },
'HTTP::BrowserDetect' => { name => {}, @fail_common },
);
my %total;
foreach my $test ( @tests ) {
my %ok = parse_http_useragent( $test->{string} );
my %hdua = http_detectuseragent( $test->{string} );
my %hpb = html_parsebrowser( $test->{string} );
my %hbd = http_browserdetect( $test->{string} );
my %is = set_is( \%ok, \%hpb, \%hbd, \%hdua, $test->{string} );
foreach my $adjust ( qw( name lang version os ) ) {
++$total{ $adjust } if $is{ $adjust };
}
$hdua{name} = q{} if $hdua{name} && $hdua{name} eq 'Unknown';
failures( \%fail, \%is, \%ok, \%hdua, \%hbd, \%hpb );
my $phua_fail = ( $is{lang} && ! $ok{lang} ) ||
$is{v_nok} ||
( $is{os} && ! $ok{os} ) ||
( $is{name} && ! $ok{name} );
if ( $opt{debug} && $phua_fail ) {
debug_fail( \%is, \%ok, \%hdua, \%hpb, \%hbd, $test->{string} );
}
}
results( \%fail, \%total );
return;
}
sub welcome {
my $total = shift;
return _p <<"ATTENTION";
*** This is a test to compare the accuracy of the parsers.
*** The data set is from the test suite. There are $total UA strings
*** Parse::HTTP::UserAgent will detect all of them
*** A tiny fraction of the regressions can be related to wrong parsing.
*** Equation tests are not performed. Tests are boolean.
This may take a while. Please stand by ...
ATTENTION
}
sub set_is {
my($ok, $hpb, $hbd, $hdua, $string) = @_;
my $fetch = sub {
my($field, @slots) = @_;
my @rv = grep { $_ }
map { $_->{ $field } }
$ok, $hpb, $hbd, $hdua;
return $rv[0];
};
my %is = map { $_ => $fetch->( $_ ) } qw( name lang version os );
$is{v_nok} = $is{version}
&& ! $ok->{version}
&& _valid_v( $is{version}, $string );
return %is;
}
sub debug_fail {
my($is, $ok, $hdua, $hpb, $hbd, $string) = @_;
_p "$string\n",
_p "LANG : $is->{lang}\n" if $is->{lang} && ! $ok->{lang};
_p "VERSION: $is->{version}\n" if $is->{v_nok};
_p "OS : $is->{os}\n" if $is->{os} && ! $ok->{os};
_p "NAME : $is->{name}\n" if $is->{name} && ! $ok->{name};
_p Dumper({
parse_http_useragent => $ok,
http_detectuseragent => $hdua,
html_parsebrowser => $hpb,
http_browserdetect => $hbd,
});
_p q{-} x '80', "\n";
return;
}
sub results {
my($fail, $total) = @_;
my $tb = Text::Table->new(
q{|}, 'Parser',
q{|}, 'Name FAILS',
q{|}, 'Version FAILS',
q{|}, 'Language FAILS',
q{|}, 'OS FAILS',
q{|},
);
foreach my $parser ( keys %{$fail} ) {
my $all = $fail->{$parser}{name};
my $name = 0;
$name += $all->{$_} for keys %{ $all };
my $v = ratio( $fail->{$parser}{version}, $total->{version} );
my $l = ratio( $fail->{$parser}{lang} , $total->{lang} );
my $os = ratio( $fail->{$parser}{os} , $total->{os} );
$name = ratio( $name , $total->{name} );
$tb->load([
q{|}, $parser,
q{|}, $name,
q{|}, $v,
q{|}, $l,
q{|}, $os,
q{|},
]);
}
_p $tb->rule( qw( - + ) )
. $tb->title
. $tb->rule( qw( - + ) )
. $tb->body
. $tb->rule( qw( - + ) )
;
return;
}
sub ratio {
my $v = shift;
my $tot = shift;
my $r = $v ? sprintf('%.2f', ($v*HUNDRED)/$tot) : '0.00';
return sprintf '% 4d - % 6s%%', $v, $r;
}
sub parse_http_useragent {
my $ua = Parse::HTTP::UserAgent->new( shift );
my %rv = $ua->as_hash;
$rv{name} = 'Internet Explorer' if $rv{name} && $rv{name} eq 'MSIE';
return %rv;
}
sub html_parsebrowser {
my $ua = HTML::ParseBrowser->new( shift );
my %rv = map { $_ => $ua->$_() } qw(
user_agent
languages
language
langs
lang
detail
useragents
properties
name
version
v
major
minor
os
ostype
osvers
osarc
);
# version is a hash with major/minor crap
$rv{_version} = delete $rv{version};
$rv{version} = $rv{v};
return %rv;
}
sub http_browserdetect {
# can not detect lang
my $ua = HTTP::BrowserDetect->new( shift );
return version => $ua->version,
os => $ua->os_string,
name => $ua->browser_string,
;
}
sub http_detectuseragent {
my $ua = HTTP::DetectUserAgent->new( shift );
my %rv = map { $_ => $ua->$_() } qw (name version vendor type os);
return %rv;
}
sub failures {
my($fail, $is, $ok, $hdua, $hbd, $hpb) = @_;
no strict qw( refs );
foreach my $name ( qw( lang version os name ) ) {
&{ '_fail_' . $name }(
$fail, $is, $ok, $hdua, $hbd, $hpb
);
}
return;
}
sub _fail_lang {
my($fail, $is, $ok, $hdua, $hbd, $hpb) = @_;
my $L = $is->{lang};
$fail->{'Parse::HTTP::UserAgent'}->{lang}++ if $L && ! $ok->{lang};
$fail->{'HTTP::DetectUserAgent' }->{lang}++ if $L && ! $hdua->{lang};
$fail->{'HTML::ParseBrowser' }->{lang}++ if $L && ! $hpb->{lang};
$fail->{'HTTP::BrowserDetect' }->{lang}++ if $L && ! $hbd->{lang};
return;
}
sub _fail_version {
my($fail, $is, $ok, $hdua, $hbd, $hpb) = @_;
my $v = $is->{version};
$fail->{'Parse::HTTP::UserAgent'}->{version}++ if $is->{v_nok};
$fail->{'HTTP::DetectUserAgent' }->{version}++ if $v && ! $hdua->{version};
$fail->{'HTML::ParseBrowser' }->{version}++ if $v && ! $hpb->{v};
$fail->{'HTTP::BrowserDetect' }->{version}++ if $v && ! $hbd->{version};
return;
}
sub _fail_os {
my($fail, $is, $ok, $hdua, $hbd, $hpb) = @_;
my $os = $is->{os};
$fail->{'Parse::HTTP::UserAgent'}->{os}++ if $os && ! $ok->{os};
$fail->{'HTTP::DetectUserAgent' }->{os}++ if $os && ! $hdua->{os};
$fail->{'HTML::ParseBrowser' }->{os}++ if $os && ! $hpb->{os};
$fail->{'HTTP::BrowserDetect' }->{os}++ if $os && ! $hbd->{os};
return;
}
sub _fail_name {
my($fail, $is, $ok, $hdua, $hbd, $hpb) = @_;
my $n = $is->{name};
++$fail->{'Parse::HTTP::UserAgent'}->{name}{ $n } if $n && ! $ok->{name};
++$fail->{'HTTP::DetectUserAgent' }->{name}{ $n } if $n && ! $hdua->{name};
++$fail->{'HTML::ParseBrowser' }->{name}{ $n } if $n && ! $hpb->{name};
++$fail->{'HTTP::BrowserDetect' }->{name}{ $n } if $n && ! $hbd->{name};
return;
}
sub _valid_v { # prevent false-positives
my($v, $str)= @_;
return $str !~ m{ \A Mozilla [/] $v \s }xms;
}
sub _p {
print {*STDOUT} @_ or croak "Can't print: $!";
}
1;
__END__
|