# Benchmark: node traversal with the MeCab binding versus Text::MeCab.
use strict;
use Benchmark qw(cmpthese);
use blib;
use MeCab;
use Text::MeCab;
# Node attributes read from every node by both benchmark subs: used as hash
# keys on the nodes returned by MeCab::Tagger and as method names on the
# nodes returned by Text::MeCab.
my @fields = qw(id surface feature length);
# Sample text handed unchanged to both parsers (interpolating heredoc).
# NOTE(review): the body below looks like mis-decoded multibyte text
# (presumably Japanese in a legacy encoding such as EUC-JP) -- TODO confirm
# against the original file before trusting the benchmark input.
my $text = <<EOM;
ϤοͤȸƤǤ餳Ǥ⤿Ƚ̾Ǥʤ֤ߤθȤ⡢ˤȤäƼǤ롣ϤοͤεƤӵȤˡפȤʤ롣ɮäƤ⿴ƱǤ롣褽褽ƬʸʤɤϤȤƤȤˤʤʤ
䤬Τ礤ˤʤäΤϳҤǤ롣λϤޤ㡹ǤäٲˤѤƳ˹Ԥäͧã餼褤ȤüäΤǡ¿ζ̤ơгݤˤ϶ι̤䤷Ȥ䤬Ҥ夤ƻȷФʤˡƤӴͧãϡޤ˹鵢Ȥäˤ줬µǤäƤäɤͧãϤʤäͧãϤͤƤˤƤ˴ޤʤ뺧Ƥϸν餤ȷ뺧ˤϤޤǯ˴οͤʤäDzƵ٤ߤ٤Ȥ虜ζͷǤΤǤ롣˸Ƥɤ褦̤ˤϤɤƤʬʤäɤº줬µǤȤϸǤ굢٤ϤǤäϤȤȤˤʤää褿ϰͼĤ줿
عμȤϤޤˤϤޤʬΤdzҤˤäƤ褷äƤ褤ȤˤϡʬνɤαޤиͧãΤȤ©ҤǶԼͳΤʤˤǤäɤ⡢ععʤΤǯǯʤΤǡ٤ϻȤѤ⤷ʤääưͤܤäˤʤä̤˳湥ʽɤõݤ⤿ʤäΤǤ롣
ɤϳҤǤѤˤäͤΥΤȤϥʤΤˤĹıۤʤм꤬Ϥʤä֤ǹԤäƤϼ줿ɤĿͤϤˤĤǤƤƤ˳ؤϤᤤΤdzˤϻϰ̤Ƥ
ؤϤ˽гݤŤ֤֤äδ֤̤ȴưزȡդˤۤɤԲͼ郎Ǥ뤫Ȼפۤɡ褿ˤǺξ夬ưƤϳ椬Τ褦˹ƬǤ㤴㤷Ƥ⤢äΤäͤͤ⤿ʤ⡢䤫ʷʿޤơξ˿٤äƤߤꡢɨƬȤǤƤķͲΤǤä
ϼ¤λδ֤˸դФΤǤ롣λߤˤϳ㲰äϤդȤ餽ΰ츮˹ԤƤĹëդ礭ƤͤȰäơƼͭ崹ϤƤʤҤˤϡҤȤ⤳Ʊ崹ȤäʤΤɬפʤΤǤäϤߡǵ©볰ˡdzꡢĤϤ椤Τꡢ˹Ҥ仱¤ꤹΤǤ롣ʤˤʪޤ붲ϤäΤǡϳؤϤ뤿Ӥˤ㲰ذڤæƤˤƤ
EOM
# Benchmark body for the MeCab binding: build a tagger, parse $text into a
# node list, and read every attribute named in @fields from each node via
# hash access. Values are fetched and discarded; only the access cost matters.
sub mecab {
    my $tagger  = MeCab::Tagger->new();
    my $current = $tagger->parseToNode($text);

    # Follow the "next" links until a false value ends the walk.
    while ($current) {
        for my $attr (@fields) {
            $current->{$attr};
        }
        $current = $current->{next};
    }
}
# Benchmark body for Text::MeCab: build a parser, parse $text into a node
# list, and call every accessor named in @fields on each node. Return values
# are discarded; only the call cost matters.
sub text_mecab {
    my $parser  = Text::MeCab->new();
    my $current = $parser->parse($text);

    # Advance with the next() method until it yields a false value.
    while ($current) {
        for my $accessor (@fields) {
            $current->$accessor();
        }
        $current = $current->next;
    }
}
# Run each contender 100 times and print the comparison table.
my $iterations = 100;
my %contenders = (
    mecab      => \&mecab,
    text_mecab => \&text_mecab,
);
cmpthese($iterations, \%contenders);
# End of benchmark script.