# $File: //member/autrijus/Locale-Maketext-Fuzzy/lib/Locale/Maketext/Fuzzy.pm $ $Author: autrijus $
# $Revision: #4 $ $Change: 1124 $ $DateTime: 2002/10/01 07:37:28 $

package Locale::Maketext::Fuzzy;
$Locale::Maketext::Fuzzy::VERSION = '0.02';

use strict;
use Locale::Maketext;
use base 'Locale::Maketext';

=head1 NAME

Locale::Maketext::Fuzzy - Maketext from already interpolated strings

=head1 VERSION

This document describes version 0.02 of Locale::Maketext::Fuzzy.

=head1 SYNOPSIS

    package MyApp::L10N;
    use base 'Locale::Maketext::Fuzzy'; # instead of Locale::Maketext

    package MyApp::L10N::de;
    use base 'MyApp::L10N';
    our %Lexicon = (
        # Exact match should always be preferred if possible
        "0 camels were released."
            => "Exact match",

        # Fuzzy match candidate
        "[quant,_1,camel was,camels were] released."
            => "[quant,_1,Kamel wurde,Kamele wurden] freigegeben.",

        # This could also match fuzzily, but is less preferred
        "[_2] released[_1]"
            => "[_1][_2] ist frei[_1]",
    );

    package main;
    my $lh = MyApp::L10N->get_handle('de');

    # All ->maketext calls below will become ->maketext_fuzzy instead
    $lh->override_maketext(1);

    # This prints "Exact match"
    print $lh->maketext('0 camels were released.');

    # "1 Kamel wurde freigegeben." -- quant() gets 1
    print $lh->maketext('1 camel was released.');

    # "2 Kamele wurden freigegeben." -- quant() gets 2
    print $lh->maketext('2 camels were released.');

    # "3 Kamele wurden freigegeben." -- parameters are ignored
    print $lh->maketext('3 released.');

    # "4 Kamele wurden freigegeben." -- normal usage
    print $lh->maketext('[*,_1,camel was,camels were] released.', 4);

    # "!Perl ist frei!" -- matches the broader one
    # Note that the sequence ([_2] before [_1]) is preserved
    print $lh->maketext('Perl released!');

=head1 DESCRIPTION

This module is a subclass of C<Locale::Maketext>, with additional
support for localizing messages that already contain interpolated
variables.
This is most useful when the messages are returned by external modules
-- for example, to match C<dir: command not found> against
C<[_1]: command not found>.

Of course, this module is also useful if you're simply too lazy
to use the

    $lh->maketext("[quant,_1,file,files] deleted.", $count);

syntax, but wish to write

    $lh->maketext_fuzzy("$count files deleted");

instead, and have the correct plural form figured out automatically.

If C<maketext_fuzzy> seems too long to type for you, this module
also provides a C<override_maketext> method to turn I<all> C<maketext>
calls into C<maketext_fuzzy> calls.

=head1 METHODS

=head2 $lh->maketext_fuzzy(I<key>[, I<parameters...>]);

That method takes exactly the same arguments as the C<maketext> method
of C<Locale::Maketext>.

If I<key> is found in lexicons, it is applied in the same way as
C<maketext>.  Otherwise, it looks at all lexicon entries that could
possibly yield I<key>, by turning C<[...]> sequences into C<(.*?)> and
matching the resulting regular expression against I<key>.

Once it finds all candidate entries, the longest one replaces the
I<key> for the real C<maketext> call.  Variables matched by its bracket
sequences (C<$1>, C<$2>...) are placed before I<parameters>; the order
of variables in the matched entry is correctly preserved.

For example, if the matched entry in C<%Lexicon> is C<Test [_1]>,
this call:

    $lh->maketext_fuzzy("Test string", "param");

is equivalent to this:

    $lh->maketext("Test [_1]", "string", "param");

However, most of the time you won't need to supply I<parameters> to
a C<maketext_fuzzy> call, since all parameters are already interpolated
into the string.

=head2 $lh->override_maketext([I<flag>]);

If I<flag> is true, this accessor method turns C<$lh-E<gt>maketext>
into an alias for C<$lh-E<gt>maketext_fuzzy>, so all subsequent
C<maketext> calls in the C<$lh>'s packages are automatically fuzzy.
A false I<flag> restores the original behaviour.  If the flag is not
specified, returns the current status of override; the default is
0 (no overriding).

Note that this call only modifies the symbol table of the I<language
class> that C<$lh> belongs to, so other languages are not affected.
If you want to override all language handles in a certain application,
try this:

    MyApp::L10N->override_maketext(1);

=head1 CAVEATS

=over 4

=item *

The "longer is better" heuristic to determine the best match is
reasonably good, but could certainly be improved.

=item *

Currently, C<"[quant,_1,file] deleted"> won't match C<"3 files deleted">;
you'll have to write C<"[quant,_1,file,files] deleted"> instead, or
simply use C<"[_1] file deleted"> as the lexicon key and put the correct
plural form handling into the corresponding value.

=item *

When used in combination with C<Locale::Maketext::Lexicon>'s C<Tie>
backend, all keys would be iterated over each time a fuzzy match is
performed, and may cause serious speed penalty.  Patches welcome.

=back

=cut

# override_maketext($class_or_handle [, $flag])
#
# Switches the fuzzy override on or off for a single class, or reports
# its state:
#   - true $flag           : installs maketext_fuzzy as "maketext" in the class
#   - false $flag (passed) : removes the class's own "maketext" entry
#   - $flag omitted        : changes nothing
# Returns 1 if the class itself has a defined "maketext" sub afterwards,
# 0 otherwise.
sub override_maketext {
    my ($class, $flag) = @_;

    # Accept a handle object as well as a class name.
    $class = ref($class) if ref($class);

    # The symbol table is addressed by name below, which needs symbolic refs.
    no strict 'refs';

    if ($flag) {
        *{"$class\::maketext"} = \&maketext_fuzzy;
    }
    elsif (@_ >= 2) {
        # Only reached when a flag was actually passed and it is false.
        # NOTE(review): this deletes the whole "maketext" symbol-table entry
        # of the class, not just an alias installed by this method.
        delete ${"$class\::"}{maketext};
    }

    # NOTE(review): this tests for *any* maketext sub defined directly in the
    # class; it does not check that the sub is maketext_fuzzy.
    return (defined &{"$class\::maketext"} ? 1 : 0);
}

# Global cache of entries and their regexified forms
# NOTE(review): no use of %regex_cache survives in this copy; it was
# presumably referenced in the truncated part of maketext_fuzzy below.
my %regex_cache;

# maketext_fuzzy($handle, $phrase, @params)
#
# Looks $phrase up in the handle's lexicons.  On an exact key match it defers
# to the parent class's maketext with the arguments unchanged.  Otherwise it
# collects bracketed lexicon entries whose regexified form matches $phrase,
# picks the longest such entry, and calls the parent's maketext with that
# entry as the key, the values extracted from $phrase first, and the
# caller's @params after them.  Returns $phrase unchanged when nothing
# matches.
sub maketext_fuzzy {
    # Remove handle and phrase from @_; what remains are the caller's params.
    my ($handle, $phrase) = splice(@_, 0, 2);

    # An array of all lexicon hashrefs
    my @lexicons = @{$handle->_lex_refs};

    # Try exact match if possible at all.
    foreach my $lex (@lexicons) {
        return $handle->SUPER::maketext($phrase, @_)
            if exists $lex->{$phrase};
    }

    # Keys are matched entries; values are arrayrefs of extracted params
    my %candidate;

    # Fuzzy match phase 1 -- extract all candidates
    foreach my $lex (@lexicons) {
        # We're not interested in non-bracketed entries, so ignore them
        # NOTE(review): the next line is damaged in this copy -- the text
        # between "(?" and "[0]) or next;" is missing.  The surviving code
        # still needs $re (an arrayref whose [0] is matched against the
        # phrase and whose [1] is a list of indices) and @vars from the lost
        # span.  Restore it from the upstream distribution; do not guess.
        foreach my $entry (grep /(?:(?[0]) or next;
            # With an index list in $re->[1], reorder the captured values by
            # it; otherwise keep them in capture order.  ||= keeps the first
            # result recorded for an entry.
            $candidate{$entry} ||= (
                @{$re->[1]} ? [ @vars[@{$re->[1]}] ] : \@vars
            );
        }
    }

    # Fail early if we cannot find anything that matches
    return $phrase unless %candidate;

    # Fuzzy match phase 2 -- select the best candidate
    $phrase = (sort {
        # For now, we just use a very crude heuristic: "Longer is better"
        # (ties are broken by descending string comparison)
        length($b) <=> length($a) or $b cmp $a
    } keys %candidate)[0];

    return $handle->SUPER::maketext(
        $phrase, @{$candidate{$phrase}}, @_
    );
}

# _regexify($entry)
#
# NOTE(review): this definition is damaged in this copy -- everything between
# "(?" and " length($a)" is missing.  The surviving tail uses $out, @choices
# and $ordref, none of which are declared in the visible text, so it
# presumably belongs to a second helper whose header was lost.  Restore both
# from the upstream distribution.
sub _regexify {
    # Escape every regex metacharacter in the entry before rewriting it.
    my $text = quotemeta(shift);
    my @ords;

    $text =~ s{
        (               # capture into $1...
            (? length($a)       # longest first
            } map {
                /^_(?:(\d+)|\\\*)$/
                    ? do {
                        # Record the zero-based position for _N; _* records
                        # nothing.  Either way the choice becomes ''.
                        push @{$ordref}, ($1 - 1) if defined $1;
                        '';
                    }
                    : $_        # turn _1, _2, _*... into ''
            } @choices
        ) . ')';

        # Drop a trailing empty "(?:)" group.
        $out =~ s/\Q(?:)\E$//;
    }

    return $out;
}

1;

=head1 SEE ALSO

L<Locale::Maketext>, L<Locale::Maketext::Lexicon>

=head1 BACKGROUND

This particular module was written to facilitate an I layer for Slashcode's I