1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
|
package Lingua::Translit::Tables;
#
# Copyright (C) 2007-2008 ...
# Alex Linke <alinke@lingua-systems.com>
# Rona Linke <rlinke@lingua-systems.com>
# Copyright (C) 2009-2016 Lingua-Systems Software GmbH
# Copyright (C) 2016-2017 Netzum Sorglos, Lingua-Systems Software GmbH
# Copyright (C) 2017-2022 Netzum Sorglos Software GmbH
#
use strict;
use warnings;
use utf8;
require 5.008;
our $VERSION = '0.28';
use Carp;
=pod
=encoding utf8
=head1 NAME
Lingua::Translit::Tables - provides transliteration tables
=head1 SYNOPSIS
use Lingua::Translit::Tables qw/:checks/;
my $truth;
$truth = translit_supported("ISO 9");
$truth = translit_reverse_supported("ISO 9");
use Lingua::Translit::Tables qw/:list/;
translit_list_supported();
=head1 DESCRIPTION
This module is primarily used to provide transliteration tables for
L<Lingua::Translit> and therefore allows one to separate data and algorithm.
Beyond that, it provides routines to check if a given transliteration is
supported and allows one to print a simple list of supported transliterations
along with some meta information.
=head1 EXPORTS
No symbols are exported by default.
Use either the routine's name or one of the following I<tags> to import
symbols to your namespace.
=over 4
=item B<all>
Import all routines.
=item B<checks>
Import all routines that allow one to check if a given transliteration is
supported: translit_supported() and translit_reverse_supported().
=item B<list>
Import translit_list_supported(). (Convenience tag)
=back
=cut
# Exporter setup: inherit from Exporter so that callers can import the
# public routines either by name or through one of the tags below.
require Exporter;
our @ISA = qw/Exporter/;
our @EXPORT = qw//; # Export nothing by default
# Routines that may be imported on request.
our @EXPORT_OK = qw/translit_supported translit_reverse_supported
translit_list_supported/;
# Import tags, requested as e.g. qw/:checks/ (see EXPORTS in the POD above).
our %EXPORT_TAGS = (
checks => [qw/translit_supported translit_reverse_supported/],
list => [qw/translit_list_supported/],
all => [@EXPORT_OK] # every routine listed in @EXPORT_OK
);
# Package-global hash holding all transliteration tables.
# For convenience, the tables are initialized at the bottom of this file.
# The routines in this file match its keys case-insensitively against the
# identifier produced by _get_table_id() and read the fields "name", "desc",
# "reverse" and "rules" of each entry.
our %tables;
# Used internally to retrieve a reference to a single transliteration table.
#
# Takes a table name or identifier (e.g. "Common DEU" or "common_deu"),
# normalises it with _get_table_id() and compares it case-insensitively
# against the keys of %tables.
#
# Returns the matching table (a hash reference) after passing it through
# _handle_perl_unicode_bug(). Returns nothing (undef in scalar context, the
# empty list in list context) if the name is false or no table matches.
sub _get_table_reference {
    my $name = shift();
    return unless $name;

    my $id = _get_table_id($name);

    foreach my $table ( keys %tables ) {

        # Match the identifier literally (\Q...\E): identifiers such as
        # "gost_7.79_rus" contain regex metacharacters, and a caller-supplied
        # name must never be interpreted as a pattern (an unbalanced "(" or
        # "[" would otherwise abort with a regex compilation error).
        return _handle_perl_unicode_bug( $tables{$table} )
            if $table =~ /^\Q$id\E$/i;
    }

    return;
}
# Work around Perl's "Unicode Bug", which affects code points in the Latin-1
# block (see perlunicode, section "The 'Unicode Bug'").
#
# Takes a table (hash reference) and upgrades the "from" and "to" strings of
# every entry in its "rules" list to Perl's internal UTF-8 representation.
# If a rule has a defined "context", its "before" and "after" strings are
# upgraded as well, but only when they are defined.
#
# The table is modified in place; the same reference is returned.
sub _handle_perl_unicode_bug {
    my ($table) = @_;

    foreach my $rule ( @{ $table->{rules} } ) {
        foreach my $field (qw/from to/) {
            utf8::upgrade( $rule->{$field} );
        }

        # Context is optional; nothing more to do for rules without one.
        my $context = $rule->{context};
        next unless defined $context;

        foreach my $side (qw/before after/) {
            utf8::upgrade( $context->{$side} )
                if defined $context->{$side};
        }
    }

    return $table;
}
=head1 ROUTINES
=head2 translit_supported(I<translit_name>)
Returns true (1), iff I<translit_name> is supported. False (0) otherwise.
=cut
sub translit_supported {

    # Normalise the requested name to a table identifier, then report
    # whether a table can be retrieved for it.
    my $id = _get_table_id( $_[0] );

    return 1 if _get_table_reference($id);
    return 0;
}
=head2 translit_reverse_supported(I<translit_name>)
Returns true (1), iff I<translit_name> is supported and allows reverse
transliteration. Returns false (0) if it is supported but not reversible.
Croaks if no table can be retrieved for I<translit_name>.
=cut
sub translit_reverse_supported {
    my $id    = _get_table_id( $_[0] );
    my $table = _get_table_reference($id);

    # An unknown transliteration is treated as an error, not as "false".
    if ( !$table ) {
        croak("Failed to retrieve table for $_[0].");
    }

    # Reversible only if the table's "reverse" field is the string "true".
    return 1 if $table->{reverse} =~ /^true$/;
    return 0;
}
=head2 B<translit_list_supported()>
Prints a list of all supported transliterations to STDOUT (UTF-8 encoded),
providing the following information:
* Name
* Reversibility
* Description
The same information is provided in this document as well:
=cut
sub translit_list_supported {

    # Encode is only needed here, so it is loaded on demand.
    require Encode;

    # One line per table, ordered by table key.
    foreach my $id ( sort keys %tables ) {
        my $entry = $tables{$id};

        my $name     = Encode::encode( 'utf8', $entry->{name} );
        my $negation = $entry->{reverse} eq "false" ? 'not ' : '';
        my $desc     = Encode::encode( 'utf8', $entry->{desc} );

        printf( "%s, %sreversible, %s\n", $name, $negation, $desc );
    }
}
=head1 SUPPORTED TRANSLITERATIONS
=over 4
=item Cyrillic
I<ALA-LC RUS>, not reversible, ALA-LC:1997, Cyrillic to Latin, Russian
I<ISO 9>, reversible, ISO 9:1995, Cyrillic to Latin
I<ISO/R 9>, reversible, ISO 9:1954, Cyrillic to Latin
I<DIN 1460 RUS>, reversible, DIN 1460:1982, Cyrillic to Latin, Russian
I<DIN 1460 UKR>, reversible, DIN 1460:1982, Cyrillic to Latin, Ukrainian
I<DIN 1460 BUL>, reversible, DIN 1460:1982, Cyrillic to Latin, Bulgarian
I<Streamlined System BUL>, not reversible, The Streamlined System: 2006,
Cyrillic to Latin, Bulgarian
I<GOST 7.79 RUS>, reversible, GOST 7.79:2000 (table B), Cyrillic to Latin,
Russian
I<GOST 7.79 RUS OLD>, not reversible, GOST 7.79:2000 (table B), Cyrillic to
Latin with support for Old Russian (pre 1918), Russian
I<GOST 7.79 UKR>, reversible, GOST 7.79:2000 (table B), Cyrillic to Latin,
Ukrainian
I<BGN/PCGN RUS Standard>, not reversible, BGN/PCGN:1947 (Standard Variant),
Cyrillic to Latin, Russian
I<BGN/PCGN RUS Strict>, not reversible, BGN/PCGN:1947 (Strict Variant),
Cyrillic to Latin, Russian
=item Greek
I<ISO 843>, not reversible, ISO 843:1997, Greek to Latin
I<DIN 31634>, not reversible, DIN 31634:1982, Greek to Latin
I<Greeklish>, not reversible, Greeklish (Phonetic), Greek to Latin
=item Latin
I<Common CES>, not reversible, Czech without diacritics
I<Common DEU>, not reversible, German without umlauts
I<Common POL>, not reversible, Unaccented Polish
I<Common RON>, not reversible, Romanian without diacritics as commonly used
I<Common SLK>, not reversible, Slovak without diacritics
I<Common SLV>, not reversible, Slovenian without diacritics
I<ISO 8859-16 RON>, reversible, Romanian with appropriate diacritics
=item Arabic
I<Common ARA>, not reversible, Common Romanization of Arabic
=item Sanskrit
I<IAST Devanagari>, not reversible, IAST Romanization to Devanāgarī
I<Devanagari IAST>, not reversible, Devanāgarī to IAST Romanization
=back
=head1 ADDING NEW TRANSLITERATIONS
In case you want to add your own transliteration tables to
L<Lingua::Translit>, have a look at the developer documentation at
L<https://www.netzum-sorglos.de/software/lingua-translit/developer-documentation.html>.
A template of a transliteration table is provided as well
(F<xml/template.xml>) so you can easily start developing.
=head1 BUGS
None known.
Please report bugs using CPAN's request tracker at
L<https://rt.cpan.org/Public/Dist/Display.html?Name=Lingua-Translit>.
=head1 SEE ALSO
L<Lingua::Translit>
L<http://www.netzum-sorglos.de/software/lingua-translit/>
=head1 CREDITS
Thanks to Dr. Daniel Eiwen, Romanisches Seminar, Universitaet Koeln for his
help on Romanian transliteration.
Thanks to Dmitry Smal and Rusar Publishing for contributing the "ALA-LC RUS"
transliteration table.
Thanks to Ahmed Elsheshtawy for his help implementing the "Common ARA" Arabic
transliteration.
Thanks to Dusan Vuckovic for contributing the "ISO/R 9" transliteration table.
Thanks to Ștefan Suciu for contributing the "ISO 8859-16 RON" transliteration
table.
Thanks to Philip Kime for contributing the "IAST Devanagari" and "Devanagari
IAST" transliteration tables.
Thanks to Nikola Lečić for contributing the "BGN/PCGN RUS Standard" and
"BGN/PCGN RUS Strict" transliteration tables.
=head1 AUTHORS
Alex Linke <alinke@netzum-sorglos.de>
Rona Linke <rlinke@netzum-sorglos.de>
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2007-2008 Alex Linke and Rona Linke
Copyright (C) 2009-2016 Lingua-Systems Software GmbH
Copyright (C) 2016-2017 Netzum Sorglos, Lingua-Systems Software GmbH
Copyright (C) 2017-2022 Netzum Sorglos Software GmbH
This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut
# Derive a table's identifier from the table's name: the name is lowercased
# and every whitespace character is replaced by an underscore,
# e.g. "Common DEU" -> "common_deu".
#
# Returns the empty string if the name is false (undef, "" or "0").
sub _get_table_id {
    my ($name) = @_;

    if ( !$name ) {
        return "";
    }

    my $id = lc($name);
    $id =~ s/\s/_/g;

    return $id;
}
# For convenience, the next line is automatically substituted with the set
# of transliteration tables at build time.
# NOTE(review): presumably the build step locates the line via the PLACEHOLDER
# marker - confirm against the build script before reformatting it.
%tables; # PLACEHOLDER
# A module file has to end with a true value for require/use to succeed.
1;
# vim: set ft=perl sts=4 sw=4 ts=4 ai et:
|