File: Table.pm

package info (click to toggle)
libmarc-charset-perl 0.95-1etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 2,444 kB
  • ctags: 75
  • sloc: xml: 98,939; perl: 612; makefile: 52
file content (195 lines) | stat: -rw-r--r-- 3,745 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
package MARC::Charset::Table;

=head1 NAME 

MARC::Charset::Table - character mapping db

=head1 SYNOPSIS

    use MARC::Charset::Table;
    use MARC::Charset::Constants qw(:all);

    # create the table object
    my $table = MARC::Charset::Table->new();
   
    # get a code using the marc8 character set code and the character
    my $code = $table->lookup_by_marc8(CYRILLIC_BASIC, 'K');

    # get a code using the utf8 value
    $code = $table->lookup_by_utf8(chr(0x043A));

=head1 DESCRIPTION

MARC::Charset::Table is a wrapper around the character mapping database, 
which is implemented as a tied hash on disk. This database gets generated 
by Makefile.PL on installation of MARC::Charset using 
MARC::Charset::Compiler.

The database is essentially a key/value mapping where a key is a 
MARC-8 character set code + a MARC-8 character, or an integer representing the
UCS code point. These keys map to a serialized MARC::Charset::Code object.

=cut

use strict;
use warnings;
use POSIX;
use SDBM_File;
use MARC::Charset::Code;
use MARC::Charset::Constants qw(:all);
use Storable qw(freeze thaw);

=head2 new()

The consturctor.

=cut

sub new
{
    my $class = shift;
    my $self = bless {}, ref($class) || $class;
    $self->_init(O_RDONLY);
    return $self;
}


=head2 add_code()

Add a MARC::Charset::Code to the table.

=cut


sub add_code
{
    my ($self, $code) = @_;

    # the Code object is serialized
    my $frozen = freeze($code);

    # to support lookup by marc8 and utf8 values we 
    # stash away the rule in the db using two keys
    my $marc8_key = $code->marc8_hash_code();
    my $utf8_key = $code->utf8_hash_code();

    # stash away the marc8 lookup key
    $self->{db}->{$marc8_key} = $frozen;

    # stash away the utf8 lookup key (only if it's not already there!)
    # this means that the sets that appear in the xml file will have
    # precedence ascii/ansel
    $self->{db}->{$utf8_key} = $frozen unless exists $self->{db}->{$utf8_key};
}


=head2 get_code()

Retrieve a code using a hash key.

=cut

sub get_code
{
    my ($self, $key) = @_;
    my $db = $self->db();
    my $frozen = $db->{$key};
    return thaw($frozen) if $frozen;
    return undef;
}


=head2 lookup_by_marc8()

Looks up MARC::Charset::Code entry using a character set code and a MARC-8 
value.

    use MARC::Charset::Constants qw(HEBREW);
    $code = $table->lookup_by_marc8(HEBREW, chr(0x60));

=cut

sub lookup_by_marc8
{
    my ($self, $charset, $marc8) = @_;
    $charset = BASIC_LATIN if $charset eq ASCII_DEFAULT;
    return $self->get_code(sprintf('%s:%s', $charset, $marc8));
}


=head2 lookup_by_utf8()

Looks up a MARC::Charset::Code object using a utf8 value.

=cut

sub lookup_by_utf8
{
    my ($self, $value) = @_;
    return $self->get_code(ord($value));
}




=head2 db()

Returns a reference to a tied character database. MARC::Charset::Table
wraps access to the db, but you can get at it if you want.

=cut

sub db 
{
    return shift->{db};
}


=head2 db_path()

Returns the path to the character encoding database. Can be called 
statically too: 

    print MARC::Charset::Table->db_path();

=cut

sub db_path
{
    my $path = $INC{'MARC/Charset/Table.pm'};
    $path =~ s/\.pm$//;
    return $path;
}


=head2 brand_new()

An alternate constructor which removes the existing database and starts
afresh. Be careful with this one, it's really only used on MARC::Charset
installation.

=cut

sub brand_new 
{
    my $class = shift;
    my $self = bless {}, ref($class) || $class;
    $self->_init(O_CREAT|O_RDWR);
    return $self;
}


# helper function for initializing table internals

sub _init 
{
    my ($self,$opts) = @_;
    tie my %db, 'SDBM_File', db_path(), $opts, 0644;
    $self->{db} = \%db;
}





1;