1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
|
# -*-perl-*-
#+##############################################################################
#
# enable_encoding.init: handle --enable-encoding
#
# Copyright (C) 2008 Patrice Dumas <dumas@centre-cired.fr>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
#
#-##############################################################################
# for debugging printouts
#use Encode;
use strict;
use vars '$ENABLE_ENCODING_USE_ENTITY';
if (!$ENABLE_ENCODING_USE_ENTITY)
{
$USE_ISO = 0;
}
my $enable_encoding_default_init_out = $init_out;
$init_out = \&enable_encoding_init_out;
sub enable_encoding_is_entity($)
{
my $text = shift;
return 0 if (!$ENABLE_ENCODING_USE_ENTITY);
return 1 if ($text =~ /^&/ and $text =~ /;$/);
}
push @command_handler_process, \&enable_encoding_initialize_variables;
my @enable_encoding_accents_stack;
sub enable_encoding_initialize_variables()
{
@enable_encoding_accents_stack = ();
}
my %enable_encoding_default_accent;
sub enable_encoding_init_out()
{
&$enable_encoding_default_init_out();
# like utf8.init
if ($Texi2HTML::THISDOC{'ENCODING_NAME'} eq 'utf-8')
{
$normal_text = \&t2h_utf8_normal_text unless ($ENABLE_ENCODING_USE_ENTITY);
foreach my $key (keys(%unicode_accents), 'dotless')
{
$style_map{$key}->{'function'} = \&t2h_utf8_accent;
$style_map_texi{$key}->{'function'} = \&t2h_utf8_accent;
$style_map_pre{$key}->{'function'} = \&t2h_utf8_accent;
}
foreach my $key (%things_map)
{
if (exists($unicode_map{$key}) and ($unicode_map{$key} ne ''))
{
$things_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($things_map{$key}));
$texi_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($texi_map{$key}));
$pre_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($pre_map{$key}));
}
}
}
elsif (exists($makeinfo_encoding_to_map{$Texi2HTML::THISDOC{'ENCODING_NAME'}}))
{
my $enc_map = $makeinfo_encoding_to_map{$Texi2HTML::THISDOC{'ENCODING_NAME'}};
foreach my $key (keys(%unicode_accents), 'dotless')
{
$enable_encoding_default_accent{'normal'}->{$key} = $style_map{$key}->{'function'};
$enable_encoding_default_accent{'texi'}->{$key} = $style_map_texi{$key}->{'function'};
$enable_encoding_default_accent{'pre'}->{$key} = $style_map_pre{$key}->{'function'};
$style_map{$key}->{'function'} = \&enable_encoding_normal_accent;
$style_map_texi{$key}->{'function'} = \&enable_encoding_texi_accent;
$style_map_pre{$key}->{'function'} = \&enable_encoding_pre_accent;
}
foreach my $key (%things_map)
{
if (exists($unicode_map{$key}) and ($unicode_map{$key} ne '') and
exists($makeinfo_unicode_to_eight_bit{$enc_map}->{$unicode_map{$key}}))
{ # we let perl handle the conversion
$things_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($things_map{$key}));
$texi_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($texi_map{$key}));
$pre_map{$key} = chr(hex($unicode_map{$key})) unless (enable_encoding_is_entity($pre_map{$key}));
}
}
}
}
sub enable_encoding_normal_accent($$$)
{
return enable_encoding_accent ('normal', @_);
}
sub enable_encoding_texi_accent($$$)
{
return enable_encoding_accent ('texi', @_);
}
sub enable_encoding_pre_accent($$$)
{
return enable_encoding_accent ('pre', @_);
}
sub enable_encoding_accent($$$$)
{
my $in = shift;
my @other_args = @_;
my $accent = shift;
my $args = shift;
my $style_stack = shift;
my $text = $args->[0];
if (scalar(@enable_encoding_accents_stack))
{
# in that case, we already have a result ready that corresponds with the
# formatting of a part of the stack mapped to
# enable_encoding_accents_stack, so it is emptied and the innermost
# $text is returned as is such that the unmodified already formatted
# innermost formatted accented text is returned.
#print STDERR " doing nothing, still in stack (@enable_encoding_accents_stack), accent: $accent";
my $stack_accent = shift @enable_encoding_accents_stack;
#print STDERR " stack_accent $stack_accent\n";
return $text;
}
# in that case there is no enable_encoding_accents_stack, so we are
# at the closing of the innermost accented command. We will try to format
# all the stack in reverse(@$style_stack) that coresponds with
# accent commands
my @accents_stack = ();
my @styles = reverse(@$style_stack);
# accents are formatted and the intermediate results are kept, such
# that we can return the maximum of multiaccented letters that can be
# rendered with a given eight bit formatting.
# first put the letter in the stack
my @utf8_partial_results = { 'result' => $text,
'accents_stack' => [ @accents_stack ]};
# then the accent that is associated with the function call
my $current_accent = t2h_utf8_accent($accent,[$text],$style_stack);
@accents_stack = ($accent);
push @utf8_partial_results, { 'result' => $current_accent,
'accents_stack' => [ @accents_stack ]};
# and then all the other accents on the stack
while (scalar(@styles) and (defined($unicode_accents{$styles[0]}) or $styles[0] eq 'dotless'))
{
my $next_style = shift @styles;
my @new_stack = reverse(@styles);
$current_accent = t2h_utf8_accent($next_style,[$current_accent],\@new_stack);
push @accents_stack, $next_style;
push @utf8_partial_results, { 'result' => $current_accent,
'accents_stack' => [ @accents_stack ]}
;
}
my $enc_map = $makeinfo_encoding_to_map{$Texi2HTML::THISDOC{'ENCODING_NAME'}};
my $eight_bit;
my $result;
# At this point we have the utf8 encoded results for the accent
# commands stack, with all the intermediate results.
# For each one we'll check if it is possible to encode it in the
# current eight bit output encoding table
foreach my $partial_result (@utf8_partial_results)
{
my $char = $partial_result->{'result'};
my $new_eight_bit = '';
my $new_codepoint;
if (ord($char) <= 128)
{
$new_eight_bit = uc(sprintf("%02x",ord($char)));
$new_codepoint = uc(sprintf("%04x",ord($char)));
}
elsif (ord($char) <= hex(0xFFFF))
{
$new_codepoint = uc(sprintf("%04x",ord($char)));
if (exists($makeinfo_unicode_to_eight_bit{$enc_map}->{$new_codepoint}))
{
$new_eight_bit = $makeinfo_unicode_to_eight_bit{$enc_map}->{$new_codepoint};
}
}
#print STDERR "" . encode('utf8', "$char") . " (@{$partial_result->{'accents_stack'}}), 8bit: $new_eight_bit\n";
last if ($new_eight_bit eq '');
last if (defined($eight_bit) and (($new_eight_bit eq $eight_bit)
and !($partial_result->{'accents_stack'}[0] eq 'dotless' and $char eq 'i')));
$result = $partial_result;
$eight_bit = $new_eight_bit;
}
if (defined($result) and scalar(@{$result->{'accents_stack'}}))
{
# we got a result, return it and put in enable_encoding_accents_stack
# the stack of accent commands that were processed.
#print STDERR "Result: ".encode('utf8', $result->{'result'}) ." '$eight_bit' (@{$result->{'accents_stack'}})\n" if defined($result);
@enable_encoding_accents_stack = @{$result->{'accents_stack'}};
# remove the first, it is the accent being processed
shift @enable_encoding_accents_stack;
return $result->{'result'};
}
return &{$enable_encoding_default_accent{$in}->{$accent}}(@other_args);
}
1;
|