File: utf8.pl

package info (click to toggle)
latex2html 2019-debian1-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 8,896 kB
  • sloc: perl: 33,323; makefile: 447
file content (50 lines) | stat: -rw-r--r-- 1,589 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
### File: utf8.pl
### Version 0.1,  September 21, 1999
### Written by Ross Moore <ross@maths.mq.edu.au>
###
### UTF-8 encoding of character set code-points
###

## Copyright (C) 1999 by Ross R Moore
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

package main;

# Settings established when this encoding file is loaded.
# Both $utf8_str and $charset end up holding the label 'utf-8'.
$charset = $utf8_str = 'utf-8';

# $USE_UTF becomes true and $NO_UTF becomes the empty string (false).
# NOTE(review): presumably these flags are consulted by the rest of the
# program to decide whether UTF-8 output is wanted -- confirm at the
# places that read them.
($USE_UTF, $NO_UTF) = (1, '');

# convert_to_utf8(STRING)
#
# Rewrites STRING in place; the caller's variable is modified through
# the $_[0] alias.
#   1. Every character in the range \200-\377 is replaced by the UTF-8
#      encoding of its ordinal value.
#   2. Every decimal character reference of the form &#NNN; with two or
#      more digits is replaced by the UTF-8 encoding of NNN.
# The value returned is that of the second substitution.
sub convert_to_utf8 {
    # The 8-bit pass must run first: the bytes produced by the second
    # pass are themselves in the range \200-\377 and would otherwise be
    # encoded a second time.
    $_[0] =~ s/([\200-\377])/to_utf8(ord($1))/egs;

    # Character references are expanded last; nothing written by the
    # first pass can look like "&#NNN;".
    $_[0] =~ s/\&#(\d{2,});/to_utf8($1)/egs;
}

# to_utf8(CODE)
#
# Encode the numeric code point CODE as UTF-8 and return the result as a
# string of one to four characters, each with an ordinal in 0-255.
#
#   CODE   non-negative integer code point (a string of decimal digits
#          is accepted).
#
# Returns the empty list (undef in scalar context) when CODE is false
# (0, '' or undef).  Returns the empty string, after writing a
# diagnostic to STDERR, when CODE is above U+10FFFF, the largest code
# point that UTF-8 can encode (RFC 3629).
sub to_utf8 {
    my ($code) = @_;
    return () unless ($code);
    return chr($code) if ($code < 128);

    if ($code > 0x10FFFF) {
        print STDERR  "\n*** character $_[0] out of range for UTF-8 ***";
        return '';
    }

    # $lead holds the marker bits of the lead byte (110xxxxx, 1110xxxx,
    # 11110xxx) and $room is the number of payload values that lead byte
    # can still carry.  Every continuation byte (10xxxxxx) takes the low
    # six bits of what remains.
    #
    # The remainder has to be compared with $room, not with a fixed 63:
    # the lead byte's payload shrinks with each continuation byte, and a
    # fixed limit yields invalid sequences for U+0800-U+0FFF and
    # U+10000-U+3FFFF.
    my ($tail, $lead, $room) = ('', 128, 64);
    while ($code >= $room) {
        $lead += $room;
        $room /= 2;
        $tail = chr(128 + $code % 64) . $tail;
        $code = int($code / 64);
    }
    return chr($lead + $code) . $tail;
}

# A file loaded with require must end with a true value.
1;