File: Encoding.pm

package info (click to toggle)
debconf 1.5.91
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,180 kB
  • sloc: perl: 8,500; sh: 262; python: 182; makefile: 144
file content (124 lines) | stat: -rw-r--r-- 3,463 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl

=head1 NAME

Debconf::Encoding - Character encoding support for debconf

=head1 DESCRIPTION

This module provides facilities to convert between character encodings for
debconf, as well as other functions to operate on characters.

Debconf uses glibc's character encoding converter via Text::Iconv instead
of perl's internal Encoding conversion library because I'm not really sure
if perl's encoding is 100% the same. There could be round-trip errors
between iconv's encodings and perl's, conceivably.

$Debconf::Encoding::charmap holds the user's charmap.

Debconf::Encoding::convert() takes a charmap and a string encoded in that
charmap, and converts it to the user's charmap.

Debconf::Encoding::wrap is a word-wrapping function, with the same interface
as the one in Text::Wrap (except it doesn't gratuitously unexpand tabs).
If Text::WrapI18N is available, it will be used for proper wrapping of
multibyte encodings, combining and fullwidth characters, and languages that
do not use whitespace between words.

$Debconf::Encoding::columns is used to set the number of columns text is
wrapped to by Debconf::Encoding::wrap.

Debconf::Encoding::width returns the number of columns required to display
the given string. If available, Text::CharWidth is used to determine the
width, to support combining and fullwidth characters.

Any of the above can be exported on request; this module uses Exporter.

=cut

package Debconf::Encoding;

use strict;
use warnings;

our $charmap;
# Compile-time setup: probe for the optional i18n modules, record the user's
# charmap when Text::Iconv is usable, and install this package's wrap,
# columns and width symbols from whichever implementation is available.
BEGIN {
	# Warnings are switched off around the string eval. A module that
	# fails to load leaves its error in $@ rather than aborting.
	no warnings;
	eval q{	use Text::Iconv };
	use warnings;
	if (! $@) {
		# I18N::Langinfo is not even in Debian as I write this, so
		# I will use something that is to get the charmap.
		# $charmap is only assigned here, i.e. only when Text::Iconv
		# loaded; the trailing newline of the command output is removed.
		$charmap = `locale charmap`;
		chomp $charmap;
	}

	no warnings;
	## no critic (BuiltinFunctions::ProhibitStringyEval)
	eval q{ use Text::WrapI18N; use Text::CharWidth };
	## use critic
	use warnings;
	# mblen has been known to get busted and return large numbers when
	# the wrong version of perl is installed. Avoid an infinite loop
	# in Text::WrapI18N in this case.
	if (! $@ && Text::CharWidth::mblen("a") == 1) {
		# Set up wrap and width functions to point to functions
		# from the modules.
		# These are whole-glob aliases, so $columns in this package
		# is the same variable as $Text::WrapI18N::columns.
		*wrap = *Text::WrapI18N::wrap;
		*columns = *Text::WrapI18N::columns;
		*width = *Text::CharWidth::mbswidth;
	}
	else {
		# Use Text::Wrap for wrapping, but run its output through
		# Text::Tabs::expand so the result contains no tabs.
		require Text::Wrap;
		require Text::Tabs;
		# Same arguments as Text::Wrap::wrap; they are passed through
		# unchanged.
		sub _wrap { return Text::Tabs::expand(Text::Wrap::wrap(@_)) }
		*wrap = *_wrap;
		*columns = *Text::Wrap::columns;
		# Cannot just use *CORE::length; perl is too dumb.
		# Fallback width: the length of the first argument.
		sub _dumbwidth { length shift }
		*width = *_dumbwidth;
	}
}

# Inherit from Exporter; the symbols listed in @EXPORT_OK can be imported
# by callers that ask for them by name.
use base qw(Exporter);
our @EXPORT_OK=qw(wrap $columns width convert $charmap to_Unicode);

# Cached Text::Iconv converter, together with the source and target
# charmaps it was created for.
my $converter;
my $old_input_charmap;
my $old_output_charmap;

# convert($input_charmap, $string)
#
# Converts $string, encoded in $input_charmap, to the user's charmap
# ($charmap) and returns whatever the Text::Iconv converter returns.
# Returns nothing (undef in scalar context, the empty list in list context)
# when $charmap is undefined.
sub convert {
	my $input_charmap = shift;
	my $string = shift;

	return unless defined $charmap;

	# The converter object is cached. It is rebuilt when either end of
	# the conversion changes: $charmap is a public package variable, so a
	# cache keyed on the input charmap alone would keep converting to a
	# charmap that is no longer the current one.
	if (! defined $converter ||
	    $input_charmap ne $old_input_charmap ||
	    $charmap ne $old_output_charmap) {
		$converter = Text::Iconv->new($input_charmap, $charmap);
		$old_input_charmap = $input_charmap;
		$old_output_charmap = $charmap;
	}
	return $converter->convert($string);
}

# Cached Text::Iconv converter from the user's charmap to UTF-8.
my $unicode_conv;

# to_Unicode($string)
#
# Returns $string as a perl character string. A string that already carries
# perl's UTF-8 flag is returned as is. Otherwise it is converted from the
# user's charmap ($charmap) to UTF-8 with Text::Iconv and then decoded.
# When $charmap is undefined the string is returned unchanged.
sub to_Unicode {
	my $string = shift;

	return $string if utf8::is_utf8($string);

	# $charmap is only set when Text::Iconv could be loaded. Without it
	# there is nothing to convert with, so hand the string back instead
	# of dying on a call into a module that is not there. This mirrors
	# the guard in convert().
	return $string unless defined $charmap;

	if (!defined $unicode_conv) {
		$unicode_conv = Text::Iconv->new($charmap, "UTF-8");
	}
	my $result = $unicode_conv->convert($string);
	# Only decode when the conversion produced something; an undefined
	# result is returned as undef, as before.
	utf8::decode($result) if defined $result;
	return $result;
}

=head1 AUTHOR

Joey Hess <joeyh@debian.org>

=cut

1