File: utf8.pl

package info (click to toggle)
xterm 398-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 8,280 kB
  • sloc: ansic: 86,716; perl: 9,623; sh: 5,488; makefile: 843; xml: 46; sed: 11
file content (124 lines) | stat: -rwxr-xr-x 3,693 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env perl
# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $
# -----------------------------------------------------------------------------
# this file is part of xterm
#
# Copyright 2012-2018,2022 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------
# display the given Unicode characters, given their hex or decimal values.

use warnings FATAL => "overflow";
no warnings "portable";
use strict;
use Encode 'encode_utf8';
use Text::CharWidth qw(mbswidth);

$| = 1;

sub num_bytes($) {
    my $char  = shift;
    my $value = length( Encode::encode_utf8($char) );
    my $result =
      ( $value <= 0
        ? "no bytes"
        : ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) );
    return $result;
}

sub num_cells($) {
    my $char  = shift;
    my $value = mbswidth($char);
    my $result =
      ( $value <= 0
        ? "no cells"
        : ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) );
    return $result;
}

sub pad_column($) {
    my $char  = shift;
    my $value = mbswidth($char);
    $value = 0 if ( $value < 0);
    my $result = sprintf( "%.*s", 3 - $value, "    ");
    return $result;
}

sub vxt_utf8($) {
    my $arg = $_[0];
    my $hex = $arg;
    my $dec = $arg;
    if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) {
        $hex =~ s/^../0x/;
        $dec = hex($hex);
    }
    elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) {
        $dec = hex($hex);
    }
    elsif ( $arg =~ /^[[:xdigit:]]+$/i ) {
        $dec = hex($hex);
    }
    else {
        printf STDERR "? not a codepoint: $dec\n";
        return;
    }
    my $chr  = chr($dec);
    my $type = (
        $chr =~ /\p{isPrint}/
        ? (
            $chr =~ /\p{isAlpha}/
            ? "alpha"
            : (
                $chr =~ /\p{isPunct}/
                ? "punct"
                : (
                    $chr =~ /\p{isDigit}/
                    ? "digit"
                    : "printing"
                )
            )
          )
        : (
            $chr =~ /\p{isCntrl}/
            ? "cntrl"
            : "nonprinting"
        )
    );
    printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr,
    &pad_column($chr),
      &num_bytes($chr),
      &num_cells($chr),
      $type;
}

binmode( STDOUT, ":utf8" );
while ( $#ARGV >= 0 ) {
    vxt_utf8( shift @ARGV );
}

1;